| 
 | 1 | +"""Vector Indexing and Search Example  | 
 | 2 | +
  | 
 | 3 | +This script demonstrates a simple end-to-end example of vector indexing and semantic search  | 
 | 4 | +using PostgreSQL with pgvector extension. It's a quick demonstration that shows both indexing  | 
 | 5 | +and searching in a single script.  | 
 | 6 | +
  | 
 | 7 | +What it does:  | 
 | 8 | +    1. Creates 5 sample text chunks about various technologies  | 
 | 9 | +    2. Converts them into vector embeddings using OpenAI  | 
 | 10 | +    3. Indexes the chunks in PostgreSQL with pgvector  | 
 | 11 | +    4. Performs a semantic search query: "What is Python?"  | 
 | 12 | +    5. Displays the top 3 most similar results  | 
 | 13 | +
  | 
 | 14 | +Use cases:  | 
 | 15 | +    - Quick demonstration of vector search capabilities  | 
 | 16 | +    - Testing vector indexing and search functionality  | 
 | 17 | +    - Learning how to use the VectorIndex API  | 
 | 18 | +    - Smoke test for database and OpenAI connectivity  | 
 | 19 | +
  | 
 | 20 | +Usage:  | 
 | 21 | +    # Run using make  | 
 | 22 | +    make example-index  | 
 | 23 | +
  | 
 | 24 | +    # Run directly with Python  | 
 | 25 | +    python -m example.vector  | 
 | 26 | +
  | 
 | 27 | +Prerequisites:  | 
 | 28 | +    - PostgreSQL database with pgvector extension running (use 'make database' to start)  | 
 | 29 | +    - OpenAI API key configured in .env file:  | 
 | 30 | +        # OpenAI: Key for draive-examples  | 
 | 31 | +        OPENAI_API_KEY=sk-proj-xxxxxxxx  | 
 | 32 | +        OPENAI_MODEL=text-embedding-3-small  | 
 | 33 | +    - Dependencies installed (use 'make venv' or 'make sync')  | 
 | 34 | +
  | 
 | 35 | +Example output:  | 
 | 36 | +    The script will:  | 
 | 37 | +    1. Index 5 sample chunks with progress message  | 
 | 38 | +    2. Perform a search for "What is Python?"  | 
 | 39 | +    3. Display the 3 most relevant results  | 
 | 40 | +
  | 
 | 41 | +For more advanced examples:  | 
 | 42 | +    - Use 'make example-generate' to index larger datasets (10 to 100,000 documents)  | 
 | 43 | +    - Use 'make example-search' to perform custom searches  | 
 | 44 | +    - Use 'make example-delete' to clean up the vector index  | 
 | 45 | +"""  | 
 | 46 | + | 
 | 47 | +from asyncio import run  | 
 | 48 | + | 
 | 49 | +from draive import ctx  | 
 | 50 | +from draive.openai import OpenAI  | 
 | 51 | +from draive.postgres import PostgresConnectionPool, PostgresVectorIndex  | 
 | 52 | +from draive.utils import VectorIndex  | 
 | 53 | + | 
 | 54 | +from .common.connection import build_postgres_dsn, initialize_pgvector  | 
 | 55 | +from .common.model import Chunk  | 
 | 56 | + | 
 | 57 | + | 
 | 58 | +async def main() -> None:  | 
 | 59 | +    async with ctx.scope(  | 
 | 60 | +        "index",  | 
 | 61 | +        PostgresVectorIndex(),  | 
 | 62 | +        disposables=(  | 
 | 63 | +            OpenAI(),  # use OpenAI for embeddings  | 
 | 64 | +            PostgresConnectionPool.of(  | 
 | 65 | +                dsn=build_postgres_dsn(),  # you can pass custom dsn string ie. "postgresql://user:password@host:port/database"  | 
 | 66 | +                initialize=initialize_pgvector,  | 
 | 67 | +            ),  # use postgres connection pool with vector support  | 
 | 68 | +        ),  | 
 | 69 | +    ):  | 
 | 70 | +        # Sample chunks to index  | 
 | 71 | +        chunks = [  | 
 | 72 | +            Chunk(text="Python is a high-level programming language"),  | 
 | 73 | +            Chunk(text="PostgreSQL is a powerful relational database"),  | 
 | 74 | +            Chunk(text="Machine learning is a subset of artificial intelligence"),  | 
 | 75 | +            Chunk(text="Docker is a containerization platform"),  | 
 | 76 | +            Chunk(text="FastAPI is a modern Python web framework"),  | 
 | 77 | +        ]  | 
 | 78 | + | 
 | 79 | +        print("Indexing chunks...")  | 
 | 80 | +        await VectorIndex.index(  | 
 | 81 | +            Chunk,  | 
 | 82 | +            values=chunks,  | 
 | 83 | +            attribute=Chunk._.text,  | 
 | 84 | +        )  | 
 | 85 | +        print(f"...indexed {len(chunks)} chunks!")  | 
 | 86 | + | 
 | 87 | +        # Search for similar chunks  | 
 | 88 | +        query = "What is Python?"  | 
 | 89 | +        print(f"\nSearching for: '{query}'")  | 
 | 90 | +        results = await VectorIndex.search(  | 
 | 91 | +            Chunk,  | 
 | 92 | +            query=query,  | 
 | 93 | +            limit=3,  | 
 | 94 | +            score_threshold=0.0,  | 
 | 95 | +            rerank=False,  | 
 | 96 | +        )  | 
 | 97 | + | 
 | 98 | +        print(f"Found {len(results)} results:")  | 
 | 99 | +        for idx, result in enumerate(results, 1):  | 
 | 100 | +            print(f"{idx}. {result.text}")  | 
 | 101 | + | 
 | 102 | + | 
 | 103 | +run(main())  | 
0 commit comments