langchain vector storage connection and configuration
153
services/rag/langchain/vector_storage.py
Normal file
@@ -0,0 +1,153 @@
"""Vector storage module using Qdrant and Ollama embeddings for the RAG solution."""

import os

from langchain_community.vectorstores import Qdrant
from langchain_ollama import OllamaEmbeddings
from langchain_core.documents import Document
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Qdrant configuration
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_REST_PORT = int(os.getenv("QDRANT_REST_PORT", 6333))
QDRANT_GRPC_PORT = int(os.getenv("QDRANT_GRPC_PORT", 6334))

# Ollama embedding model configuration
OLLAMA_EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")


def initialize_vector_store(
    collection_name: str = "documents",
    recreate_collection: bool = False,
) -> Qdrant:
    """
    Initialize and return a Qdrant vector store with Ollama embeddings.

    Args:
        collection_name: Name of the Qdrant collection to use
        recreate_collection: Whether to recreate the collection if it exists

    Returns:
        Initialized Qdrant vector store
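
    Example (a minimal sketch; assumes a Qdrant instance is reachable on the
    configured host and port, and that the Ollama model is available locally):

        store = initialize_vector_store("documents", recreate_collection=False)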
    """
    # Initialize Qdrant client
    client = QdrantClient(
        host=QDRANT_HOST,
        port=QDRANT_REST_PORT,
    )

    # Initialize Ollama embeddings
    embeddings = OllamaEmbeddings(
        model=OLLAMA_EMBEDDING_MODEL,
        base_url=OLLAMA_BASE_URL,
    )

    # Drop the existing collection first if a rebuild was requested
    existing = [col.name for col in client.get_collections().collections]
    if recreate_collection and collection_name in existing:
        client.delete_collection(collection_name)
        existing.remove(collection_name)

    # Create the collection if it is missing. Embedding a short probe string
    # yields the vector size the collection must be configured with; calling
    # Qdrant.from_documents with an empty document list would fail here because
    # the vector size cannot be inferred from zero documents.
    if collection_name not in existing:
        vector_size = len(embeddings.embed_query("dimension probe"))
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
        )

    # Wrap the client and collection in a LangChain vector store
    vector_store = Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    return vector_store


def add_documents_to_vector_store(
    vector_store: Qdrant,
    documents: list[Document],
    batch_size: int = 10,
) -> None:
    """
    Add documents to the vector store.

    Args:
        vector_store: Initialized Qdrant vector store
        documents: List of documents to add
        batch_size: Number of documents to add in each batch
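
    Example (illustrative; the document content is made up):

        docs = [Document(page_content="hello world")]
        add_documents_to_vector_store(store, docs, batch_size=10)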
    """
    # Add documents to the vector store in batches
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        vector_store.add_documents(batch)


def search_vector_store(
    vector_store: Qdrant,
    query: str,
    top_k: int = 5,
) -> list[Document]:
    """
    Search the vector store for similar documents.

    Args:
        vector_store: Initialized Qdrant vector store
        query: Query string to search for
        top_k: Number of top results to return

    Returns:
        List of similar documents
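
    Example (illustrative; the query string is made up):

        hits = search_vector_store(store, "what does Qdrant store?", top_k=5)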
    """
    return vector_store.similarity_search(query, k=top_k)


# Optional: connect via OpenAI-compatible embeddings using an OpenRouter API key.
# Kept as a commented-out block so it can be enabled in the future.
"""
# Alternative implementation using OpenAI embeddings via OpenRouter
# Uncomment and configure as needed

import os
from langchain_openai import OpenAIEmbeddings

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_EMBEDDING_MODEL = os.getenv("OPENROUTER_EMBEDDING_MODEL", "openai/text-embedding-ada-002")

def initialize_vector_store_with_openrouter(
    collection_name: str = "documents"
) -> Qdrant:
    # Initialize Qdrant client
    client = QdrantClient(
        host=QDRANT_HOST,
        port=QDRANT_REST_PORT,
    )

    # Initialize OpenAI embeddings via OpenRouter
    embeddings = OpenAIEmbeddings(
        model=OPENROUTER_EMBEDDING_MODEL,
        openai_api_key=OPENROUTER_API_KEY,
        openai_api_base="https://openrouter.ai/api/v1"
    )

    # Create or get the vector store
    vector_store = Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    return vector_store
"""


if __name__ == "__main__":
    # Example usage
    print(f"Initializing vector store with Ollama embedding model: {OLLAMA_EMBEDDING_MODEL}")
    vector_store = initialize_vector_store()
    print("Vector store initialized successfully!")