Working enrichment

2026-02-03 22:55:12 +03:00
parent 8d7e39a603
commit 4cbd5313d2
4 changed files with 134 additions and 60 deletions
--- a/services/rag/langchain/vector_storage.py
+++ b/services/rag/langchain/vector_storage.py
@@ -46,26 +46,47 @@ def initialize_vector_store(
        base_url="http://localhost:11434",  # Default Ollama URL
    )

-    # Create or get the vector store
-    vector_store = Qdrant(
-        client=client,
-        collection_name=collection_name,
-        embeddings=embeddings,
-    )
+    # Check whether the collection already exists; it is created below if missing
+    collection_exists = False
+    try:
+        client.get_collection(collection_name)
+        collection_exists = True
+    except Exception:
+        # Any failure here (not only a missing collection) is treated as "does not exist"
+        collection_exists = False

-    # If recreate_collection is True, we'll delete and recreate the collection
-    if recreate_collection and collection_name in [
-        col.name for col in client.get_collections().collections
-    ]:
+    if recreate_collection and collection_exists:
        client.delete_collection(collection_name)
+        collection_exists = False

-        # Recreate with proper configuration
-        vector_store = Qdrant.from_documents(
-            documents=[],
-            embedding=embeddings,
-            url=f"http://{QDRANT_HOST}:{QDRANT_REST_PORT}",
+    # If collection doesn't exist, create it using the client directly
+    if not collection_exists:
+        # Imported locally: Distance and VectorParams are used by create_collection below
+        from qdrant_client.http.models import Distance, VectorParams
+        import numpy as np
+
+        # First, we need to determine the embedding size by creating a sample embedding
+        sample_embedding = embeddings.embed_query("sample text for dimension detection")
+        vector_size = len(sample_embedding)
+
+        # Create the collection with appropriate vector size
+        client.create_collection(
            collection_name=collection_name,
-            force_recreate=True,
+            vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
+        )
+
+        # Now create the Qdrant instance connected to the newly created collection
+        vector_store = Qdrant(
+            client=client,
+            collection_name=collection_name,
+            embeddings=embeddings,
+        )
+    else:
+        # Collection exists, just connect to it
+        vector_store = Qdrant(
+            client=client,
+            collection_name=collection_name,
+            embeddings=embeddings,
        )

    return vector_store
@@ -116,7 +137,7 @@ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 OPENROUTER_EMBEDDING_MODEL = os.getenv("OPENROUTER_EMBEDDING_MODEL", "openai/text-embedding-ada-002")

 def initialize_vector_store_with_openrouter(
-    collection_name: str = "documents"
+    collection_name: str = "documents_langchain"
 ) -> Qdrant:
    # Initialize Qdrant client
    client = QdrantClient(