Enrichment for llamaindex. It takes a long time when using a local model, so it is better to use an external model rather than a local one for EMBEDDING

2026-02-04 16:06:01 +03:00
parent f36108d652
commit 3dea3605ad
5 changed files with 402 additions and 22 deletions
--- a/services/rag/llamaindex/vector_storage.py
+++ b/services/rag/llamaindex/vector_storage.py
@@ -43,7 +43,7 @@ def initialize_vector_storage(

    # Get embedding model from environment if not provided
    if ollama_embed_model is None:
-        ollama_embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
+        ollama_embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "qwen3-embedding:4b")

    logger.info(f"Using Ollama embedding model: {ollama_embed_model}")

@@ -51,6 +51,16 @@ def initialize_vector_storage(
        # Initialize Qdrant client
        client = QdrantClient(host=host, port=port)

+        # Initialize the embedding model first to get the correct dimensions
+        embed_model = OllamaEmbedding(
+            model_name=ollama_embed_model,
+            base_url=ollama_base_url
+        )
+        # Get a test embedding to determine the correct size
+        test_embedding = embed_model.get_query_embedding("test")
+        embedding_dimension = len(test_embedding)
+        logger.info(f"Detected embedding dimension: {embedding_dimension}")
+
        # Check if collection exists, create if it doesn't
        collections = client.get_collections().collections
        collection_names = [coll.name for coll in collections]
@@ -60,13 +70,45 @@ def initialize_vector_storage(
            client.create_collection(
                collection_name=collection_name,
                vectors_config={
-                    "size": 4096,  # Default size for most embedding models
+                    "size": embedding_dimension,  # Use the actual embedding size
                    "distance": "Cosine"  # Cosine distance is commonly used
                }
            )
-            logger.info(f"Collection '{collection_name}' created successfully")
+            logger.info(f"Collection '{collection_name}' created successfully with dimension {embedding_dimension}")
        else:
            logger.info(f"Collection '{collection_name}' already exists")
+            # Get the actual collection config to determine the vector size
+            collection_info = client.get_collection(collection_name)
+            # Access the vector configuration properly - handle different possible structures
+            if hasattr(collection_info.config.params, 'vectors') and collection_info.config.params.vectors is not None:
+                existing_dimension = collection_info.config.params.vectors.size
+                if existing_dimension != embedding_dimension:
+                    logger.warning(f"Existing collection dimension ({existing_dimension}) doesn't match embedding dimension ({embedding_dimension}), recreating...")
+                    # Delete and recreate the collection with the correct dimensions
+                    client.delete_collection(collection_name)
+                    client.create_collection(
+                        collection_name=collection_name,
+                        vectors_config={
+                            "size": embedding_dimension,  # Use the detected size
+                            "distance": "Cosine"
+                        }
+                    )
+                    logger.info(f"Collection '{collection_name}' recreated with dimension {embedding_dimension}")
+                else:
+                    logger.info(f"Using existing collection with matching dimension: {embedding_dimension}")
+            else:
+                # Last resort: recreate the collection with the correct dimensions
+                logger.warning(f"Could not determine vector dimension for existing collection, recreating...")
+                # Delete and recreate the collection with the correct dimensions
+                client.delete_collection(collection_name)
+                client.create_collection(
+                    collection_name=collection_name,
+                    vectors_config={
+                        "size": embedding_dimension,  # Use the detected size
+                        "distance": "Cosine"
+                    }
+                )
+                logger.info(f"Collection '{collection_name}' recreated with dimension {embedding_dimension}")

        # Initialize the Qdrant vector store
        vector_store = QdrantVectorStore(
@@ -74,13 +116,7 @@ def initialize_vector_storage(
            collection_name=collection_name
        )

-        # Initialize Ollama embedding
-        embed_model = OllamaEmbedding(
-            model_name=ollama_embed_model,
-            base_url=ollama_base_url
-        )
-
-        # Create index from vector store with the embedding model
+        # Create index from vector store with the embedding model we already created
        index = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
            embed_model=embed_model
@@ -116,7 +152,9 @@ def get_vector_store_and_index() -> tuple[QdrantVectorStore, VectorStoreIndex]:
    Returns:
        Tuple of (QdrantVectorStore, VectorStoreIndex)
    """
-    return initialize_vector_storage()
+    # Get the embedding model from environment variables
+    embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "qwen3-embedding:4b")
+    return initialize_vector_storage(ollama_embed_model=embed_model)


 if __name__ == "__main__":