Add RAGFlow to the repository, including a Codex-created Yandex Disk plugin (kept just in case), plus LlamaIndex enrichment using predefined Yandex Disk data.

This commit is contained in:
2026-02-25 11:28:29 +03:00
parent c29928cc89
commit 2c7ab06b3f
12 changed files with 98507 additions and 132 deletions

View File

@@ -10,6 +10,7 @@ This module provides initialization and configuration for:
import os
from typing import Optional
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from loguru import logger
@@ -18,12 +19,26 @@ from qdrant_client import QdrantClient
# Import the new configuration module
from config import get_embedding_model
load_dotenv()
def get_qdrant_connection_config() -> dict[str, int | str]:
"""Load Qdrant connection settings from environment variables."""
host = os.getenv("QDRANT_HOST", "localhost")
rest_port = int(os.getenv("QDRANT_REST_PORT", "6333"))
grpc_port = int(os.getenv("QDRANT_GRPC_PORT", "6334"))
return {
"host": host,
"port": rest_port,
"grpc_port": grpc_port,
}
def initialize_vector_storage(
collection_name: str = "documents_llamaindex",
host: str = "localhost",
port: int = 6333,
grpc_port: int = 6334,
host: Optional[str] = None,
port: Optional[int] = None,
grpc_port: Optional[int] = None,
) -> tuple[QdrantVectorStore, VectorStoreIndex]:
"""
Initialize Qdrant vector storage with embedding model based on configured strategy.
@@ -37,11 +52,19 @@ def initialize_vector_storage(
Returns:
Tuple of (QdrantVectorStore, VectorStoreIndex)
"""
logger.info(f"Initializing vector storage with collection: {collection_name}")
qdrant_config = get_qdrant_connection_config()
host = host or str(qdrant_config["host"])
port = port or int(qdrant_config["port"])
grpc_port = grpc_port or int(qdrant_config["grpc_port"])
logger.info(
f"Initializing vector storage with collection: {collection_name} "
f"(host={host}, rest_port={port}, grpc_port={grpc_port})"
)
try:
# Initialize Qdrant client
client = QdrantClient(host=host, port=port)
client = QdrantClient(host=host, port=port, grpc_port=grpc_port)
# Get the embedding model based on the configured strategy
embed_model = get_embedding_model()
@@ -131,14 +154,24 @@ def initialize_vector_storage(
raise
def get_vector_store_and_index(
    collection_name: str = "documents_llamaindex",
    host: Optional[str] = None,
    port: Optional[int] = None,
    grpc_port: Optional[int] = None,
) -> tuple[QdrantVectorStore, VectorStoreIndex]:
    """
    Convenience function to get the initialized vector store and index.

    Any connection argument left as ``None`` is resolved from the
    environment by the underlying initializer.

    Returns:
        Tuple of (QdrantVectorStore, VectorStoreIndex)
    """
    # Thin pass-through: all resolution/fallback logic lives in
    # initialize_vector_storage.
    connection_kwargs = {
        "collection_name": collection_name,
        "host": host,
        "port": port,
        "grpc_port": grpc_port,
    }
    return initialize_vector_storage(**connection_kwargs)
if __name__ == "__main__":