Add RAGFlow to the repository, with a Codex-created Yandex Disk plugin (just in case); also add LlamaIndex enrichment with predefined Yandex Disk data

This commit is contained in:
2026-02-25 11:28:29 +03:00
parent c29928cc89
commit 2c7ab06b3f
12 changed files with 98507 additions and 132 deletions

View File

@@ -14,7 +14,7 @@ from llama_index.core.retrievers import VectorIndexRetriever
from loguru import logger
from pathlib import Path
from vector_storage import get_vector_store_and_index
from vector_storage import get_qdrant_connection_config, get_vector_store_and_index
# Import the new configuration module
from config import setup_global_models
@@ -23,8 +23,9 @@ from config import setup_global_models
def initialize_retriever(
collection_name: str = "documents_llamaindex",
similarity_top_k: int = 5,
host: str = "localhost",
port: int = 6333
host: str | None = None,
port: int | None = None,
grpc_port: int | None = None,
) -> RetrieverQueryEngine:
"""
Initialize the retriever query engine with the vector store.
@@ -32,8 +33,9 @@ def initialize_retriever(
Args:
collection_name: Name of the Qdrant collection
similarity_top_k: Number of top similar documents to retrieve
host: Qdrant host address
port: Qdrant REST API port
host: Qdrant host address (defaults to QDRANT_HOST from .env)
port: Qdrant REST API port (defaults to QDRANT_REST_PORT from .env)
grpc_port: Qdrant gRPC API port (defaults to QDRANT_GRPC_PORT from .env)
Returns:
RetrieverQueryEngine configured with the vector store
@@ -44,8 +46,23 @@ def initialize_retriever(
# Set up the global models to prevent defaulting to OpenAI
setup_global_models()
qdrant_config = get_qdrant_connection_config()
resolved_host = host or str(qdrant_config["host"])
resolved_port = port or int(qdrant_config["port"])
resolved_grpc_port = grpc_port or int(qdrant_config["grpc_port"])
logger.info(
f"Retriever Qdrant connection: host={resolved_host}, "
f"rest_port={resolved_port}, grpc_port={resolved_grpc_port}"
)
# Get the vector store and index from the existing configuration
vector_store, index = get_vector_store_and_index()
vector_store, index = get_vector_store_and_index(
collection_name=collection_name,
host=resolved_host,
port=resolved_port,
grpc_port=resolved_grpc_port,
)
# Create a retriever from the index
retriever = VectorIndexRetriever(
@@ -310,4 +327,4 @@ if __name__ == "__main__":
except Exception as e:
logger.error(f"Error in test run: {e}")
print(f"Error: {e}")
print(f"Error: {e}")