Add RAGFlow to the repository, including a Codex-created Yandex Disk plugin (kept just in case), plus LlamaIndex enrichment using predefined Yandex Disk data.

This commit is contained in:
2026-02-25 11:28:29 +03:00
parent c29928cc89
commit 2c7ab06b3f
12 changed files with 98507 additions and 132 deletions

View File

@@ -10,6 +10,7 @@ This module provides initialization and configuration for:
import os
from typing import Optional
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from loguru import logger
@@ -18,12 +19,26 @@ from qdrant_client import QdrantClient
# Import the new configuration module
from config import get_embedding_model
load_dotenv()
def get_qdrant_connection_config() -> dict[str, int | str]:
"""Load Qdrant connection settings from environment variables."""
host = os.getenv("QDRANT_HOST", "localhost")
rest_port = int(os.getenv("QDRANT_REST_PORT", "6333"))
grpc_port = int(os.getenv("QDRANT_GRPC_PORT", "6334"))
return {
"host": host,
"port": rest_port,
"grpc_port": grpc_port,
}
def initialize_vector_storage(
collection_name: str = "documents_llamaindex",
host: str = "localhost",
port: int = 6333,
grpc_port: int = 6334,
host: Optional[str] = None,
port: Optional[int] = None,
grpc_port: Optional[int] = None,
) -> tuple[QdrantVectorStore, VectorStoreIndex]:
"""
Initialize Qdrant vector storage with embedding model based on configured strategy.
@@ -37,11 +52,19 @@ def initialize_vector_storage(
Returns:
Tuple of (QdrantVectorStore, VectorStoreIndex)
"""
logger.info(f"Initializing vector storage with collection: {collection_name}")
qdrant_config = get_qdrant_connection_config()
host = host or str(qdrant_config["host"])
port = port or int(qdrant_config["port"])
grpc_port = grpc_port or int(qdrant_config["grpc_port"])
logger.info(
f"Initializing vector storage with collection: {collection_name} "
f"(host={host}, rest_port={port}, grpc_port={grpc_port})"
)
try:
# Initialize Qdrant client
client = QdrantClient(host=host, port=port)
client = QdrantClient(host=host, port=port, grpc_port=grpc_port)
# Get the embedding model based on the configured strategy
embed_model = get_embedding_model()
@@ -131,14 +154,24 @@ def initialize_vector_storage(
raise
def get_vector_store_and_index(
    collection_name: str = "documents_llamaindex",
    host: Optional[str] = None,
    port: Optional[int] = None,
    grpc_port: Optional[int] = None,
) -> tuple[QdrantVectorStore, VectorStoreIndex]:
    """
    Convenience function to get the initialized vector store and index.

    Any connection argument left as ``None`` is resolved from the
    environment by the underlying initializer.

    Returns:
        Tuple of (QdrantVectorStore, VectorStoreIndex)
    """
    # Thin pass-through: all resolution/fallback logic lives in
    # initialize_vector_storage.
    connection_kwargs = {
        "collection_name": collection_name,
        "host": host,
        "port": port,
        "grpc_port": grpc_port,
    }
    return initialize_vector_storage(**connection_kwargs)
if __name__ == "__main__":