Enrichment for LlamaIndex. Embedding takes a long time with a local model, so it is better to use an external (non-local) model for EMBEDDING.

This commit is contained in:
2026-02-04 16:06:01 +03:00
parent f36108d652
commit 3dea3605ad
5 changed files with 402 additions and 22 deletions

View File

@@ -43,7 +43,7 @@ def initialize_vector_storage(
# Get embedding model from environment if not provided
if ollama_embed_model is None:
ollama_embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
ollama_embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "qwen3-embedding:4b")
logger.info(f"Using Ollama embedding model: {ollama_embed_model}")
@@ -51,6 +51,16 @@ def initialize_vector_storage(
# Initialize Qdrant client
client = QdrantClient(host=host, port=port)
# Initialize the embedding model first to get the correct dimensions
embed_model = OllamaEmbedding(
model_name=ollama_embed_model,
base_url=ollama_base_url
)
# Get a test embedding to determine the correct size
test_embedding = embed_model.get_query_embedding("test")
embedding_dimension = len(test_embedding)
logger.info(f"Detected embedding dimension: {embedding_dimension}")
# Check if collection exists, create if it doesn't
collections = client.get_collections().collections
collection_names = [coll.name for coll in collections]
@@ -60,13 +70,45 @@ def initialize_vector_storage(
client.create_collection(
collection_name=collection_name,
vectors_config={
"size": 4096, # Default size for most embedding models
"size": embedding_dimension, # Use the actual embedding size
"distance": "Cosine" # Cosine distance is commonly used
}
)
logger.info(f"Collection '{collection_name}' created successfully")
logger.info(f"Collection '{collection_name}' created successfully with dimension {embedding_dimension}")
else:
logger.info(f"Collection '{collection_name}' already exists")
# Get the actual collection config to determine the vector size
collection_info = client.get_collection(collection_name)
# Access the vector configuration properly - handle different possible structures
if hasattr(collection_info.config.params, 'vectors') and collection_info.config.params.vectors is not None:
existing_dimension = collection_info.config.params.vectors.size
if existing_dimension != embedding_dimension:
logger.warning(f"Existing collection dimension ({existing_dimension}) doesn't match embedding dimension ({embedding_dimension}), recreating...")
# Delete and recreate the collection with the correct dimensions
client.delete_collection(collection_name)
client.create_collection(
collection_name=collection_name,
vectors_config={
"size": embedding_dimension, # Use the detected size
"distance": "Cosine"
}
)
logger.info(f"Collection '{collection_name}' recreated with dimension {embedding_dimension}")
else:
logger.info(f"Using existing collection with matching dimension: {embedding_dimension}")
else:
# Last resort: recreate the collection with the correct dimensions
logger.warning(f"Could not determine vector dimension for existing collection, recreating...")
# Delete and recreate the collection with the correct dimensions
client.delete_collection(collection_name)
client.create_collection(
collection_name=collection_name,
vectors_config={
"size": embedding_dimension, # Use the detected size
"distance": "Cosine"
}
)
logger.info(f"Collection '{collection_name}' recreated with dimension {embedding_dimension}")
# Initialize the Qdrant vector store
vector_store = QdrantVectorStore(
@@ -74,13 +116,7 @@ def initialize_vector_storage(
collection_name=collection_name
)
# Initialize Ollama embedding
embed_model = OllamaEmbedding(
model_name=ollama_embed_model,
base_url=ollama_base_url
)
# Create index from vector store with the embedding model
# Create index from vector store with the embedding model we already created
index = VectorStoreIndex.from_vector_store(
vector_store=vector_store,
embed_model=embed_model
@@ -116,7 +152,9 @@ def get_vector_store_and_index() -> tuple[QdrantVectorStore, VectorStoreIndex]:
Returns:
Tuple of (QdrantVectorStore, VectorStoreIndex)
"""
return initialize_vector_storage()
# Get the embedding model from environment variables
embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "qwen3-embedding:4b")
return initialize_vector_storage(ollama_embed_model=embed_model)
if __name__ == "__main__":