diff --git a/services/rag/llamaindex/PLANNING.md b/services/rag/llamaindex/PLANNING.md
index 77d26ba..1923152 100644
--- a/services/rag/llamaindex/PLANNING.md
+++ b/services/rag/llamaindex/PLANNING.md
@@ -20,10 +20,10 @@ Chosen data folder: relatve ./../../../data - from the current folder
 - [x] Install all needed libraries for loaders, mentioned in the `EXTENSIONS.md`. If some libraries require API keys for external services, add them to the `.env` file (create it if it does not exist)
 
 # Phase 3 (preparation for storing data in the vector storage + embeddings)
-- [ ] Install needed library for using Qdrant connection as vector storage. Ensure ports are used (which are needed in the chosen framework): Rest Api: 6333, gRPC Api: 6334. Database available and running on localhost.
-- [ ] Create file called `vector_storage.py`, which will contain vector storage initialization, available for import by other modules of initialized. If needed in chosen RAG framework, add embedding model initialization in the same file. Use ollama, model name defined in the .env file: OLLAMA_EMBEDDING_MODEL. Ollama available by the default local port: 11434.
-- [ ] Add strategy of creating collection for this project (name: "documents_llamaindex"), if it does not exist.
-- [ ] Just in case add possibility to connect via openai embedding, using openrouter api key. Comment this section, so it can be used in the future.
+- [x] Install needed library for using Qdrant connection as vector storage. Ensure ports are used (which are needed in the chosen framework): Rest Api: 6333, gRPC Api: 6334. Database available and running on localhost.
+- [x] Create file called `vector_storage.py`, which will contain vector storage initialization, available for import by other modules of initialized. If needed in chosen RAG framework, add embedding model initialization in the same file. Use ollama, model name defined in the .env file: OLLAMA_EMBEDDING_MODEL. Ollama available by the default local port: 11434.
+- [x] Add strategy of creating collection for this project (name: "documents_llamaindex"), if it does not exist.
+- [x] Just in case add possibility to connect via openai embedding, using openrouter api key. Comment this section, so it can be used in the future.
 
 # Phase 4 (creating module for loading documents from the folder)
 
diff --git a/services/rag/llamaindex/QWEN.md b/services/rag/llamaindex/QWEN.md
index 4ac932a..9d1124d 100644
--- a/services/rag/llamaindex/QWEN.md
+++ b/services/rag/llamaindex/QWEN.md
@@ -84,10 +84,11 @@ This is a Retrieval Augmented Generation (RAG) solution built using LlamaIndex a
 - [x] Required loader libraries installation
 
 ### Phase 3: Vector Storage Setup
-- [ ] Qdrant library installation
-- [ ] Vector storage initialization module
-- [ ] Embedding model configuration with Ollama
-- [ ] Collection creation strategy
+- [x] Qdrant library installation
+- [x] Vector storage initialization module
+- [x] Collection creation strategy for "documents_llamaindex"
+- [x] Ollama embedding model configuration
+- [x] Optional OpenAI embedding via OpenRouter (commented)
 
 ### Phase 4: Document Enrichment
 - [ ] Document loading module with appropriate loaders
@@ -109,7 +110,7 @@ This is a Retrieval Augmented Generation (RAG) solution built using LlamaIndex a
 llamaindex/
 ├── venv/               # Python virtual environment
 ├── cli.py              # CLI entry point
-├── vector_storage.py   # Vector storage configuration (to be created)
+├── vector_storage.py   # Vector storage configuration
 ├── enrichment.py       # Document loading and processing (to be created)
 ├── retrieval.py        # Search and retrieval functionality (to be created)
 ├── agent.py            # Chat agent implementation (to be created)
diff --git a/services/rag/llamaindex/vector_storage.py b/services/rag/llamaindex/vector_storage.py
new file mode 100644
index 0000000..3b513aa
--- /dev/null
+++ b/services/rag/llamaindex/vector_storage.py
@@ -0,0 +1,129 @@
+"""
+Vector storage configuration for the RAG solution using LlamaIndex and Qdrant.
+
+This module provides initialization and configuration for:
+- Qdrant vector storage connection
+- Ollama embedding model
+- Automatic collection creation
+"""
+
+import os
+from typing import Optional
+from llama_index.core import VectorStoreIndex
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from llama_index.embeddings.ollama import OllamaEmbedding
+from llama_index.llms.ollama import Ollama
+from qdrant_client import QdrantClient, models
+from loguru import logger
+
+
+def initialize_vector_storage(
+    collection_name: str = "documents_llamaindex",
+    host: str = "localhost",
+    port: int = 6333,
+    grpc_port: int = 6334,
+    ollama_base_url: str = "http://localhost:11434",
+    ollama_embed_model: Optional[str] = None
+) -> tuple[QdrantVectorStore, VectorStoreIndex]:
+    """
+    Initialize Qdrant vector storage with Ollama embeddings.
+
+    Args:
+        collection_name: Name of the Qdrant collection
+        host: Qdrant host address
+        port: Qdrant REST API port
+        grpc_port: Qdrant gRPC API port
+        ollama_base_url: Base URL for Ollama API
+        ollama_embed_model: Name of the Ollama embedding model
+
+    Returns:
+        Tuple of (QdrantVectorStore, VectorStoreIndex)
+    """
+    logger.info(f"Initializing vector storage with collection: {collection_name}")
+
+    # Get embedding model from environment if not provided
+    if ollama_embed_model is None:
+        ollama_embed_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
+
+    logger.info(f"Using Ollama embedding model: {ollama_embed_model}")
+
+    try:
+        # Initialize Qdrant client (REST API on `port`, gRPC API on `grpc_port`)
+        client = QdrantClient(host=host, port=port, grpc_port=grpc_port)
+
+        # Check if collection exists, create if it doesn't
+        collections = client.get_collections().collections
+        collection_names = [coll.name for coll in collections]
+
+        if collection_name not in collection_names:
+            logger.info(f"Collection '{collection_name}' does not exist, creating...")
+            client.create_collection(
+                collection_name=collection_name,
+                vectors_config=models.VectorParams(
+                    size=768,  # Must match the embedding model; nomic-embed-text produces 768-dim vectors
+                    distance=models.Distance.COSINE  # Cosine distance is commonly used
+                )
+            )
+            logger.info(f"Collection '{collection_name}' created successfully")
+        else:
+            logger.info(f"Collection '{collection_name}' already exists")
+
+        # Initialize the Qdrant vector store
+        vector_store = QdrantVectorStore(
+            client=client,
+            collection_name=collection_name
+        )
+
+        # Initialize Ollama embedding
+        embed_model = OllamaEmbedding(
+            model_name=ollama_embed_model,
+            base_url=ollama_base_url
+        )
+
+        # Create index from vector store with the embedding model
+        index = VectorStoreIndex.from_vector_store(
+            vector_store=vector_store,
+            embed_model=embed_model
+        )
+
+        logger.info("Vector storage initialized successfully")
+        return vector_store, index
+
+    except Exception as e:
+        logger.error(f"Failed to initialize vector storage: {str(e)}")
+        raise
+
+
+# Optional: Alternative embedding configuration using OpenAI via OpenRouter
+# Uncomment and configure as needed for future use
+# from llama_index.embeddings.openai import OpenAIEmbedding
+#
+# def initialize_openai_embeddings():
+#     # Use OpenRouter API key from environment
+#     os.environ["OPENAI_API_KEY"] = os.getenv("OPENROUTER_API_KEY", "")
+#
+#     embed_model = OpenAIEmbedding(
+#         model="openai/text-embedding-3-small",  # Or another suitable model
+#         api_base="https://openrouter.ai/api/v1"  # OpenRouter endpoint
+#     )
+#     return embed_model
+
+
+def get_vector_store_and_index() -> tuple[QdrantVectorStore, VectorStoreIndex]:
+    """
+    Convenience function to get the initialized vector store and index.
+
+    Returns:
+        Tuple of (QdrantVectorStore, VectorStoreIndex)
+    """
+    return initialize_vector_storage()
+
+
+if __name__ == "__main__":
+    # Example usage
+    logger.info("Testing vector storage initialization...")
+    try:
+        vector_store, index = get_vector_store_and_index()
+        logger.info("Vector storage test successful!")
+    except Exception as e:
+        logger.error(f"Vector storage test failed: {e}")
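
The new module reads only OLLAMA_EMBEDDING_MODEL from the environment (falling back to nomic-embed-text); OPENROUTER_API_KEY matters only if the commented OpenRouter path is enabled. A minimal .env sketch with placeholder values, not part of this change:

OLLAMA_EMBEDDING_MODEL=nomic-embed-text
# Only needed if the commented OpenAI-via-OpenRouter embedding path is uncommented
OPENROUTER_API_KEY=<your-openrouter-key>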
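
A minimal sketch of how a downstream module (for example the planned enrichment.py listed in the QWEN.md project structure) might consume vector_storage; SimpleDirectoryReader and the index_documents helper are illustrative assumptions, not part of this change:

from llama_index.core import SimpleDirectoryReader

from vector_storage import get_vector_store_and_index


def index_documents(data_dir: str = "./../../../data") -> None:
    # Reuse the shared Qdrant-backed index and insert freshly loaded documents into it;
    # embeddings are computed with the Ollama model configured in vector_storage.py
    _vector_store, index = get_vector_store_and_index()
    documents = SimpleDirectoryReader(data_dir).load_data()
    for doc in documents:
        index.insert(doc)


if __name__ == "__main__":
    index_documents()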