diff --git a/services/rag/langchain/QWEN.md b/services/rag/langchain/QWEN.md index 60e89aa..ef78e2f 100644 --- a/services/rag/langchain/QWEN.md +++ b/services/rag/langchain/QWEN.md @@ -105,12 +105,19 @@ The project is organized into 8 development phases as outlined in `PLANNING.md`: The project uses environment variables for configuration: ```env +# Embedding configuration OLLAMA_EMBEDDING_MODEL=MODEL # Name of the Ollama model for embeddings +OPENAI_EMBEDDING_MODEL=MODEL # Name of the OpenAI model for embeddings (default: text-embedding-ada-002) +OPENAI_EMBEDDING_BASE_URL=URL # OpenAI-compatible API URL for embeddings +OPENAI_EMBEDDING_API_KEY=KEY # API key for OpenAI-compatible embedding service +EMBEDDING_STRATEGY=ollama # Strategy to use for embeddings: "ollama" (default) or "openai" + +# Chat model configuration OLLAMA_CHAT_MODEL=MODEL # Name of the Ollama model for chat -OPENAI_CHAT_URL=URL # OpenAI-compatible API URL -OPENAI_CHAT_KEY=KEY # Authorization token for OpenAI-compatible API -OPENAI_CHAT_MODEL=MODEL # Name of the OpenAI-compatible model to use -CHAT_MODEL_STRATEGY=ollama # Strategy to use: "ollama" (default) or "openai" +OPENAI_CHAT_URL=URL # OpenAI-compatible API URL for chat +OPENAI_CHAT_KEY=KEY # Authorization token for OpenAI-compatible API for chat +OPENAI_CHAT_MODEL=MODEL # Name of the OpenAI-compatible model to use for chat +CHAT_MODEL_STRATEGY=ollama # Strategy to use for chat: "ollama" (default) or "openai" ``` ## Building and Running @@ -234,4 +241,6 @@ The project is in early development phase. 
The virtual environment is set up and ### Troubleshooting Notes - If encountering "No module named 'unstructured_inference'" error, install unstructured-inference - If seeing OCR-related errors, ensure tesseract is installed at the system level and unstructured-pytesseract is available -- For language detection issues, verify that appropriate spaCy models are downloaded \ No newline at end of file +- For language detection issues, verify that appropriate spaCy models are downloaded +- If getting Ollama connection errors when using the OpenAI strategy, ensure EMBEDDING_STRATEGY is set to "openai" in .env (it defaults to "ollama" independently of CHAT_MODEL_STRATEGY) +- When deploying without Ollama, set both CHAT_MODEL_STRATEGY and EMBEDDING_STRATEGY to "openai" in your .env file \ No newline at end of file diff --git a/services/rag/langchain/vector_storage.py b/services/rag/langchain/vector_storage.py index 45868f3..fccd8b6 100644 --- a/services/rag/langchain/vector_storage.py +++ b/services/rag/langchain/vector_storage.py @@ -1,4 +1,4 @@ -"""Vector storage module using Qdrant and Ollama embeddings for the RAG solution.""" +"""Vector storage module using Qdrant and configurable embeddings for the RAG solution.""" import os from typing import Optional @@ -7,6 +7,7 @@ from dotenv import load_dotenv from langchain_qdrant import QdrantVectorStore from langchain_core.documents import Document from langchain_ollama import OllamaEmbeddings +from langchain_openai import OpenAIEmbeddings from qdrant_client import QdrantClient # Load environment variables @@ -17,15 +18,19 @@ QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") QDRANT_REST_PORT = int(os.getenv("QDRANT_REST_PORT", 6333)) QDRANT_GRPC_PORT = int(os.getenv("QDRANT_GRPC_PORT", 6334)) -# Ollama embedding model configuration +# Embedding model configuration +EMBEDDING_STRATEGY = os.getenv("EMBEDDING_STRATEGY", "ollama").lower() OLLAMA_EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text") +OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", 
"text-embedding-ada-002") +OPENAI_EMBEDDING_BASE_URL = os.getenv("OPENAI_EMBEDDING_BASE_URL") +OPENAI_EMBEDDING_API_KEY = os.getenv("OPENAI_EMBEDDING_API_KEY") def initialize_vector_store( collection_name: str = "documents_langchain", recreate_collection: bool = False ) -> QdrantVectorStore: """ - Initialize and return a Qdrant vector store with Ollama embeddings. + Initialize and return a Qdrant vector store with configurable embeddings. Args: collection_name: Name of the Qdrant collection to use @@ -34,11 +39,24 @@ def initialize_vector_store( Returns: Initialized Qdrant vector store """ - # Initialize Ollama embeddings - embeddings = OllamaEmbeddings( - model=OLLAMA_EMBEDDING_MODEL, - base_url="http://localhost:11434", # Default Ollama URL - ) + # Determine which embedding strategy to use + if EMBEDDING_STRATEGY == "openai": + # Validate required OpenAI embedding variables + if not OPENAI_EMBEDDING_API_KEY or not OPENAI_EMBEDDING_BASE_URL: + raise ValueError("OPENAI_EMBEDDING_API_KEY and OPENAI_EMBEDDING_BASE_URL must be set when using OpenAI embedding strategy") + + # Initialize OpenAI embeddings + embeddings = OpenAIEmbeddings( + model=OPENAI_EMBEDDING_MODEL, + openai_api_base=OPENAI_EMBEDDING_BASE_URL, + openai_api_key=OPENAI_EMBEDDING_API_KEY, + ) + else: # Default to ollama + # Initialize Ollama embeddings + embeddings = OllamaEmbeddings( + model=OLLAMA_EMBEDDING_MODEL, + base_url="http://localhost:11434", # Default Ollama URL + ) # Check if collection exists and create if needed client = QdrantClient(