From bf3a3735cb92d53fe0008bd46e0c61da88f2969f Mon Sep 17 00:00:00 2001
From: idchlife
Date: Wed, 4 Feb 2026 22:30:57 +0300
Subject: [PATCH] OpenAI-compatible integration done

---
 services/rag/langchain/.env.dist        |   3 +
 services/rag/langchain/PLANNING.md      |   6 +-
 services/rag/langchain/QWEN.md          |  31 ++++--
 services/rag/langchain/agent.py         | 125 ++++++++++++++++--------
 services/rag/langchain/requirements.txt |   3 +
 5 files changed, 116 insertions(+), 52 deletions(-)

diff --git a/services/rag/langchain/.env.dist b/services/rag/langchain/.env.dist
index f1e7672..c894d57 100644
--- a/services/rag/langchain/.env.dist
+++ b/services/rag/langchain/.env.dist
@@ -1,2 +1,5 @@
 OLLAMA_EMBEDDING_MODEL=MODEL
 OLLAMA_CHAT_MODEL=MODEL
+OPENAI_CHAT_URL=URL
+OPENAI_CHAT_KEY=KEY
+CHAT_MODEL_STRATEGY=ollama
diff --git a/services/rag/langchain/PLANNING.md b/services/rag/langchain/PLANNING.md
index 75f1297..f8d53bf 100644
--- a/services/rag/langchain/PLANNING.md
+++ b/services/rag/langchain/PLANNING.md
@@ -41,10 +41,10 @@ Chosen data folder: relative ./../../../data
 - from the current folder
 
 - [x] Integrate this agent with the cli, as command to start chatting with the agent. If there is a built-in solution for console communication with the agent, initiate this on cli command.
 
 # Phase 7 (openai integration for chat model)
-- [ ] Create openai integration, with using .env variables `OPENAI_CHAT_URL`, `OPENAI_CHAT_KEY`. First one for openai compatible URL, second one for Authorization Bearer token.
-- [ ] Make this integration optional, with using .env variable `CHAT_MODEL_STRATEGY`. There can be 2 options: "ollama", "openai". Ollama currently already done and working, so we should write code for checking which option is chosen in .env, with ollama being the default.
+- [x] Create openai integration, with using .env variables `OPENAI_CHAT_URL`, `OPENAI_CHAT_KEY`. First one for openai compatible URL, second one for Authorization Bearer token.
+- [x] Make this integration optional, with using .env variable `CHAT_MODEL_STRATEGY`. There can be 2 options: "ollama", "openai". Ollama currently already done and working, so we should write code for checking which option is chosen in .env, with ollama being the default.
 
 # Phase 8 (http endpoint to retrieve data from the vector storage by query)
-- [ ] Create file `server.py`, with web framework
+- [ ] Create file `server.py`, with a web framework (FastAPI, for example)
 - [ ] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
diff --git a/services/rag/langchain/QWEN.md b/services/rag/langchain/QWEN.md
index fe10c1d..e0d0cff 100644
--- a/services/rag/langchain/QWEN.md
+++ b/services/rag/langchain/QWEN.md
@@ -2,7 +2,7 @@
 
 ## Project Overview
 
-This is a Retrieval-Augmented Generation (RAG) solution built using the Langchain framework. The project is designed to load documents from a data directory, store them in a vector database (Qdrant), and enable semantic search and chat capabilities using local LLMs via Ollama.
+This is a Retrieval-Augmented Generation (RAG) solution built using the Langchain framework. The project is designed to load documents from a data directory, store them in a vector database (Qdrant), and enable semantic search and chat capabilities using LLMs served locally via Ollama or through OpenAI-compatible APIs.
 
 The project follows a phased development approach with CLI entry points for different functionalities like document loading, retrieval, and chat.
@@ -10,7 +10,7 @@ The project follows a phased development approach with CLI entry points for diff - **Framework**: Langchain - **Vector Storage**: Qdrant - **Embeddings**: Ollama (with fallback option for OpenAI via OpenRouter) -- **Chat Models**: Ollama +- **Chat Models**: Ollama and OpenAI-compatible APIs - **Data Directory**: `./../../../data` (relative to project root) - **Virtual Environment**: Python venv in `venv/` directory @@ -35,7 +35,7 @@ rag-solution/services/rag/langchain/ ## Dependencies The project relies on several key libraries: -- `langchain` and related ecosystem (`langchain-community`, `langchain-core`, `langchain-ollama`) +- `langchain` and related ecosystem (`langchain-community`, `langchain-core`, `langchain-ollama`, `langchain-openai`) - `langgraph` for workflow management - `qdrant-client` for vector storage (to be installed) - `ollama` for local LLM interaction @@ -45,7 +45,7 @@ The project relies on several key libraries: ## Development Phases -The project is organized into 6 development phases as outlined in `PLANNING.md`: +The project is organized into 8 development phases as outlined in `PLANNING.md`: ### Phase 1: CLI Entrypoint - [x] Virtual environment setup @@ -79,13 +79,25 @@ The project is organized into 6 development phases as outlined in `PLANNING.md`: - [x] Integrate with retrieval functionality - [x] Add CLI command for chat interaction +### Phase 7: OpenAI Integration for Chat Model +- [x] Create OpenAI-compatible integration using `.env` variables `OPENAI_CHAT_URL` and `OPENAI_CHAT_KEY` +- [x] Make this integration optional using `.env` variable `CHAT_MODEL_STRATEGY` with "ollama" as default +- [x] Allow switching between "ollama" and "openai" strategies + +### Phase 8: HTTP Endpoint +- [ ] Create web framework with POST endpoint `/api/test-query` for agent queries + ## Environment Configuration The project uses environment variables for configuration: ```env -OLLAMA_EMBEDDING_MODEL=MODEL # Name of the Ollama model for embeddings -OLLAMA_CHAT_MODEL=MODEL # Name of the Ollama model for chat +OLLAMA_EMBEDDING_MODEL=MODEL # Name of the Ollama model for embeddings +OLLAMA_CHAT_MODEL=MODEL # Name of the Ollama model for chat +OPENAI_CHAT_URL=URL # OpenAI-compatible API URL +OPENAI_CHAT_KEY=KEY # Authorization token for OpenAI-compatible API +OPENAI_CHAT_MODEL=MODEL # Name of the OpenAI-compatible model to use +CHAT_MODEL_STRATEGY=ollama # Strategy to use: "ollama" (default) or "openai" ``` ## Building and Running @@ -176,6 +188,13 @@ The project is in early development phase. 
The virtual environment is set up and - Agent uses document retrieval tool to fetch relevant information based on user queries - Implemented proper error handling and conversation history management +### Phase 7 Implementation Notes +- Enhanced `agent.py` to support both Ollama and OpenAI-compatible chat models +- Added conditional logic to select chat model based on `CHAT_MODEL_STRATEGY` environment variable +- When strategy is "openai", uses `ChatOpenAI` with `OPENAI_CHAT_URL` and `OPENAI_CHAT_KEY` from environment +- When strategy is "ollama" (default), uses existing `ChatOllama` implementation +- Updated CLI chat command to show which model strategy is being used + ### Issue Fix Notes - Fixed DocumentRetrievalTool class to properly declare and initialize the retriever field - Resolved Pydantic field declaration issue that caused "object has no field" error diff --git a/services/rag/langchain/agent.py b/services/rag/langchain/agent.py index 929b585..78328dd 100644 --- a/services/rag/langchain/agent.py +++ b/services/rag/langchain/agent.py @@ -8,6 +8,7 @@ from langchain_core.messages import HumanMessage, AIMessage, BaseMessage from langchain_core.agents import AgentFinish from langgraph.prebuilt import create_react_agent from langchain_ollama import ChatOllama +from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from loguru import logger @@ -69,36 +70,64 @@ def create_chat_agent( ) -> Any: """ Create a chat agent with document retrieval capabilities. - + Args: collection_name: Name of the Qdrant collection to use - llm_model: Name of the Ollama model to use (defaults to OLLAMA_CHAT_MODEL env var) - + llm_model: Name of the model to use (defaults to environment variable based on strategy) + Returns: Configured chat agent """ logger.info("Creating chat agent with document retrieval capabilities") - - # Get the model name from environment if not provided - if llm_model is None: - llm_model = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1") - - # Initialize the Ollama chat model - llm = ChatOllama( - model=llm_model, - base_url="http://localhost:11434", # Default Ollama URL - temperature=0.1, - ) - + + # Determine which model strategy to use + chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower() + + if chat_model_strategy == "openai": + # Use OpenAI-compatible API + openai_chat_url = os.getenv("OPENAI_CHAT_URL") + openai_chat_key = os.getenv("OPENAI_CHAT_KEY") + + if not openai_chat_url or not openai_chat_key: + raise ValueError("OPENAI_CHAT_URL and OPENAI_CHAT_KEY must be set when using OpenAI strategy") + + # Get the model name from environment if not provided + if llm_model is None: + llm_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo") # Default to a common model + + # Initialize the OpenAI-compatible chat model + llm = ChatOpenAI( + model=llm_model, + openai_api_base=openai_chat_url, + openai_api_key=openai_chat_key, + temperature=0.1, + ) + + logger.info(f"Using OpenAI-compatible model: {llm_model} via {openai_chat_url}") + else: # Default to ollama + # Use Ollama + # Get the model name from environment if not provided + if llm_model is None: + llm_model = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1") + + # Initialize the Ollama chat model + llm = ChatOllama( + model=llm_model, + base_url="http://localhost:11434", # Default Ollama URL + temperature=0.1, + ) + + logger.info(f"Using Ollama model: {llm_model}") + # Create the document retrieval tool retrieval_tool = DocumentRetrievalTool() - + # Create the agent with the LLM and 
tools tools = [retrieval_tool] agent = create_react_agent(llm, tools) - + logger.info("Chat agent created successfully") - + return agent @@ -110,47 +139,47 @@ def chat_with_agent( ) -> Dict[str, Any]: """ Chat with the agent and get a response based on the query and document retrieval. - + Args: query: The user's query collection_name: Name of the Qdrant collection to use - llm_model: Name of the Ollama model to use + llm_model: Name of the model to use (defaults to environment variable based on strategy) history: Conversation history (list of messages) - + Returns: Dictionary containing the agent's response and metadata """ logger.info(f"Starting chat with query: {query}") - + # Create the agent agent = create_chat_agent(collection_name, llm_model) - + # Prepare the input for the agent if history is None: history = [] - + # Add the user's query to the history history.append(HumanMessage(content=query)) - + # Prepare the input for the agent executor agent_input = { "messages": history } - + try: # Invoke the agent result = agent.invoke(agent_input) - + # Extract the agent's response messages = result.get("messages", []) ai_message = None - + # Find the AI message in the results for msg in reversed(messages): if isinstance(msg, AIMessage): ai_message = msg break - + if ai_message is None: # If no AI message was found, return the last message content if messages: @@ -160,7 +189,7 @@ def chat_with_agent( response_content = "I couldn't generate a response to your query." else: response_content = ai_message.content - + # Create the response dictionary response = { "response": response_content, @@ -168,10 +197,10 @@ def chat_with_agent( "history": messages, # Return updated history "success": True } - + logger.info("Chat completed successfully") return response - + except Exception as e: logger.error(f"Error during chat: {str(e)}") return { @@ -188,29 +217,39 @@ def run_chat_loop( ): """ Run an interactive chat loop with the agent. - + Args: collection_name: Name of the Qdrant collection to use - llm_model: Name of the Ollama model to use + llm_model: Name of the model to use (defaults to environment variable based on strategy) """ logger.info("Starting interactive chat loop") - print("Chat Agent initialized. Type 'quit' or 'exit' to end the conversation.\n") - + + # Determine which model strategy is being used and inform the user + chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower() + if chat_model_strategy == "openai": + model_info = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo") + print(f"Chat Agent initialized with OpenAI-compatible model: {model_info}") + else: + model_info = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1") + print(f"Chat Agent initialized with Ollama model: {model_info}") + + print("Type 'quit' or 'exit' to end the conversation.\n") + history = [] - + while True: try: # Get user input user_input = input("You: ").strip() - + # Check for exit commands if user_input.lower() in ['quit', 'exit', 'q']: print("Ending chat session. Goodbye!") break - + if not user_input: continue - + # Get response from the agent response_data = chat_with_agent( query=user_input, @@ -218,13 +257,13 @@ def run_chat_loop( llm_model=llm_model, history=history ) - + # Update history with the new messages history = response_data.get("history", []) - + # Print the agent's response print(f"Agent: {response_data.get('response', 'No response generated')}\n") - + except KeyboardInterrupt: print("\nEnding chat session. 
Goodbye!") break diff --git a/services/rag/langchain/requirements.txt b/services/rag/langchain/requirements.txt index ad58432..1c8a307 100644 --- a/services/rag/langchain/requirements.txt +++ b/services/rag/langchain/requirements.txt @@ -28,7 +28,10 @@ langgraph==1.0.5 langgraph-checkpoint==3.0.1 langgraph-prebuilt==1.0.5 langgraph-sdk==0.3.1 +langgraph-tools==1.0.20 langsmith==0.5.2 +langserve==0.3.0 +langchain-openai==0.2.0 marshmallow==3.26.2 multidict==6.7.0 mypy_extensions==1.1.0