From bf3a3735cb92d53fe0008bd46e0c61da88f2969f Mon Sep 17 00:00:00 2001
From: idchlife
Date: Wed, 4 Feb 2026 22:30:57 +0300
Subject: [PATCH] OpenAI-compatible integration done

---
 services/rag/langchain/.env.dist        |   3 +
 services/rag/langchain/PLANNING.md      |   6 +-
 services/rag/langchain/QWEN.md          |  31 ++++--
 services/rag/langchain/agent.py         | 125 ++++++++++++++++--------
 services/rag/langchain/requirements.txt |   3 +
 5 files changed, 116 insertions(+), 52 deletions(-)

diff --git a/services/rag/langchain/.env.dist b/services/rag/langchain/.env.dist
index f1e7672..c894d57 100644
--- a/services/rag/langchain/.env.dist
+++ b/services/rag/langchain/.env.dist
@@ -1,2 +1,5 @@
 OLLAMA_EMBEDDING_MODEL=MODEL
 OLLAMA_CHAT_MODEL=MODEL
+OPENAI_CHAT_URL=URL
+OPENAI_CHAT_KEY=KEY
+CHAT_MODEL_STRATEGY=ollama
diff --git a/services/rag/langchain/PLANNING.md b/services/rag/langchain/PLANNING.md
index 75f1297..f8d53bf 100644
--- a/services/rag/langchain/PLANNING.md
+++ b/services/rag/langchain/PLANNING.md
@@ -41,10 +41,10 @@ Chosen data folder: relative ./../../../data
 - from the current folder
 
 - [x] Integrate this agent with the cli, as command to start chatting with the agent. If there is a built-in solution for console communication with the agent, initiate this on cli command.
 
 # Phase 7 (openai integration for chat model)
-- [ ] Create openai integration, with using .env variables `OPENAI_CHAT_URL`, `OPENAI_CHAT_KEY`. First one for openai compatible URL, second one for Authorization Bearer token.
-- [ ] Make this integration optional, with using .env variable `CHAT_MODEL_STRATEGY`. There can be 2 options: "ollama", "openai". Ollama currently already done and working, so we should write code for checking which option is chosen in .env, with ollama being the default.
+- [x] Create openai integration, with using .env variables `OPENAI_CHAT_URL`, `OPENAI_CHAT_KEY`. First one for openai compatible URL, second one for Authorization Bearer token.
+- [x] Make this integration optional, with using .env variable `CHAT_MODEL_STRATEGY`. There can be 2 options: "ollama", "openai". Ollama currently already done and working, so we should write code for checking which option is chosen in .env, with ollama being the default.
 
 # Phase 8 (http endpoint to retrieve data from the vector storage by query)
-- [ ] Create file `server.py`, with web framework
+- [ ] Create file `server.py`, with a web framework (FastAPI, for example)
 - [ ] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
diff --git a/services/rag/langchain/QWEN.md b/services/rag/langchain/QWEN.md
index fe10c1d..e0d0cff 100644
--- a/services/rag/langchain/QWEN.md
+++ b/services/rag/langchain/QWEN.md
@@ -2,7 +2,7 @@
 
 ## Project Overview
 
-This is a Retrieval-Augmented Generation (RAG) solution built using the Langchain framework. The project is designed to load documents from a data directory, store them in a vector database (Qdrant), and enable semantic search and chat capabilities using local LLMs via Ollama.
+This is a Retrieval-Augmented Generation (RAG) solution built using the Langchain framework. The project is designed to load documents from a data directory, store them in a vector database (Qdrant), and enable semantic search and chat capabilities using LLMs served locally via Ollama or through OpenAI-compatible APIs.
 
 The project follows a phased development approach with CLI entry points for different functionalities like document loading, retrieval, and chat.
@@ -10,7 +10,7 @@ The project follows a phased development approach with CLI entry points for diff - **Framework**: Langchain - **Vector Storage**: Qdrant - **Embeddings**: Ollama (with fallback option for OpenAI via OpenRouter) -- **Chat Models**: Ollama +- **Chat Models**: Ollama and OpenAI-compatible APIs - **Data Directory**: `./../../../data` (relative to project root) - **Virtual Environment**: Python venv in `venv/` directory @@ -35,7 +35,7 @@ rag-solution/services/rag/langchain/ ## Dependencies The project relies on several key libraries: -- `langchain` and related ecosystem (`langchain-community`, `langchain-core`, `langchain-ollama`) +- `langchain` and related ecosystem (`langchain-community`, `langchain-core`, `langchain-ollama`, `langchain-openai`) - `langgraph` for workflow management - `qdrant-client` for vector storage (to be installed) - `ollama` for local LLM interaction @@ -45,7 +45,7 @@ The project relies on several key libraries: ## Development Phases -The project is organized into 6 development phases as outlined in `PLANNING.md`: +The project is organized into 8 development phases as outlined in `PLANNING.md`: ### Phase 1: CLI Entrypoint - [x] Virtual environment setup @@ -79,13 +79,25 @@ The project is organized into 6 development phases as outlined in `PLANNING.md`: - [x] Integrate with retrieval functionality - [x] Add CLI command for chat interaction +### Phase 7: OpenAI Integration for Chat Model +- [x] Create OpenAI-compatible integration using `.env` variables `OPENAI_CHAT_URL` and `OPENAI_CHAT_KEY` +- [x] Make this integration optional using `.env` variable `CHAT_MODEL_STRATEGY` with "ollama" as default +- [x] Allow switching between "ollama" and "openai" strategies + +### Phase 8: HTTP Endpoint +- [ ] Create web framework with POST endpoint `/api/test-query` for agent queries + ## Environment Configuration The project uses environment variables for configuration: ```env -OLLAMA_EMBEDDING_MODEL=MODEL # Name of the Ollama model for embeddings -OLLAMA_CHAT_MODEL=MODEL # Name of the Ollama model for chat +OLLAMA_EMBEDDING_MODEL=MODEL # Name of the Ollama model for embeddings +OLLAMA_CHAT_MODEL=MODEL # Name of the Ollama model for chat +OPENAI_CHAT_URL=URL # OpenAI-compatible API URL +OPENAI_CHAT_KEY=KEY # Authorization token for OpenAI-compatible API +OPENAI_CHAT_MODEL=MODEL # Name of the OpenAI-compatible model to use +CHAT_MODEL_STRATEGY=ollama # Strategy to use: "ollama" (default) or "openai" ``` ## Building and Running @@ -176,6 +188,13 @@ The project is in early development phase. 
The virtual environment is set up and - Agent uses document retrieval tool to fetch relevant information based on user queries - Implemented proper error handling and conversation history management +### Phase 7 Implementation Notes +- Enhanced `agent.py` to support both Ollama and OpenAI-compatible chat models +- Added conditional logic to select chat model based on `CHAT_MODEL_STRATEGY` environment variable +- When strategy is "openai", uses `ChatOpenAI` with `OPENAI_CHAT_URL` and `OPENAI_CHAT_KEY` from environment +- When strategy is "ollama" (default), uses existing `ChatOllama` implementation +- Updated CLI chat command to show which model strategy is being used + ### Issue Fix Notes - Fixed DocumentRetrievalTool class to properly declare and initialize the retriever field - Resolved Pydantic field declaration issue that caused "object has no field" error diff --git a/services/rag/langchain/agent.py b/services/rag/langchain/agent.py index 929b585..78328dd 100644 --- a/services/rag/langchain/agent.py +++ b/services/rag/langchain/agent.py @@ -8,6 +8,7 @@ from langchain_core.messages import HumanMessage, AIMessage, BaseMessage from langchain_core.agents import AgentFinish from langgraph.prebuilt import create_react_agent from langchain_ollama import ChatOllama +from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from loguru import logger @@ -69,36 +70,64 @@ def create_chat_agent( ) -> Any: """ Create a chat agent with document retrieval capabilities. - + Args: collection_name: Name of the Qdrant collection to use - llm_model: Name of the Ollama model to use (defaults to OLLAMA_CHAT_MODEL env var) - + llm_model: Name of the model to use (defaults to environment variable based on strategy) + Returns: Configured chat agent """ logger.info("Creating chat agent with document retrieval capabilities") - - # Get the model name from environment if not provided - if llm_model is None: - llm_model = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1") - - # Initialize the Ollama chat model - llm = ChatOllama( - model=llm_model, - base_url="http://localhost:11434", # Default Ollama URL - temperature=0.1, - ) - + + # Determine which model strategy to use + chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower() + + if chat_model_strategy == "openai": + # Use OpenAI-compatible API + openai_chat_url = os.getenv("OPENAI_CHAT_URL") + openai_chat_key = os.getenv("OPENAI_CHAT_KEY") + + if not openai_chat_url or not openai_chat_key: + raise ValueError("OPENAI_CHAT_URL and OPENAI_CHAT_KEY must be set when using OpenAI strategy") + + # Get the model name from environment if not provided + if llm_model is None: + llm_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo") # Default to a common model + + # Initialize the OpenAI-compatible chat model + llm = ChatOpenAI( + model=llm_model, + openai_api_base=openai_chat_url, + openai_api_key=openai_chat_key, + temperature=0.1, + ) + + logger.info(f"Using OpenAI-compatible model: {llm_model} via {openai_chat_url}") + else: # Default to ollama + # Use Ollama + # Get the model name from environment if not provided + if llm_model is None: + llm_model = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1") + + # Initialize the Ollama chat model + llm = ChatOllama( + model=llm_model, + base_url="http://localhost:11434", # Default Ollama URL + temperature=0.1, + ) + + logger.info(f"Using Ollama model: {llm_model}") + # Create the document retrieval tool retrieval_tool = DocumentRetrievalTool() - + # Create the agent with the LLM and 
tools tools = [retrieval_tool] agent = create_react_agent(llm, tools) - + logger.info("Chat agent created successfully") - + return agent @@ -110,47 +139,47 @@ def chat_with_agent( ) -> Dict[str, Any]: """ Chat with the agent and get a response based on the query and document retrieval. - + Args: query: The user's query collection_name: Name of the Qdrant collection to use - llm_model: Name of the Ollama model to use + llm_model: Name of the model to use (defaults to environment variable based on strategy) history: Conversation history (list of messages) - + Returns: Dictionary containing the agent's response and metadata """ logger.info(f"Starting chat with query: {query}") - + # Create the agent agent = create_chat_agent(collection_name, llm_model) - + # Prepare the input for the agent if history is None: history = [] - + # Add the user's query to the history history.append(HumanMessage(content=query)) - + # Prepare the input for the agent executor agent_input = { "messages": history } - + try: # Invoke the agent result = agent.invoke(agent_input) - + # Extract the agent's response messages = result.get("messages", []) ai_message = None - + # Find the AI message in the results for msg in reversed(messages): if isinstance(msg, AIMessage): ai_message = msg break - + if ai_message is None: # If no AI message was found, return the last message content if messages: @@ -160,7 +189,7 @@ def chat_with_agent( response_content = "I couldn't generate a response to your query." else: response_content = ai_message.content - + # Create the response dictionary response = { "response": response_content, @@ -168,10 +197,10 @@ def chat_with_agent( "history": messages, # Return updated history "success": True } - + logger.info("Chat completed successfully") return response - + except Exception as e: logger.error(f"Error during chat: {str(e)}") return { @@ -188,29 +217,39 @@ def run_chat_loop( ): """ Run an interactive chat loop with the agent. - + Args: collection_name: Name of the Qdrant collection to use - llm_model: Name of the Ollama model to use + llm_model: Name of the model to use (defaults to environment variable based on strategy) """ logger.info("Starting interactive chat loop") - print("Chat Agent initialized. Type 'quit' or 'exit' to end the conversation.\n") - + + # Determine which model strategy is being used and inform the user + chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower() + if chat_model_strategy == "openai": + model_info = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo") + print(f"Chat Agent initialized with OpenAI-compatible model: {model_info}") + else: + model_info = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1") + print(f"Chat Agent initialized with Ollama model: {model_info}") + + print("Type 'quit' or 'exit' to end the conversation.\n") + history = [] - + while True: try: # Get user input user_input = input("You: ").strip() - + # Check for exit commands if user_input.lower() in ['quit', 'exit', 'q']: print("Ending chat session. Goodbye!") break - + if not user_input: continue - + # Get response from the agent response_data = chat_with_agent( query=user_input, @@ -218,13 +257,13 @@ def run_chat_loop( llm_model=llm_model, history=history ) - + # Update history with the new messages history = response_data.get("history", []) - + # Print the agent's response print(f"Agent: {response_data.get('response', 'No response generated')}\n") - + except KeyboardInterrupt: print("\nEnding chat session. 
Goodbye!") break diff --git a/services/rag/langchain/requirements.txt b/services/rag/langchain/requirements.txt index ad58432..1c8a307 100644 --- a/services/rag/langchain/requirements.txt +++ b/services/rag/langchain/requirements.txt @@ -28,7 +28,10 @@ langgraph==1.0.5 langgraph-checkpoint==3.0.1 langgraph-prebuilt==1.0.5 langgraph-sdk==0.3.1 +langgraph-tools==1.0.20 langsmith==0.5.2 +langserve==0.3.0 +langchain-openai==0.2.0 marshmallow==3.26.2 multidict==6.7.0 mypy_extensions==1.1.0