Working demo.html with connection to the API endpoint
@@ -51,8 +51,8 @@ Chosen data folder: relative ./../../../data - from the current folder
 # Phase 9 (simple html web page with chat interface)
-- [ ] Create html webpage called demo.html, with simple UI for chat interface. It can be taken with predefined data from codepen or something
-- [ ] Adjust demo.html code, so it would in fact work with the API endpoint, as chat with the agent. The API endpoint should be asked for beforehand in a prompt message.
-- [ ] After accepting the API endpoint address, it should be used to send requests and process responses to imitate chat with the agent via the provided API endpoint.
-- [ ] Show the API endpoint in the header of the chat.
-- [ ] If there is an error connecting to the API, imitate the bot sending a message about the connection error and a suggestion to reload the page to provide a new API endpoint.
+- [x] Create html webpage called demo.html, with simple UI for chat interface. It can be taken with predefined data from codepen or something
+- [x] Adjust demo.html code, so it would in fact work with the API endpoint, as chat with the agent. The API endpoint should be asked for beforehand in a prompt message.
+- [x] After accepting the API endpoint address, it should be used to send requests and process responses to imitate chat with the agent via the provided API endpoint.
+- [x] Show the API endpoint in the header of the chat.
+- [x] If there is an error connecting to the API, imitate the bot sending a message about the connection error and a suggestion to reload the page to provide a new API endpoint.
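The checklist above describes the page's request/response loop with the agent API. A minimal Python sketch of that loop, assuming the server exposes a POST `/query` route that accepts the `QueryRequest` JSON shown further down (`query`, `collection_name`, optional `llm_model`) and returns a JSON body with a `response` field; both the route and the response field name are assumptions, not confirmed by this diff:

```python
import requests

# Hypothetical stand-in for what demo.html does in the browser:
# ask for the endpoint once, then POST each user message to it.
api_endpoint = input("API endpoint (e.g. http://localhost:8331): ").rstrip("/")

while True:
    message = input("You: ")
    if message.lower() in ("quit", "exit"):
        break
    try:
        # Field names follow QueryRequest in server.py; "/query" is an assumed route.
        resp = requests.post(
            f"{api_endpoint}/query",
            json={"query": message, "collection_name": "documents_langchain"},
            timeout=60,
        )
        resp.raise_for_status()
        print("Bot:", resp.json().get("response"))  # assumed response field name
    except requests.RequestException as exc:
        # Mirrors the demo.html behavior: surface the failure as a bot message.
        print(f"Bot: Error connecting to the API ({exc}). Reload to enter a new endpoint.")
```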
@@ -23,6 +23,7 @@ rag-solution/services/rag/langchain/
 ├── .gitignore          # Git ignore rules
 ├── app.py              # Main application file (currently empty)
 ├── cli.py              # CLI entrypoint with click library
+├── demo.html           # HTML demo with chat interface for the RAG agent
 ├── EXTENSIONS.md       # Supported file extensions and LangChain loaders
 ├── enrichment.py       # Document enrichment module for loading documents to vector storage
 ├── PLANNING.md         # Development roadmap and phases
@@ -92,6 +93,13 @@ The project is organized into 8 development phases as outlined in `PLANNING.md`:
 - [x] Include CORS middleware for cross-origin requests
 - [x] Add health check endpoint
 
+### Phase 9: HTML Chat Interface
+- [x] Create HTML webpage called `demo.html` with simple UI for chat interface
+- [x] Implement functionality to connect to the API endpoint
+- [x] Add ability to configure API endpoint in the UI
+- [x] Display conversation history with visual distinction between user and bot messages
+- [x] Add error handling for API connection issues
+
 ## Environment Configuration
 
 The project uses environment variables for configuration:
@@ -209,6 +217,15 @@ The project is in early development phase. The virtual environment is set up and
 - Server runs on port 8000 by default
 - Supports both Ollama and OpenAI strategies through existing configuration
 
+### Phase 9 Implementation Notes
+- Created `demo.html` with a responsive chat interface
+- Implemented API endpoint configuration in the UI
+- Added functionality to send messages to the RAG agent via the API
+- Included proper error handling for API connection issues
+- Added typing indicators during API requests
+- Implemented responsive design for different screen sizes
+- Added visual distinction between user and bot messages
+
 ### Issue Fix Notes
 - Fixed DocumentRetrievalTool class to properly declare and initialize the retriever field
 - Resolved Pydantic field declaration issue that caused "object has no field" error
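The `DocumentRetrievalTool` fix noted above comes down to Pydantic field declaration: LangChain's `BaseTool` is a Pydantic model, so assigning `self.retriever` without declaring the field triggers the "object has no field" error. A minimal sketch of the pattern; the `name`, `description`, and `_run` body here are illustrative, not the project's exact code:

```python
from typing import Any

from langchain_core.tools import BaseTool


class DocumentRetrievalTool(BaseTool):
    """Tool for retrieving documents from the vector store based on a query."""

    name: str = "document_retrieval"  # illustrative values
    description: str = "Retrieve documents relevant to a query."
    # Declaring the field on the class is the fix: Pydantic models reject
    # assignment to attributes that were never declared as fields.
    retriever: Any = None

    def _run(self, query: str) -> str:
        docs = self.retriever.invoke(query)
        return "\n\n".join(doc.page_content for doc in docs)
```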
@@ -1,7 +1,7 @@
 """Agent module for the RAG solution with Ollama-powered chat agent."""
 
 import os
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, Tuple
 from langchain_core.tools import BaseTool, tool
 from langchain_core.runnables import RunnableConfig
 from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
@@ -16,6 +16,41 @@ from retrieval import create_retriever
 from vector_storage import initialize_vector_store
 
 
+def get_llm_model_info(llm_model: str = None) -> Tuple[str, str, str, str, str]:
+    """
+    Get LLM model information based on environment configuration.
+
+    Args:
+        llm_model: Name of the model to use (defaults to environment variable based on strategy)
+
+    Returns:
+        Tuple containing (strategy, model_name, base_url_or_api_base, api_key, model_type)
+    """
+    # Determine which model strategy to use
+    chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower()
+
+    if chat_model_strategy == "openai":
+        # Use OpenAI-compatible API
+        openai_chat_url = os.getenv("OPENAI_CHAT_URL")
+        openai_chat_key = os.getenv("OPENAI_CHAT_KEY")
+
+        if not openai_chat_url or not openai_chat_key:
+            raise ValueError("OPENAI_CHAT_URL and OPENAI_CHAT_KEY must be set when using OpenAI strategy")
+
+        # Get the model name from environment if not provided
+        if llm_model is None:
+            llm_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")  # Default to a common model
+
+        return chat_model_strategy, llm_model, openai_chat_url, openai_chat_key, "ChatOpenAI"
+    else:  # Default to ollama
+        # Get the model name from environment if not provided
+        if llm_model is None:
+            llm_model = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1")
+
+        return chat_model_strategy, llm_model, "http://localhost:11434", "", "ChatOllama"
+
+
 class DocumentRetrievalTool(BaseTool):
     """Tool for retrieving documents from the vector store based on a query."""
 
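A quick sketch of how the new helper behaves under the two strategies, following the function body above; the environment values set here are placeholders, and the printed results assume the corresponding model variables are unset:

```python
import os

# With no strategy set, the Ollama branch applies.
os.environ.pop("CHAT_MODEL_STRATEGY", None)
strategy, model_name, base_url, api_key, model_type = get_llm_model_info()
print(strategy, model_name, base_url, model_type)
# -> ollama llama3.1 http://localhost:11434 ChatOllama (assuming OLLAMA_CHAT_MODEL is unset)

# With the OpenAI strategy, the URL and key must be present or a ValueError is raised.
os.environ["CHAT_MODEL_STRATEGY"] = "openai"
os.environ["OPENAI_CHAT_URL"] = "https://api.example.com/v1"  # placeholder
os.environ["OPENAI_CHAT_KEY"] = "sk-..."                      # placeholder
print(get_llm_model_info("gpt-4o-mini"))
# -> ('openai', 'gpt-4o-mini', 'https://api.example.com/v1', 'sk-...', 'ChatOpenAI')
```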
@@ -80,44 +115,28 @@ def create_chat_agent(
     """
     logger.info("Creating chat agent with document retrieval capabilities")
 
-    # Determine which model strategy to use
-    chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower()
+    # Get model information using the utility function
+    strategy, model_name, base_url_or_api_base, api_key, model_type = get_llm_model_info(llm_model)
 
-    if chat_model_strategy == "openai":
-        # Use OpenAI-compatible API
-        openai_chat_url = os.getenv("OPENAI_CHAT_URL")
-        openai_chat_key = os.getenv("OPENAI_CHAT_KEY")
-
-        if not openai_chat_url or not openai_chat_key:
-            raise ValueError("OPENAI_CHAT_URL and OPENAI_CHAT_KEY must be set when using OpenAI strategy")
-
-        # Get the model name from environment if not provided
-        if llm_model is None:
-            llm_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")  # Default to a common model
-
+    if strategy == "openai":
         # Initialize the OpenAI-compatible chat model
         llm = ChatOpenAI(
-            model=llm_model,
-            openai_api_base=openai_chat_url,
-            openai_api_key=openai_chat_key,
+            model=model_name,
+            openai_api_base=base_url_or_api_base,
+            openai_api_key=api_key,
             temperature=0.1,
         )
 
-        logger.info(f"Using OpenAI-compatible model: {llm_model} via {openai_chat_url}")
+        logger.info(f"Using OpenAI-compatible model: {model_name} via {base_url_or_api_base}")
     else:  # Default to ollama
-        # Use Ollama
-        # Get the model name from environment if not provided
-        if llm_model is None:
-            llm_model = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1")
-
         # Initialize the Ollama chat model
         llm = ChatOllama(
-            model=llm_model,
-            base_url="http://localhost:11434",  # Default Ollama URL
+            model=model_name,
+            base_url=base_url_or_api_base,  # Default Ollama URL
             temperature=0.1,
         )
 
-        logger.info(f"Using Ollama model: {llm_model}")
+        logger.info(f"Using Ollama model: {model_name}")
 
     # Create the document retrieval tool
     retrieval_tool = DocumentRetrievalTool()
@@ -224,14 +243,13 @@ def run_chat_loop(
     """
     logger.info("Starting interactive chat loop")
 
-    # Determine which model strategy is being used and inform the user
-    chat_model_strategy = os.getenv("CHAT_MODEL_STRATEGY", "ollama").lower()
-    if chat_model_strategy == "openai":
-        model_info = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")
-        print(f"Chat Agent initialized with OpenAI-compatible model: {model_info}")
+    # Get model information using the utility function
+    strategy, model_name, _, _, _ = get_llm_model_info(llm_model)
+    if strategy == "openai":
+        print(f"Chat Agent initialized with OpenAI-compatible model: {model_name}")
     else:
-        model_info = os.getenv("OLLAMA_CHAT_MODEL", "llama3.1")
-        print(f"Chat Agent initialized with Ollama model: {model_info}")
+        print(f"Chat Agent initialized with Ollama model: {model_name}")
 
     print("Type 'quit' or 'exit' to end the conversation.\n")
File diff suppressed because one or more lines are too long
@@ -10,7 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from loguru import logger
 from pydantic import BaseModel
 
-from agent import chat_with_agent
+from agent import chat_with_agent, get_llm_model_info
 
 
 class QueryRequest(BaseModel):
@@ -18,7 +18,7 @@ class QueryRequest(BaseModel):
 
     query: str
     collection_name: str = "documents_langchain"
-    # llm_model: str = None
+    llm_model: str = None
 
 
 class QueryResponse(BaseModel):
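Uncommenting `llm_model` lets a caller pick the chat model per request, falling back to the env-based defaults in `get_llm_model_info` when it is omitted. A hypothetical JSON body matching the updated `QueryRequest`; the values are examples only:

```python
# Field names come from QueryRequest above; values are illustrative.
payload = {
    "query": "What does the data folder contain?",
    "collection_name": "documents_langchain",
    "llm_model": "llama3.1",  # optional; omit to use env-based defaults
}
```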
@@ -113,6 +113,6 @@ if __name__ == "__main__":
     uvicorn.run(
         "server:app",
         host="0.0.0.0",
-        port=8000,
+        port=8331,
         reload=True,  # Enable auto-reload during development
     )