diff --git a/services/rag/.DS_Store b/services/rag/.DS_Store
new file mode 100644
index 0000000..163a714
Binary files /dev/null and b/services/rag/.DS_Store differ
diff --git a/services/rag/langchain/PLANNING.md b/services/rag/langchain/PLANNING.md
index f8d53bf..9346961 100644
--- a/services/rag/langchain/PLANNING.md
+++ b/services/rag/langchain/PLANNING.md
@@ -46,5 +46,13 @@ Chosen data folder: relative ./../../../data - from the current folder
 
 # Phase 8 (http endpoint to retrieve data from the vector storage by query)
 
-- [ ] Create file `server.py`, with web framework fastapi, for example
-- [ ] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
+- [x] Create file `server.py`, using a web framework such as FastAPI
+- [x] Add POST endpoint "/api/test-query" that runs the agent and returns its response for the query, sent in JSON format in the "query" field
+
+# Phase 9 (simple html web page with chat interface)
+
+- [ ] Create an HTML page called demo.html with a simple chat UI. It can be taken with predefined data from CodePen or similar
+- [ ] Adjust the demo.html code so that it actually works against the API endpoint as a chat with the agent. The API endpoint address should be asked for beforehand via a prompt message
+- [ ] Once the API endpoint address is accepted, use it to send requests and process responses, imitating a chat with the agent through the provided endpoint (see the Python sketch after the QWEN.md diff below)
+- [ ] Show the API endpoint in the header of the chat
+- [ ] If there is an error connecting to the API, imitate the bot sending a message about the connection error, with a suggestion to reload the page to provide a new API endpoint
diff --git a/services/rag/langchain/QWEN.md b/services/rag/langchain/QWEN.md
index e0d0cff..5e76f96 100644
--- a/services/rag/langchain/QWEN.md
+++ b/services/rag/langchain/QWEN.md
@@ -28,6 +28,7 @@ rag-solution/services/rag/langchain/
 ├── PLANNING.md        # Development roadmap and phases
 ├── QWEN.md            # Current file - project context
 ├── requirements.txt   # Python dependencies
+├── server.py          # Web server with API endpoints for the RAG agent
 ├── vector_storage.py  # Vector storage module with Qdrant and Ollama embeddings
 └── venv/              # Virtual environment
 ```
@@ -85,7 +86,11 @@ The project is organized into 8 development phases as outlined in `PLANNING.md`:
 - [x] Allow switching between "ollama" and "openai" strategies
 
 ### Phase 8: HTTP Endpoint
-- [ ] Create web framework with POST endpoint `/api/test-query` for agent queries
+- [x] Create a web server with POST endpoint `/api/test-query` for agent queries
+- [x] Implement server using FastAPI
+- [x] Add request/response validation with Pydantic models
+- [x] Include CORS middleware for cross-origin requests
+- [x] Add health check endpoint
 
 ## Environment Configuration
@@ -195,6 +200,15 @@ The project is in early development phase. The virtual environment is set up and
 - When strategy is "ollama" (default), uses existing `ChatOllama` implementation
 - Updated CLI chat command to show which model strategy is being used
 
+### Phase 8 Implementation Notes
+- Created `server.py` with FastAPI and integrated it with the existing agent functionality
+- Implemented the `/api/test-query` POST endpoint, which accepts JSON with a "query" field
+- Added request/response validation using Pydantic models
+- Included CORS middleware to support cross-origin requests
+- Added a health check endpoint at the root path
+- Server runs on port 8000 by default
+- Supports both the Ollama and OpenAI strategies through the existing configuration
+
 ### Issue Fix Notes
 - Fixed DocumentRetrievalTool class to properly declare and initialize the retriever field
 - Resolved Pydantic field declaration issue that caused "object has no field" error
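The Phase 9 flow above (ask for the API endpoint first, POST each chat message to it, and surface connection failures as a bot-style error message) is easy to see end to end. Below is a minimal Python sketch standing in for the page's JavaScript; only the `/api/test-query` contract (a JSON body with a "query" field, a JSON reply with a "response" field) comes from Phase 8, while the prompts and timeout are illustrative:

```python
import requests

# Ask for the API endpoint before chatting, as the page's prompt dialog would.
endpoint = input("API endpoint (e.g. http://localhost:8000/api/test-query): ")

while True:
    message = input("you> ").strip()
    if not message:
        break
    try:
        # Same contract as the Phase 8 endpoint: JSON body with a "query" field.
        reply = requests.post(endpoint, json={"query": message}, timeout=60)
        reply.raise_for_status()
        print("bot>", reply.json().get("response", ""))
    except requests.RequestException:
        # Phase 9 behaviour: report the failure as a bot message and suggest
        # starting over with a new endpoint address.
        print("bot> I couldn't reach the API - please reload to provide a new endpoint.")
```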
diff --git a/services/rag/langchain/demo.html b/services/rag/langchain/demo.html
new file mode 100644
index 0000000..13c6ed1
--- /dev/null
+++ b/services/rag/langchain/demo.html
@@ -0,0 +1,293 @@
[demo.html markup lost in extraction: the 293 added lines are a "SimpleChat" page, apparently adapted from a CodePen template, containing a chat header, a message list with two sample messages - "BOT" at 12:45 ("Hi, welcome to SimpleChat! Go ahead and send me a message. 😄") and user "Sajad" at 12:46 ("You can change your name in JS section!") - and a message input form.]
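For reference, the wire contract that demo.html (or any other client) has to follow, matching the Pydantic models in `server.py` below; the sample values are invented:

```python
# Body for POST /api/test-query; "collection_name" (and, optionally,
# "llm_model") may be omitted, in which case the server defaults apply.
request_body = {
    "query": "What topics do the indexed documents cover?",
    "collection_name": "documents_langchain",
}

# Shape of the JSON the endpoint returns, per the QueryResponse model.
response_body = {
    "response": "...agent answer...",
    "query": "What topics do the indexed documents cover?",
    "success": True,
    "error": None,  # populated only when the agent call raises
}
```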
diff --git a/services/rag/langchain/server.py b/services/rag/langchain/server.py
new file mode 100644
index 0000000..c284e97
--- /dev/null
+++ b/services/rag/langchain/server.py
@@ -0,0 +1,116 @@
+"""Web server for the RAG solution, built with FastAPI."""
+
+import os
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from loguru import logger
+from pydantic import BaseModel
+
+from agent import chat_with_agent
+
+
+class QueryRequest(BaseModel):
+    """Request model for the query endpoint."""
+
+    query: str
+    collection_name: str = "documents_langchain"
+    llm_model: str | None = None  # optional override; agent default is used when None
+
+
+class QueryResponse(BaseModel):
+    """Response model for the query endpoint."""
+
+    response: str
+    query: str
+    success: bool
+    error: str | None = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifespan event handler for startup and shutdown."""
+    # Startup
+    logger.info("Starting RAG server...")
+    yield
+    # Shutdown
+    logger.info("Shutting down RAG server...")
+
+
+# Create FastAPI app
+app = FastAPI(
+    title="RAG Solution API",
+    description="API for Retrieval-Augmented Generation solution with Langchain",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, restrict this to known origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.post("/api/test-query", response_model=QueryResponse)
+async def test_query(request: QueryRequest) -> QueryResponse:
+    """
+    POST endpoint to query the RAG agent.
+
+    Accepts a JSON payload with a "query" field and returns the agent's response.
+    """
+    logger.info(f"Received query: {request.query}")
+
+    try:
+        # Call the existing chat_with_agent function from agent.py
+        response_data = chat_with_agent(
+            query=request.query,
+            collection_name=request.collection_name,
+            llm_model=request.llm_model,
+        )
+
+        logger.info("Query processed successfully")
+
+        return QueryResponse(
+            response=response_data.get("response", ""),
+            query=request.query,
+            success=response_data.get("success", False),
+        )
+
+    except Exception as e:
+        logger.error(f"Error processing query: {str(e)}")
+        error_msg = f"Error processing query: {str(e)}"
+
+        return QueryResponse(
+            response="I encountered an error while processing your request.",
+            query=request.query,
+            success=False,
+            error=error_msg,
+        )
+
+
+@app.get("/")
+async def root():
+    """Root endpoint for health check."""
+    return {"message": "RAG Solution API is running", "status": "healthy"}
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # Log to a rotating file in addition to loguru's default console handler
+    logs_dir = os.path.join(os.getcwd(), "logs")
+    os.makedirs(logs_dir, exist_ok=True)
+    logger.add(os.path.join(logs_dir, "dev.log"), rotation="10 MB", retention="10 days")
+
+    # Run the server
+    uvicorn.run(
+        "server:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True,  # Enable auto-reload during development
+    )
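A quick way to exercise the server without a browser is FastAPI's bundled test client. A sketch, assuming `server.py` imports cleanly and that the agent's backing services (Qdrant plus Ollama or OpenAI) are reachable when a real query runs:

```python
from fastapi.testclient import TestClient

from server import app

client = TestClient(app)

# Health check at the root path.
assert client.get("/").json()["status"] == "healthy"

# A body without the required "query" field is rejected with a 422 by the
# Pydantic model before the agent is ever called.
assert client.post("/api/test-query", json={}).status_code == 422

# A well-formed query goes through the agent; "success" comes back False
# if the vector store or LLM backend is unavailable.
result = client.post("/api/test-query", json={"query": "hello"}).json()
print(result["success"], result["response"])
```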