diff --git a/services/rag/langchain/PLANNING.md b/services/rag/langchain/PLANNING.md
index f8d53bf..9346961 100644
--- a/services/rag/langchain/PLANNING.md
+++ b/services/rag/langchain/PLANNING.md
@@ -46,5 +46,13 @@ Chosen data folder: relatve ./../../../data - from the current folder
# Phase 8 (http endpoint to retrieve data from the vector storage by query)
-- [ ] Create file `server.py`, with web framework fastapi, for example
-- [ ] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
+- [x] Create file `server.py`, with web framework fastapi, for example
+- [x] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
+
+# Phase 9 (simple HTML web page with chat interface)
+
+- [ ] Create an HTML page called demo.html with a simple chat UI. A template with predefined data can be taken from CodePen or a similar source.
+- [ ] Adjust the demo.html code so it actually works against the API endpoint as a chat with the agent. The API endpoint address should be requested beforehand via a prompt message.
+- [ ] After the API endpoint address is accepted, use it to send requests and process responses, imitating a chat with the agent over the provided endpoint (see the sketch after this diff).
+- [ ] Show the API endpoint in the chat header.
+- [ ] If there is an error connecting to the API, imitate the bot sending a message about the connection error and a suggestion to reload the page to provide a new API endpoint.
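The page itself will be plain HTML/JavaScript, but the client flow the Phase 9 items describe (ask for the endpoint up front, POST each message, surface connection errors as a bot message) is worth pinning down. A minimal sketch of that flow in Python, assuming only the Phase 8 contract of POST `/api/test-query` with a JSON `query` field; everything else here is illustrative:

```python
# Sketch of the Phase 9 chat flow against the Phase 8 endpoint.
# Assumes POST /api/test-query accepting JSON {"query": ...}; illustrative only.
import requests

# The endpoint address is requested up front, as the checklist specifies.
api_endpoint = input("API endpoint (e.g. http://localhost:8000/api/test-query): ")

while True:
    query = input("You: ")
    try:
        resp = requests.post(api_endpoint, json={"query": query}, timeout=60)
        data = resp.json()
        print(f"Bot: {data.get('response', '')}")
    except requests.exceptions.RequestException:
        # On connection errors, the bot reports the problem and suggests
        # reloading the page to provide a new endpoint, per the last item.
        print("Bot: I couldn't reach the API. Please reload the page and provide a new API endpoint.")
        break
```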
diff --git a/services/rag/langchain/QWEN.md b/services/rag/langchain/QWEN.md
index e0d0cff..5e76f96 100644
--- a/services/rag/langchain/QWEN.md
+++ b/services/rag/langchain/QWEN.md
@@ -28,6 +28,7 @@ rag-solution/services/rag/langchain/
├── PLANNING.md # Development roadmap and phases
├── QWEN.md # Current file - project context
├── requirements.txt # Python dependencies
+├── server.py # Web server with API endpoints for the RAG agent
├── vector_storage.py # Vector storage module with Qdrant and Ollama embeddings
└── venv/ # Virtual environment
```
@@ -85,7 +86,11 @@ The project is organized into 8 development phases as outlined in `PLANNING.md`:
- [x] Allow switching between "ollama" and "openai" strategies
### Phase 8: HTTP Endpoint
-- [ ] Create web framework with POST endpoint `/api/test-query` for agent queries
+- [x] Create web framework with POST endpoint `/api/test-query` for agent queries
+- [x] Implement server using FastAPI
+- [x] Add request/response validation with Pydantic models
+- [x] Include CORS middleware for cross-origin requests
+- [x] Add health check endpoint
## Environment Configuration
@@ -195,6 +200,15 @@ The project is in early development phase. The virtual environment is set up and
- When strategy is "ollama" (default), uses existing `ChatOllama` implementation
- Updated CLI chat command to show which model strategy is being used
+### Phase 8 Implementation Notes
+- Created `server.py` with FastAPI and integrated with existing agent functionality
+- Implemented `/api/test-query` POST endpoint that accepts JSON with "query" field
+- Added request/response validation using Pydantic models
+- Included CORS middleware to support cross-origin requests
+- Added health check endpoint at root path
+- Server runs on port 8000 by default
+- Supports both Ollama and OpenAI strategies through existing configuration
+
### Issue Fix Notes
- Fixed DocumentRetrievalTool class to properly declare and initialize the retriever field
- Resolved Pydantic field declaration issue that caused "object has no field" error
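The Phase 8 notes above fix the endpoint contract. For reference, a round trip under those defaults is shaped roughly as follows; the field names match the Pydantic models in `server.py` below, and the values are made up:

```python
# Illustrative payload shapes for POST /api/test-query.
request_body = {
    "query": "What is in the knowledge base?",
    "collection_name": "documents_langchain",  # optional; this is the default
}

# A successful response carries the answer plus the echoed query:
success_response = {
    "response": "...agent answer...",
    "query": "What is in the knowledge base?",
    "success": True,
    "error": None,
}
```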
diff --git a/services/rag/langchain/demo.html b/services/rag/langchain/demo.html
new file mode 100644
index 0000000..13c6ed1
--- /dev/null
+++ b/services/rag/langchain/demo.html
@@ -0,0 +1,37 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>SimpleChat</title>
+</head>
+<body>
+  <div class="chat">
+    <div class="chat-header">SimpleChat</div>
+    <div class="chat-messages">
+      <div class="message message-bot">
+        <div class="message-meta">
+          <span class="message-author">BOT</span>
+          <span class="message-time">12:45</span>
+        </div>
+        <div class="message-text">
+          Hi, welcome to SimpleChat! Go ahead and send me a message. 😄
+        </div>
+      </div>
+      <div class="message message-user">
+        <div class="message-meta">
+          <span class="message-author">Sajad</span>
+          <span class="message-time">12:46</span>
+        </div>
+        <div class="message-text">
+          You can change your name in JS section!
+        </div>
+      </div>
+    </div>
+    <form class="chat-input">
+      <input type="text" placeholder="Type a message...">
+      <button type="submit">Send</button>
+    </form>
+  </div>
+</body>
+</html>
diff --git a/services/rag/langchain/server.py b/services/rag/langchain/server.py
new file mode 100644
index 0000000..c284e97
--- /dev/null
+++ b/services/rag/langchain/server.py
@@ -0,0 +1,117 @@
+"""Web server for the RAG solution with LangServe integration."""
+
+import os
+from contextlib import asynccontextmanager
+from typing import Optional
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from loguru import logger
+from pydantic import BaseModel
+
+from agent import chat_with_agent
+
+
+class QueryRequest(BaseModel):
+ """Request model for the query endpoint."""
+
+ query: str
+ collection_name: str = "documents_langchain"
+    llm_model: Optional[str] = None
+
+
+class QueryResponse(BaseModel):
+ """Response model for the query endpoint."""
+
+ response: str
+ query: str
+ success: bool
+    error: Optional[str] = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ """Lifespan event handler for startup and shutdown."""
+ # Startup
+ logger.info("Starting RAG server...")
+ yield
+ # Shutdown
+ logger.info("Shutting down RAG server...")
+
+
+# Create FastAPI app
+app = FastAPI(
+ title="RAG Solution API",
+    description="API for a Retrieval-Augmented Generation solution with LangChain",
+ version="1.0.0",
+ lifespan=lifespan,
+)
+
+# Add CORS middleware
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"], # In production, configure this properly
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+
+@app.post("/api/test-query", response_model=QueryResponse)
+async def test_query(request: QueryRequest) -> QueryResponse:
+ """
+ POST endpoint to query the RAG agent.
+
+ Accepts a JSON payload with a "query" field and returns the agent's response.
+ """
+ logger.info(f"Received query: {request.query}")
+
+ try:
+ # Call the existing chat_with_agent function from agent.py
+ response_data = chat_with_agent(
+ query=request.query,
+ collection_name=request.collection_name,
+ llm_model=request.llm_model,
+ )
+
+ logger.info("Query processed successfully")
+
+ return QueryResponse(
+ response=response_data.get("response", ""),
+ query=request.query,
+ success=response_data.get("success", False),
+ )
+
+    except Exception as e:
+        error_msg = f"Error processing query: {e}"
+        logger.error(error_msg)
+
+ return QueryResponse(
+ response="I encountered an error while processing your request.",
+ query=request.query,
+ success=False,
+ error=error_msg,
+ )
+
+
+@app.get("/")
+async def root():
+ """Root endpoint for health check."""
+ return {"message": "RAG Solution API is running", "status": "healthy"}
+
+
+if __name__ == "__main__":
+ import uvicorn
+
+    # Log to a rotating file in addition to loguru's default console sink
+    logs_dir = os.path.join(os.getcwd(), "logs")
+    os.makedirs(logs_dir, exist_ok=True)
+    logger.add(os.path.join(logs_dir, "dev.log"), rotation="10 MB", retention="10 days")
+
+ # Run the server
+ uvicorn.run(
+ "server:app",
+ host="0.0.0.0",
+ port=8000,
+ reload=True, # Enable auto-reload during development
+ )
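Once the server is running, a quick smoke test of both endpoints, assuming the defaults in this file (localhost, port 8000):

```python
# Smoke test for the health check and query endpoints defined above.
# Assumes the server is running locally on port 8000.
import requests

base = "http://localhost:8000"

# Health check should report a healthy status.
print(requests.get(f"{base}/").json())

# A query round trip through /api/test-query.
resp = requests.post(
    f"{base}/api/test-query",
    json={"query": "Give me a one-line summary of the documents."},
    timeout=120,
)
data = resp.json()
print(data["success"], data["response"])
```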