preparations for demo html page
This commit is contained in:
BIN
services/rag/.DS_Store
vendored
Normal file
BIN
services/rag/.DS_Store
vendored
Normal file
Binary file not shown.
@@ -46,5 +46,13 @@ Chosen data folder: relatve ./../../../data - from the current folder
|
||||
|
||||
# Phase 8 (http endpoint to retrieve data from the vector storage by query)
|
||||
|
||||
- [ ] Create file `server.py`, with web framework fastapi, for example
|
||||
- [ ] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
|
||||
- [x] Create file `server.py`, with web framework fastapi, for example
|
||||
- [x] Add POST endpoint "/api/test-query" which will use agent, and retrieve response for query, sent in JSON format, field "query"
|
||||
|
||||
# Phase 9 (simple html web page with chat interface)
|
||||
|
||||
- [ ] Create html webpage called demo.html, with simple UI for chat interface. It can be taken with predefined data from codepen or something
|
||||
- [ ] Adjust demo.html code, so it would in fact work with the API endpoint, as chat with the agent. API endpoint should be asked beforehand in propmt message.
|
||||
- [ ] After accepting API endpont address, it should be used to send requests and process responses to imitate chat with the agent by the provided API endpoint.
|
||||
- [ ] Show API endpoint in the header of the chat.
|
||||
- [ ] If there is error connecting with the API, imitate bot sending message about error with the connection and suggestion to reload page to provide new API endpoint
|
||||
|
||||
@@ -28,6 +28,7 @@ rag-solution/services/rag/langchain/
|
||||
├── PLANNING.md # Development roadmap and phases
|
||||
├── QWEN.md # Current file - project context
|
||||
├── requirements.txt # Python dependencies
|
||||
├── server.py # Web server with API endpoints for the RAG agent
|
||||
├── vector_storage.py # Vector storage module with Qdrant and Ollama embeddings
|
||||
└── venv/ # Virtual environment
|
||||
```
|
||||
@@ -85,7 +86,11 @@ The project is organized into 8 development phases as outlined in `PLANNING.md`:
|
||||
- [x] Allow switching between "ollama" and "openai" strategies
|
||||
|
||||
### Phase 8: HTTP Endpoint
|
||||
- [ ] Create web framework with POST endpoint `/api/test-query` for agent queries
|
||||
- [x] Create web framework with POST endpoint `/api/test-query` for agent queries
|
||||
- [x] Implement server using FastAPI and LangServe
|
||||
- [x] Add request/response validation with Pydantic models
|
||||
- [x] Include CORS middleware for cross-origin requests
|
||||
- [x] Add health check endpoint
|
||||
|
||||
## Environment Configuration
|
||||
|
||||
@@ -195,6 +200,15 @@ The project is in early development phase. The virtual environment is set up and
|
||||
- When strategy is "ollama" (default), uses existing `ChatOllama` implementation
|
||||
- Updated CLI chat command to show which model strategy is being used
|
||||
|
||||
### Phase 8 Implementation Notes
|
||||
- Created `server.py` with FastAPI and integrated with existing agent functionality
|
||||
- Implemented `/api/test-query` POST endpoint that accepts JSON with "query" field
|
||||
- Added request/response validation using Pydantic models
|
||||
- Included CORS middleware to support cross-origin requests
|
||||
- Added health check endpoint at root path
|
||||
- Server runs on port 8000 by default
|
||||
- Supports both Ollama and OpenAI strategies through existing configuration
|
||||
|
||||
### Issue Fix Notes
|
||||
- Fixed DocumentRetrievalTool class to properly declare and initialize the retriever field
|
||||
- Resolved Pydantic field declaration issue that caused "object has no field" error
|
||||
|
||||
293
services/rag/langchain/demo.html
Normal file
293
services/rag/langchain/demo.html
Normal file
File diff suppressed because one or more lines are too long
118
services/rag/langchain/server.py
Normal file
118
services/rag/langchain/server.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""Web server for the RAG solution with LangServe integration."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from agent import chat_with_agent
|
||||
|
||||
|
||||
class QueryRequest(BaseModel):
|
||||
"""Request model for the query endpoint."""
|
||||
|
||||
query: str
|
||||
collection_name: str = "documents_langchain"
|
||||
# llm_model: str = None
|
||||
|
||||
|
||||
class QueryResponse(BaseModel):
|
||||
"""Response model for the query endpoint."""
|
||||
|
||||
response: str
|
||||
query: str
|
||||
success: bool
|
||||
error: str = None
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Lifespan event handler for startup and shutdown."""
|
||||
# Startup
|
||||
logger.info("Starting RAG server...")
|
||||
yield
|
||||
# Shutdown
|
||||
logger.info("Shutting down RAG server...")
|
||||
|
||||
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
title="RAG Solution API",
|
||||
description="API for Retrieval-Augmented Generation solution with Langchain",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
# Add CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # In production, configure this properly
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
@app.post("/api/test-query", response_model=QueryResponse)
|
||||
async def test_query(request: QueryRequest) -> QueryResponse:
|
||||
"""
|
||||
POST endpoint to query the RAG agent.
|
||||
|
||||
Accepts a JSON payload with a "query" field and returns the agent's response.
|
||||
"""
|
||||
logger.info(f"Received query: {request.query}")
|
||||
|
||||
try:
|
||||
# Call the existing chat_with_agent function from agent.py
|
||||
response_data = chat_with_agent(
|
||||
query=request.query,
|
||||
collection_name=request.collection_name,
|
||||
llm_model=request.llm_model,
|
||||
)
|
||||
|
||||
logger.info("Query processed successfully")
|
||||
|
||||
return QueryResponse(
|
||||
response=response_data.get("response", ""),
|
||||
query=request.query,
|
||||
success=response_data.get("success", False),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing query: {str(e)}")
|
||||
error_msg = f"Error processing query: {str(e)}"
|
||||
|
||||
return QueryResponse(
|
||||
response="I encountered an error while processing your request.",
|
||||
query=request.query,
|
||||
success=False,
|
||||
error=error_msg,
|
||||
)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint for health check."""
|
||||
return {"message": "RAG Solution API is running", "status": "healthy"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
# Configure logging to output to both file and stdout as specified in requirements
|
||||
logs_dir = os.path.join(os.getcwd(), "logs")
|
||||
os.makedirs(logs_dir, exist_ok=True)
|
||||
logger.add("logs/dev.log", rotation="10 MB", retention="10 days")
|
||||
|
||||
# Run the server
|
||||
uvicorn.run(
|
||||
"server:app",
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
reload=True, # Enable auto-reload during development
|
||||
)
|
||||
Reference in New Issue
Block a user