Update evaluation for RAG systems; script for evaluating against questions

2026-03-13 08:20:18 +03:00
parent 6c953a327f
commit 236f44b2c3
7 changed files with 562 additions and 28947 deletions
--- a/services/rag/langchain/agent.py
+++ b/services/rag/langchain/agent.py
@@ -20,6 +20,9 @@ from vector_storage import initialize_vector_store
 # Load environment variables
 load_dotenv()

+CHAT_REQUEST_TIMEOUT_SECONDS = float(os.getenv("CHAT_REQUEST_TIMEOUT_SECONDS", "45"))
+CHAT_MAX_RETRIES = int(os.getenv("CHAT_MAX_RETRIES", "0"))
+

 def get_llm_model_info(
    llm_model: Optional[str] = None,
@@ -149,10 +152,12 @@ def create_chat_agent(
            openai_api_base=base_url_or_api_base,
            openai_api_key=api_key,
            temperature=0.1,
+            request_timeout=CHAT_REQUEST_TIMEOUT_SECONDS,
+            max_retries=CHAT_MAX_RETRIES,
        )

        logger.info(
-            f"Using OpenAI-compatible model: {model_name} via {base_url_or_api_base}"
+            f"Using OpenAI-compatible model: {model_name} via {base_url_or_api_base}, timeout={CHAT_REQUEST_TIMEOUT_SECONDS}s, retries={CHAT_MAX_RETRIES}"
        )
    else:  # Default to ollama
        # Initialize the Ollama chat model
@@ -160,9 +165,13 @@ def create_chat_agent(
            model=model_name,
            base_url=base_url_or_api_base,  # Default Ollama URL
            temperature=0.1,
+            sync_client_kwargs={"timeout": CHAT_REQUEST_TIMEOUT_SECONDS},
+            async_client_kwargs={"timeout": CHAT_REQUEST_TIMEOUT_SECONDS},
        )

-        logger.info(f"Using Ollama model: {model_name}")
+        logger.info(
+            f"Using Ollama model: {model_name}, timeout={CHAT_REQUEST_TIMEOUT_SECONDS}s"
+        )

    # Create the document retrieval tool
    retrieval_tool = DocumentRetrievalTool()