Evaluation update for RAG systems: add a script for evaluating against a question set
This commit is contained in:
@@ -20,6 +20,9 @@ from vector_storage import initialize_vector_store
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
CHAT_REQUEST_TIMEOUT_SECONDS = float(os.getenv("CHAT_REQUEST_TIMEOUT_SECONDS", "45"))
|
||||
CHAT_MAX_RETRIES = int(os.getenv("CHAT_MAX_RETRIES", "0"))
|
||||
|
||||
|
||||
def get_llm_model_info(
|
||||
llm_model: Optional[str] = None,
|
||||
@@ -149,10 +152,12 @@ def create_chat_agent(
|
||||
openai_api_base=base_url_or_api_base,
|
||||
openai_api_key=api_key,
|
||||
temperature=0.1,
|
||||
request_timeout=CHAT_REQUEST_TIMEOUT_SECONDS,
|
||||
max_retries=CHAT_MAX_RETRIES,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Using OpenAI-compatible model: {model_name} via {base_url_or_api_base}"
|
||||
f"Using OpenAI-compatible model: {model_name} via {base_url_or_api_base}, timeout={CHAT_REQUEST_TIMEOUT_SECONDS}s, retries={CHAT_MAX_RETRIES}"
|
||||
)
|
||||
else: # Default to ollama
|
||||
# Initialize the Ollama chat model
|
||||
@@ -160,9 +165,13 @@ def create_chat_agent(
|
||||
model=model_name,
|
||||
base_url=base_url_or_api_base, # Default Ollama URL
|
||||
temperature=0.1,
|
||||
sync_client_kwargs={"timeout": CHAT_REQUEST_TIMEOUT_SECONDS},
|
||||
async_client_kwargs={"timeout": CHAT_REQUEST_TIMEOUT_SECONDS},
|
||||
)
|
||||
|
||||
logger.info(f"Using Ollama model: {model_name}")
|
||||
logger.info(
|
||||
f"Using Ollama model: {model_name}, timeout={CHAT_REQUEST_TIMEOUT_SECONDS}s"
|
||||
)
|
||||
|
||||
# Create the document retrieval tool
|
||||
retrieval_tool = DocumentRetrievalTool()
|
||||
|
||||
@@ -25,6 +25,10 @@ OLLAMA_EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
|
||||
OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
|
||||
OPENAI_EMBEDDING_BASE_URL = os.getenv("OPENAI_EMBEDDING_BASE_URL")
|
||||
OPENAI_EMBEDDING_API_KEY = os.getenv("OPENAI_EMBEDDING_API_KEY")
|
||||
EMBEDDING_REQUEST_TIMEOUT_SECONDS = float(
|
||||
os.getenv("EMBEDDING_REQUEST_TIMEOUT_SECONDS", "30")
|
||||
)
|
||||
EMBEDDING_MAX_RETRIES = int(os.getenv("EMBEDDING_MAX_RETRIES", "0"))
|
||||
|
||||
|
||||
def initialize_vector_store(
|
||||
@@ -53,6 +57,8 @@ def initialize_vector_store(
|
||||
model=OPENAI_EMBEDDING_MODEL,
|
||||
openai_api_base=OPENAI_EMBEDDING_BASE_URL,
|
||||
openai_api_key=OPENAI_EMBEDDING_API_KEY,
|
||||
request_timeout=EMBEDDING_REQUEST_TIMEOUT_SECONDS,
|
||||
max_retries=EMBEDDING_MAX_RETRIES,
|
||||
)
|
||||
elif EMBEDDING_STRATEGY == "none":
|
||||
embeddings = None
|
||||
@@ -63,6 +69,8 @@ def initialize_vector_store(
|
||||
embeddings = OllamaEmbeddings(
|
||||
model=OLLAMA_EMBEDDING_MODEL,
|
||||
base_url="http://localhost:11434", # Default Ollama URL
|
||||
sync_client_kwargs={"timeout": EMBEDDING_REQUEST_TIMEOUT_SECONDS},
|
||||
async_client_kwargs={"timeout": EMBEDDING_REQUEST_TIMEOUT_SECONDS},
|
||||
)
|
||||
|
||||
# Check if collection exists and create if needed
|
||||
|
||||
Reference in New Issue
Block a user