Removed the test retrieval feature. Off you go.

This commit is contained in:
2026-02-09 21:17:42 +03:00
parent f9c47c772f
commit 2cb9b39bf2
2 changed files with 30 additions and 94 deletions

View File

@@ -1,8 +1,8 @@
import os import os
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv
import click import click
from dotenv import load_dotenv
from loguru import logger from loguru import logger
# Load environment variables # Load environment variables
@@ -85,36 +85,9 @@ def retrieve(query, collection_name, top_k):
"""Retrieve documents from vector database based on a query""" """Retrieve documents from vector database based on a query"""
logger.info(f"Starting retrieval process for query: {query}") logger.info(f"Starting retrieval process for query: {query}")
try: click.echo(
# Import here to avoid circular dependencies "WARNING: Retrieval disabled, since it is no longer relevant for the testing of the retrieving feature. Use chat with agent instead. xoxo"
from retrieval import search_documents_with_metadata )
# Perform retrieval
results = search_documents_with_metadata(
query=query,
collection_name=collection_name,
top_k=top_k
)
if not results:
click.echo("No relevant documents found for the query.")
return
click.echo(f"Found {len(results)} relevant documents:\n")
for i, result in enumerate(results, 1):
click.echo(f"{i}. Source: {result['source']}")
click.echo(f" Filename: {result['filename']}")
click.echo(f" Page: {result['page_number']}")
click.echo(f" File Extension: {result['file_extension']}")
click.echo(f" Content Preview: {result['content'][:200]}...")
click.echo(f" Metadata: {result['metadata']}\n")
logger.info("Retrieval process completed successfully!")
except Exception as e:
logger.error(f"Error during retrieval process: {str(e)}")
click.echo(f"Error: {str(e)}")
@cli.command( @cli.command(
@@ -143,10 +116,7 @@ def chat(collection_name, model):
click.echo("Type 'quit' or 'exit' to end the conversation.\n") click.echo("Type 'quit' or 'exit' to end the conversation.\n")
# Run the interactive chat loop # Run the interactive chat loop
run_chat_loop( run_chat_loop(collection_name=collection_name, llm_model=model)
collection_name=collection_name,
llm_model=model
)
logger.info("Chat session ended") logger.info("Chat session ended")

View File

@@ -2,10 +2,11 @@
import os import os
from typing import List, Optional from typing import List, Optional
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from loguru import logger from loguru import logger
from vector_storage import initialize_vector_store from vector_storage import initialize_vector_store
@@ -18,31 +19,31 @@ class VectorStoreRetriever(BaseRetriever):
""" """
A custom retriever that uses the Qdrant vector store to retrieve relevant documents. A custom retriever that uses the Qdrant vector store to retrieve relevant documents.
""" """
vector_store: object # Qdrant vector store instance vector_store: object # Qdrant vector store instance
top_k: int = 5 # Number of documents to retrieve top_k: int = 5 # Number of documents to retrieve
def _get_relevant_documents( def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]: ) -> List[Document]:
""" """
Retrieve relevant documents based on the query. Retrieve relevant documents based on the query.
Args: Args:
query: The query string to search for query: The query string to search for
run_manager: Callback manager for the run run_manager: Callback manager for the run
Returns: Returns:
List of relevant documents with metadata List of relevant documents with metadata
""" """
logger.info(f"Searching for documents related to query: {query[:50]}...") logger.info(f"Searching for documents related to query: {query[:50]}...")
try: try:
# Perform similarity search on the vector store # Perform similarity search on the vector store
results = self.vector_store.similarity_search(query, k=self.top_k) results = self.vector_store.similarity_search(query, k=self.top_k)
logger.info(f"Found {len(results)} relevant documents") logger.info(f"Found {len(results)} relevant documents")
return results return results
except Exception as e: except Exception as e:
logger.error(f"Error during similarity search: {str(e)}") logger.error(f"Error during similarity search: {str(e)}")
@@ -52,54 +53,29 @@ class VectorStoreRetriever(BaseRetriever):
def create_retriever(collection_name: str = "documents_langchain", top_k: int = 5): def create_retriever(collection_name: str = "documents_langchain", top_k: int = 5):
""" """
Create and return a retriever instance connected to the vector store. Create and return a retriever instance connected to the vector store.
Args: Args:
collection_name: Name of the Qdrant collection to use collection_name: Name of the Qdrant collection to use
top_k: Number of documents to retrieve top_k: Number of documents to retrieve
Returns: Returns:
VectorStoreRetriever instance VectorStoreRetriever instance
""" """
logger.info(f"Initializing vector store for retrieval from collection: {collection_name}") logger.info(
f"Initializing vector store for retrieval from collection: {collection_name}"
)
# Initialize the vector store # Initialize the vector store
vector_store = initialize_vector_store(collection_name=collection_name) vector_store = initialize_vector_store(collection_name=collection_name)
# Create and return the retriever # Create and return the retriever
retriever = VectorStoreRetriever(vector_store=vector_store, top_k=top_k) retriever = VectorStoreRetriever(vector_store=vector_store, top_k=top_k)
return retriever return retriever
def search_documents(query: str, collection_name: str = "documents_langchain", top_k: int = 5) -> List[Document]:
"""
Search for documents in the vector store based on the query.
Args:
query: The query string to search for
collection_name: Name of the Qdrant collection to use
top_k: Number of documents to retrieve
Returns:
List of documents with metadata
"""
logger.info(f"Starting document search for query: {query}")
# Create the retriever
retriever = create_retriever(collection_name=collection_name, top_k=top_k)
# Perform the search
results = retriever.invoke(query)
logger.info(f"Search completed, returned {len(results)} documents")
return results
def search_documents_with_metadata( def search_documents_with_metadata(
query: str, query: str, collection_name: str = "documents_langchain", top_k: int = 5
collection_name: str = "documents_langchain",
top_k: int = 5
) -> List[dict]: ) -> List[dict]:
""" """
Search for documents and return them with detailed metadata. Search for documents and return them with detailed metadata.
@@ -129,30 +105,20 @@ def search_documents_with_metadata(
"metadata": doc.metadata, "metadata": doc.metadata,
"source": doc.metadata.get("source", "Unknown"), "source": doc.metadata.get("source", "Unknown"),
"filename": doc.metadata.get("filename", "Unknown"), "filename": doc.metadata.get("filename", "Unknown"),
"page_number": doc.metadata.get("page_number", doc.metadata.get("page", "N/A")), "page_number": doc.metadata.get(
"page_number", doc.metadata.get("page", "N/A")
),
"file_extension": doc.metadata.get("file_extension", "N/A"), "file_extension": doc.metadata.get("file_extension", "N/A"),
"file_size": doc.metadata.get("file_size", "N/A") "file_size": doc.metadata.get("file_size", "N/A"),
} }
formatted_results.append(formatted_result) formatted_results.append(formatted_result)
logger.info(f"Metadata search completed, returned {len(formatted_results)} documents") logger.info(
f"Metadata search completed, returned {len(formatted_results)} documents"
)
return formatted_results return formatted_results
except Exception as e: except Exception as e:
logger.error(f"Error during document search with metadata: {str(e)}") logger.error(f"Error during document search with metadata: {str(e)}")
return [] return []
if __name__ == "__main__":
# Example usage
query = "What is the main topic discussed in the documents?"
results = search_documents_with_metadata(query, top_k=5)
print(f"Found {len(results)} documents:")
for i, result in enumerate(results, 1):
print(f"\n{i}. Source: {result['source']}")
print(f" Filename: {result['filename']}")
print(f" Page: {result['page_number']}")
print(f" Content preview: {result['content'][:200]}...")
print(f" Metadata: {result['metadata']}")