From 2cb9b39bf2806d8e776f1de9aacb8eadeea4eae5 Mon Sep 17 00:00:00 2001 From: idchlife Date: Mon, 9 Feb 2026 21:17:42 +0300 Subject: [PATCH] removed test retrieval feature. off you go --- services/rag/langchain/cli.py | 40 ++------------ services/rag/langchain/retrieval.py | 84 +++++++++-------------------- 2 files changed, 30 insertions(+), 94 deletions(-) diff --git a/services/rag/langchain/cli.py b/services/rag/langchain/cli.py index 5017ee4..f732c4e 100644 --- a/services/rag/langchain/cli.py +++ b/services/rag/langchain/cli.py @@ -1,8 +1,8 @@ import os from pathlib import Path -from dotenv import load_dotenv import click +from dotenv import load_dotenv from loguru import logger # Load environment variables @@ -85,36 +85,9 @@ def retrieve(query, collection_name, top_k): """Retrieve documents from vector database based on a query""" logger.info(f"Starting retrieval process for query: {query}") - try: - # Import here to avoid circular dependencies - from retrieval import search_documents_with_metadata - - # Perform retrieval - results = search_documents_with_metadata( - query=query, - collection_name=collection_name, - top_k=top_k - ) - - if not results: - click.echo("No relevant documents found for the query.") - return - - click.echo(f"Found {len(results)} relevant documents:\n") - - for i, result in enumerate(results, 1): - click.echo(f"{i}. Source: {result['source']}") - click.echo(f" Filename: {result['filename']}") - click.echo(f" Page: {result['page_number']}") - click.echo(f" File Extension: {result['file_extension']}") - click.echo(f" Content Preview: {result['content'][:200]}...") - click.echo(f" Metadata: {result['metadata']}\n") - - logger.info("Retrieval process completed successfully!") - - except Exception as e: - logger.error(f"Error during retrieval process: {str(e)}") - click.echo(f"Error: {str(e)}") + click.echo( + "WARNING: Retrieval disabled, since it is no longer relevant for the testing of the retrieving feature. Use chat with agent instead. xoxo" + ) @cli.command( @@ -143,10 +116,7 @@ def chat(collection_name, model): click.echo("Type 'quit' or 'exit' to end the conversation.\n") # Run the interactive chat loop - run_chat_loop( - collection_name=collection_name, - llm_model=model - ) + run_chat_loop(collection_name=collection_name, llm_model=model) logger.info("Chat session ended") diff --git a/services/rag/langchain/retrieval.py b/services/rag/langchain/retrieval.py index 8ef228b..5d2b16e 100644 --- a/services/rag/langchain/retrieval.py +++ b/services/rag/langchain/retrieval.py @@ -2,10 +2,11 @@ import os from typing import List, Optional + from dotenv import load_dotenv -from langchain_core.retrievers import BaseRetriever from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.documents import Document +from langchain_core.retrievers import BaseRetriever from loguru import logger from vector_storage import initialize_vector_store @@ -18,31 +19,31 @@ class VectorStoreRetriever(BaseRetriever): """ A custom retriever that uses the Qdrant vector store to retrieve relevant documents. """ - + vector_store: object # Qdrant vector store instance top_k: int = 5 # Number of documents to retrieve - + def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: """ Retrieve relevant documents based on the query. - + Args: query: The query string to search for run_manager: Callback manager for the run - + Returns: List of relevant documents with metadata """ logger.info(f"Searching for documents related to query: {query[:50]}...") - + try: # Perform similarity search on the vector store results = self.vector_store.similarity_search(query, k=self.top_k) - + logger.info(f"Found {len(results)} relevant documents") - + return results except Exception as e: logger.error(f"Error during similarity search: {str(e)}") @@ -52,54 +53,29 @@ class VectorStoreRetriever(BaseRetriever): def create_retriever(collection_name: str = "documents_langchain", top_k: int = 5): """ Create and return a retriever instance connected to the vector store. - + Args: collection_name: Name of the Qdrant collection to use top_k: Number of documents to retrieve - + Returns: VectorStoreRetriever instance """ - logger.info(f"Initializing vector store for retrieval from collection: {collection_name}") - + logger.info( + f"Initializing vector store for retrieval from collection: {collection_name}" + ) + # Initialize the vector store vector_store = initialize_vector_store(collection_name=collection_name) - + # Create and return the retriever retriever = VectorStoreRetriever(vector_store=vector_store, top_k=top_k) - + return retriever -def search_documents(query: str, collection_name: str = "documents_langchain", top_k: int = 5) -> List[Document]: - """ - Search for documents in the vector store based on the query. - - Args: - query: The query string to search for - collection_name: Name of the Qdrant collection to use - top_k: Number of documents to retrieve - - Returns: - List of documents with metadata - """ - logger.info(f"Starting document search for query: {query}") - - # Create the retriever - retriever = create_retriever(collection_name=collection_name, top_k=top_k) - - # Perform the search - results = retriever.invoke(query) - - logger.info(f"Search completed, returned {len(results)} documents") - - return results - - def search_documents_with_metadata( - query: str, - collection_name: str = "documents_langchain", - top_k: int = 5 + query: str, collection_name: str = "documents_langchain", top_k: int = 5 ) -> List[dict]: """ Search for documents and return them with detailed metadata. @@ -129,30 +105,20 @@ def search_documents_with_metadata( "metadata": doc.metadata, "source": doc.metadata.get("source", "Unknown"), "filename": doc.metadata.get("filename", "Unknown"), - "page_number": doc.metadata.get("page_number", doc.metadata.get("page", "N/A")), + "page_number": doc.metadata.get( + "page_number", doc.metadata.get("page", "N/A") + ), "file_extension": doc.metadata.get("file_extension", "N/A"), - "file_size": doc.metadata.get("file_size", "N/A") + "file_size": doc.metadata.get("file_size", "N/A"), } formatted_results.append(formatted_result) - logger.info(f"Metadata search completed, returned {len(formatted_results)} documents") + logger.info( + f"Metadata search completed, returned {len(formatted_results)} documents" + ) return formatted_results except Exception as e: logger.error(f"Error during document search with metadata: {str(e)}") return [] - - -if __name__ == "__main__": - # Example usage - query = "What is the main topic discussed in the documents?" - results = search_documents_with_metadata(query, top_k=5) - - print(f"Found {len(results)} documents:") - for i, result in enumerate(results, 1): - print(f"\n{i}. Source: {result['source']}") - print(f" Filename: {result['filename']}") - print(f" Page: {result['page_number']}") - print(f" Content preview: {result['content'][:200]}...") - print(f" Metadata: {result['metadata']}") \ No newline at end of file