Working retrieval with the cli

This commit is contained in:
2026-02-03 23:25:24 +03:00
parent 4cbd5313d2
commit 299ee0acb5
5 changed files with 274 additions and 31 deletions

View File

@@ -62,5 +62,56 @@ def enrich(data_dir, collection_name):
click.echo(f"Error: {str(e)}")
@cli.command(
    name="retrieve",
    help="Retrieve documents from vector database based on a query",
)
@click.argument("query")
@click.option(
    "--collection-name",
    default="documents_langchain",
    help="Name of the vector store collection",
)
@click.option(
    "--top-k",
    default=5,
    # Reject zero/negative counts at the CLI boundary instead of passing
    # them on to the search call.
    type=click.IntRange(min=1),
    help="Number of documents to retrieve",
)
def retrieve(query, collection_name, top_k):
    """Retrieve documents from vector database based on a query.

    Calls ``retrieval.search_documents_with_metadata`` with the given query,
    collection name and result count, then prints one entry per result.

    Args:
        query: Search text taken from the command-line argument.
        collection_name: Value of ``--collection-name``.
        top_k: Value of ``--top-k``; must be at least 1.

    Each result is read by key: ``source``, ``filename``, ``page_number``,
    ``file_extension``, ``content`` and ``metadata``.

    Any exception raised while searching or printing is logged with its
    traceback and reported to the user as ``Error: <message>``; it is not
    re-raised.
    """
    # Maximum number of content characters shown per result.
    preview_limit = 200

    logger.info(f"Starting retrieval process for query: {query}")
    try:
        # Import here to avoid circular dependencies
        from retrieval import search_documents_with_metadata

        # Perform retrieval
        results = search_documents_with_metadata(
            query=query,
            collection_name=collection_name,
            top_k=top_k,
        )
        if not results:
            click.echo("No relevant documents found for the query.")
            return

        click.echo(f"Found {len(results)} relevant documents:\n")
        for i, result in enumerate(results, 1):
            content = result['content']
            preview = content[:preview_limit]
            # Only mark the preview as truncated when text was actually cut.
            if len(content) > preview_limit:
                preview = f"{preview}..."

            click.echo(f"{i}. Source: {result['source']}")
            click.echo(f" Filename: {result['filename']}")
            click.echo(f" Page: {result['page_number']}")
            click.echo(f" File Extension: {result['file_extension']}")
            click.echo(f" Content Preview: {preview}")
            click.echo(f" Metadata: {result['metadata']}\n")
        logger.info("Retrieval process completed successfully!")
    except Exception as e:
        # logger.exception records the traceback along with the message, so
        # failures inside the retrieval layer stay diagnosable from the logs.
        logger.exception(f"Error during retrieval process: {str(e)}")
        click.echo(f"Error: {str(e)}")
# Run the Click command group only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    cli()