Working retrieval with the cli
This commit is contained in:
@@ -62,5 +62,56 @@ def enrich(data_dir, collection_name):
|
||||
click.echo(f"Error: {str(e)}")
|
||||
|
||||
|
||||
@cli.command(
    name="retrieve",
    help="Retrieve documents from vector database based on a query",
)
@click.argument("query")
@click.option(
    "--collection-name",
    default="documents_langchain",
    help="Name of the vector store collection",
)
@click.option(
    "--top-k",
    default=5,
    # A non-positive document count is meaningless; reject it at parse time.
    type=click.IntRange(min=1),
    help="Number of documents to retrieve",
)
def retrieve(query, collection_name, top_k):
    """Retrieve documents from vector database based on a query.

    Calls ``search_documents_with_metadata`` with the given query, collection
    name and result limit, then prints a numbered summary of each hit
    (source, filename, page, file extension, content preview, metadata).

    Args:
        query: Search text passed through to the retrieval function.
        collection_name: Name of the vector store collection to search.
        top_k: Maximum number of documents to request (at least 1).

    Any exception raised during retrieval or printing is logged and echoed
    to the user instead of propagating.
    """
    # Longest content prefix shown per result before it is cut off.
    preview_limit = 200

    logger.info(f"Starting retrieval process for query: {query}")

    try:
        # Import here to avoid circular dependencies
        from retrieval import search_documents_with_metadata

        # Perform retrieval
        results = search_documents_with_metadata(
            query=query,
            collection_name=collection_name,
            top_k=top_k,
        )

        if not results:
            click.echo("No relevant documents found for the query.")
            return

        click.echo(f"Found {len(results)} relevant documents:\n")

        for i, result in enumerate(results, 1):
            content = result['content']
            # Only show the ellipsis when the content was actually truncated.
            ellipsis = "..." if len(content) > preview_limit else ""

            click.echo(f"{i}. Source: {result['source']}")
            click.echo(f" Filename: {result['filename']}")
            click.echo(f" Page: {result['page_number']}")
            click.echo(f" File Extension: {result['file_extension']}")
            click.echo(f" Content Preview: {content[:preview_limit]}{ellipsis}")
            click.echo(f" Metadata: {result['metadata']}\n")

        logger.info("Retrieval process completed successfully!")

    except Exception as e:
        # NOTE(review): the failure is reported but the command still returns
        # normally, mirroring the error handling visible in the enrich
        # command; confirm whether a non-zero exit status is wanted here.
        logger.error(f"Error during retrieval process: {str(e)}")
        click.echo(f"Error: {str(e)}")
|
||||
|
||||
|
||||
# Script entry point: hand control to the click command group when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    cli()
|
||||
|
||||
Reference in New Issue
Block a user