139 lines
4.8 KiB
Python
139 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CLI entry point for the RAG solution using LlamaIndex and Qdrant.
|
|
"""
|
|
|
|
import click
|
|
from loguru import logger
|
|
import sys
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables from a .env file at import time, i.e. before the
# CLI group or any of its commands is defined or executed.
load_dotenv()
|
|
|
|
|
|
def setup_logging():
    """Configure loguru to log to a rotating file and to stdout.

    Creates the ``logs`` directory (relative to the current working
    directory) if it does not exist, removes the handlers currently
    registered on the loguru ``logger``, and registers two INFO-level sinks:

    * ``logs/dev.log`` - rotated at 10 MB, kept for 10 days; each record
      carries the source file name and line number.
    * ``sys.stdout`` - colorized, without the source location.
    """
    # Single source of truth for the log location: the directory that gets
    # created and the file sink below are derived from the same path, so they
    # cannot drift apart if the location is ever changed.
    logs_dir = Path("logs")
    logs_dir.mkdir(exist_ok=True)

    # Remove the existing handlers (including loguru's default one) so that
    # only the two sinks configured below are active.
    logger.remove()

    # File sink with size-based rotation and time-based retention.
    logger.add(
        logs_dir / "dev.log",
        rotation="10 MB",
        retention="10 days",
        level="INFO",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {file}:{line} | {message}",
    )

    # Console sink.
    logger.add(
        sys.stdout,
        level="INFO",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
        colorize=True,
    )
|
|
|
|
|
|
@click.group()
@click.version_option(version='1.0.0')
def main():
    """Main CLI entry point for the RAG solution."""
    # NOTE: no explicit ``help=`` is passed to the group, so click uses the
    # docstring above as the --help text; treat it as user-facing output.

    # The group callback runs before the selected subcommand, so logging is
    # configured by the time any command body executes.
    setup_logging()
    logger.info("Starting RAG solution CLI")
|
|
|
|
|
|
@main.command(help="Basic connectivity test that returns 'pong'")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def ping(verbose):
    """Print ``pong`` and log that the command ran.

    With ``--verbose`` an extra log record is written before the reply and
    the closing log message is more detailed; the reply itself is the same.
    """
    if verbose:
        logger.info("Executing ping command")

    # The reply does not depend on verbosity.
    click.echo("pong")

    closing_message = (
        "Ping command completed successfully" if verbose else "Ping command executed"
    )
    logger.info(closing_message)
|
|
|
|
|
|
@main.command(help="Load and process documents from the data folder into vector storage")
@click.option('--data-path', '-d', default="../../../data", help="Path to the data folder relative to current directory")
# Declared as an on/off pair: the previous ``is_flag=True, default=True`` form
# could never be switched off. ``--recursive`` and ``-r`` keep working.
@click.option('--recursive/--no-recursive', '-r', default=True, help="Process subdirectories recursively")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def enrich(data_path, recursive, verbose):
    """Run document enrichment for the given data folder.

    Delegates the actual work to
    ``enrichment.process_documents_from_data_folder(data_path=..., recursive=...)``.

    Args:
        data_path: Path to the data folder, passed through unchanged.
        recursive: Whether subdirectories should be processed; disable with
            ``--no-recursive``.
        verbose: When set, calls ``logger.enable("__main__")``.

    Raises:
        SystemExit: With exit status 1 when the enrichment module cannot be
            imported or when processing raises, so that shells and scripts
            can detect the failure.
    """
    if verbose:
        # NOTE(review): this only re-enables records coming from ``__main__``.
        # Nothing in this file disables them and the handlers registered by
        # setup_logging() stay at INFO, so the flag currently has little
        # visible effect.
        logger.enable("__main__")

    logger.info(f"Starting document enrichment from: {data_path}")
    logger.info(f"Recursive processing: {recursive}")

    # Imported lazily so that the other commands work even when the
    # enrichment module (or one of its dependencies) is unavailable.
    try:
        from enrichment import process_documents_from_data_folder
    except ImportError as e:
        logger.error(f"Failed to import enrichment module: {e}")
        click.echo(f"Error: Could not import enrichment module: {e}", err=True)
        raise SystemExit(1)
    logger.info("Enrichment module imported successfully")

    # Top-level CLI boundary: report any processing failure and exit non-zero
    # instead of letting a raw traceback reach the user.
    try:
        process_documents_from_data_folder(data_path=data_path, recursive=recursive)
    except Exception as e:
        logger.error(f"Error during document enrichment: {e}")
        click.echo(f"Error during document enrichment: {e}", err=True)
        raise SystemExit(1)

    logger.info("Document enrichment completed successfully")
    click.echo("Document enrichment completed successfully")
|
|
|
|
|
|
@main.command(help="Retrieve documents from vector storage based on a query")
@click.argument('query', type=str)
# A result count below 1 is meaningless, so reject it at the CLI boundary.
@click.option('--top-k', '-k', default=5, type=click.IntRange(min=1), help="Number of top similar documents to retrieve")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def retrieve(query, top_k, verbose):
    """Query the retrieval module and print the matching documents.

    Delegates to
    ``retrieval.retrieve_documents_with_query_engine(query=..., top_k=...)``
    and prints, for every returned item, a content preview (first 200
    characters), its score and its metadata entries. Each item is read via
    the ``'content'``, ``'score'`` and ``'metadata'`` keys.

    Args:
        query: Free-text query string.
        top_k: Number of results to request; must be at least 1.
        verbose: When set, calls ``logger.enable("__main__")``.

    Raises:
        SystemExit: With exit status 1 when the retrieval module cannot be
            imported or when retrieval/rendering raises, so that shells and
            scripts can detect the failure.
    """
    if verbose:
        # NOTE(review): this only re-enables records coming from ``__main__``.
        # Nothing in this file disables them and the handlers registered by
        # setup_logging() stay at INFO, so the flag currently has little
        # visible effect.
        logger.enable("__main__")

    logger.info(f"Starting document retrieval for query: {query}")
    logger.info(f"Top-K results: {top_k}")

    # Imported lazily so that the other commands work even when the
    # retrieval module (or one of its dependencies) is unavailable.
    try:
        from retrieval import retrieve_documents_with_query_engine
    except ImportError as e:
        logger.error(f"Failed to import retrieval module: {e}")
        click.echo(f"Error: Could not import retrieval module: {e}", err=True)
        raise SystemExit(1)
    logger.info("Retrieval module imported successfully")

    # Top-level CLI boundary: retrieval and rendering share one handler so a
    # malformed result is reported the same way as a failed query.
    try:
        results = retrieve_documents_with_query_engine(query=query, top_k=top_k)
        logger.info(f"Retrieved {len(results)} documents for query: {query}")

        click.echo(f"\nFound {len(results)} results for query: '{query}'\n")

        for i, result in enumerate(results, 1):
            content = result['content']
            # Show at most 200 characters and mark truncation with an ellipsis.
            preview = f"{content[:200]}{'...' if len(content) > 200 else ''}"

            click.echo(f"Result {i}:")
            click.echo(f" Content preview: {preview}")
            click.echo(f" Score: {result['score']}")
            click.echo(" Metadata:")
            for key, value in result['metadata'].items():
                click.echo(f" {key}: {value}")
            click.echo("")
    except Exception as e:
        logger.error(f"Error during document retrieval: {e}")
        click.echo(f"Error during document retrieval: {e}", err=True)
        raise SystemExit(1)
    else:
        click.echo("Document retrieval completed successfully")
|
|
|
|
|
|
# Invoke the click command group only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|