#!/usr/bin/env python3
"""
CLI entry point for the RAG solution using LlamaIndex and Qdrant.

Commands:
    ping      -- connectivity smoke test, prints "pong".
    enrich    -- load documents from a data folder into vector storage.
    retrieve  -- query vector storage and print the best matches.

The ``enrich`` and ``retrieve`` commands exit with status 1 when they fail so
that shells and CI pipelines can detect the failure.
"""

import sys
from pathlib import Path

import click
from dotenv import load_dotenv
from loguru import logger

# Load environment variables from .env file
load_dotenv()


def setup_logging():
    """Configure loguru to log to ``logs/dev.log`` and to stdout.

    The ``logs`` directory (relative to the current working directory) is
    created if missing. Any previously registered loguru handlers are removed
    first, so calling this function repeatedly does not duplicate output.
    """
    # Create logs directory if it doesn't exist
    logs_dir = Path("logs")
    logs_dir.mkdir(exist_ok=True)

    # Remove default logger to customize it
    logger.remove()

    # File handler with rotation; the path is derived from logs_dir so the
    # directory name is defined in exactly one place.
    logger.add(
        logs_dir / "dev.log",
        rotation="10 MB",
        retention="10 days",
        level="INFO",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {file}:{line} | {message}",
    )

    # Stdout handler
    logger.add(
        sys.stdout,
        level="INFO",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
        colorize=True,
    )


@click.group()
@click.version_option(version='1.0.0')
def main():
    """Main CLI entry point for the RAG solution."""
    setup_logging()
    logger.info("Starting RAG solution CLI")


@main.command(help="Basic connectivity test that returns 'pong'")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def ping(verbose):
    """Print ``pong``; with ``--verbose`` also log before and after."""
    if verbose:
        logger.info("Executing ping command")
    click.echo("pong")
    if verbose:
        logger.info("Ping command completed successfully")
    else:
        logger.info("Ping command executed")


@main.command(help="Load and process documents from the data folder into vector storage")
@click.option('--data-path', '-d', default="../../../data",
              help="Path to the data folder relative to current directory")
# An explicit on/off pair: a lone is_flag option that defaults to True gives
# the user no dependable way to switch recursion off.
@click.option('--recursive/--no-recursive', '-r', default=True,
              help="Process subdirectories recursively")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def enrich(data_path, recursive, verbose):
    """Load and process documents from the data folder into vector storage.

    Args:
        data_path: Folder to read documents from.
        recursive: Whether subdirectories are processed as well.
        verbose: Enables logging for the ``__main__`` module.

    Exits with status 1 if the ``enrichment`` module cannot be imported or
    if processing raises.
    """
    if verbose:
        logger.enable("__main__")

    logger.info(f"Starting document enrichment from: {data_path}")
    logger.info(f"Recursive processing: {recursive}")

    # Imported lazily so the other commands work without this module.
    try:
        from enrichment import process_documents_from_data_folder
    except ImportError as e:
        logger.error(f"Failed to import enrichment module: {e}")
        click.echo(f"Error: Could not import enrichment module: {e}")
        sys.exit(1)
    logger.info("Enrichment module imported successfully")

    # Top-level CLI boundary: report any failure and signal it via exit code.
    try:
        process_documents_from_data_folder(data_path=data_path, recursive=recursive)
    except Exception as e:
        logger.error(f"Error during document enrichment: {e}")
        click.echo(f"Error during document enrichment: {e}")
        sys.exit(1)

    logger.info("Document enrichment completed successfully")
    click.echo("Document enrichment completed successfully")


@main.command(help="Retrieve documents from vector storage based on a query")
@click.argument('query', type=str)
@click.option('--top-k', '-k', default=5, type=click.IntRange(min=1),
              help="Number of top similar documents to retrieve")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def retrieve(query, top_k, verbose):
    """Retrieve documents from vector storage based on a query.

    Args:
        query: Free-text query string.
        top_k: Number of results to request; must be at least 1.
        verbose: Enables logging for the ``__main__`` module.

    Each result is read as a mapping with ``content``, ``score`` and
    ``metadata`` keys. Exits with status 1 if the ``retrieval`` module cannot
    be imported or if retrieval or printing raises.
    """
    if verbose:
        logger.enable("__main__")

    logger.info(f"Starting document retrieval for query: {query}")
    logger.info(f"Top-K results: {top_k}")

    # Imported lazily so the other commands work without this module.
    try:
        from retrieval import retrieve_documents_with_query_engine
    except ImportError as e:
        logger.error(f"Failed to import retrieval module: {e}")
        click.echo(f"Error: Could not import retrieval module: {e}")
        sys.exit(1)
    logger.info("Retrieval module imported successfully")

    # Top-level CLI boundary: report any failure and signal it via exit code.
    try:
        results = retrieve_documents_with_query_engine(query=query, top_k=top_k)
        logger.info(f"Retrieved {len(results)} documents for query: {query}")

        # Display results
        click.echo(f"\nFound {len(results)} results for query: '{query}'\n")
        for i, result in enumerate(results, 1):
            content = result['content']
            # Mark the preview as truncated only when text was actually cut.
            ellipsis = '...' if len(content) > 200 else ''
            click.echo(f"Result {i}:")
            click.echo(f" Content preview: {content[:200]}{ellipsis}")
            click.echo(f" Score: {result['score']}")
            click.echo(" Metadata:")
            for key, value in result['metadata'].items():
                click.echo(f" {key}: {value}")
            click.echo("")
    except Exception as e:
        logger.error(f"Error during document retrieval: {e}")
        click.echo(f"Error during document retrieval: {e}")
        sys.exit(1)

    click.echo("Document retrieval completed successfully")


if __name__ == '__main__':
    main()