Add document enrichment for LlamaIndex. Enrichment takes a long time when a local model is used, so it is better to use an external model rather than a local one for EMBEDDING.

This commit is contained in:
2026-02-04 16:06:01 +03:00
parent f36108d652
commit 3dea3605ad
5 changed files with 402 additions and 22 deletions

View File

@@ -14,10 +14,10 @@ def setup_logging():
# Create logs directory if it doesn't exist
logs_dir = Path("logs")
logs_dir.mkdir(exist_ok=True)
# Remove default logger to customize it
logger.remove()
# Add file handler with rotation
logger.add(
"logs/dev.log",
@@ -26,7 +26,7 @@ def setup_logging():
level="INFO",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {file}:{line} | {message}"
)
# Add stdout handler
logger.add(
sys.stdout,
@@ -57,5 +57,35 @@ def ping(verbose):
logger.info("Ping command executed")
@main.command(help="Load and process documents from the data folder into vector storage")
@click.option('--data-path', '-d', default="../../../data", help="Path to the data folder relative to current directory")
# Declared as an on/off pair: the previous `is_flag=True, default=True` form
# could never be switched off from the command line. `--recursive` and `-r`
# keep working exactly as before; `--no-recursive` is the new opt-out.
@click.option('--recursive/--no-recursive', '-r', default=True, help="Process subdirectories recursively")
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose output")
def enrich(data_path, recursive, verbose):
    """Load and process documents from the data folder into vector storage.

    Args:
        data_path: Path to the data folder; forwarded unchanged to
            ``process_documents_from_data_folder``.
        recursive: Whether subdirectories are processed too; forwarded
            unchanged to ``process_documents_from_data_folder``.
        verbose: When set, calls ``logger.enable("__main__")`` before any
            work is done.

    Raises:
        SystemExit: With exit code 1 when the enrichment module cannot be
            imported or when processing fails, so that shells and CI jobs
            can detect the failure instead of seeing a zero exit status.
    """
    if verbose:
        logger.enable("__main__")

    logger.info(f"Starting document enrichment from: {data_path}")
    logger.info(f"Recursive processing: {recursive}")

    # The import happens inside the command so that a missing or broken
    # module is reported as a CLI error rather than at program start-up.
    # Only the name that is actually called is imported.
    try:
        from enrichment import process_documents_from_data_folder
    except ImportError as e:
        logger.error(f"Failed to import enrichment module: {e}")
        click.echo(f"Error: Could not import enrichment module: {e}")
        raise SystemExit(1) from e
    logger.info("Enrichment module imported successfully")

    # Top-level CLI boundary: report any processing failure to the user and
    # the log, then exit non-zero instead of silently returning success.
    try:
        process_documents_from_data_folder(data_path=data_path, recursive=recursive)
    except Exception as e:
        logger.error(f"Error during document enrichment: {e}")
        click.echo(f"Error during document enrichment: {e}")
        raise SystemExit(1) from e

    logger.info("Document enrichment completed successfully")
    click.echo("Document enrichment completed successfully")
# Run the CLI only when this file is executed as a script; a single guarded
# call replaces the duplicated `main()` invocation so that importing the
# module never starts the command-line interface.
if __name__ == '__main__':
    main()