Enrichment is now processed in chunks. 2 documents -> into the vector storage. Also guessing the source from the file extension

This commit is contained in:
2026-02-11 11:23:50 +03:00
parent 1e6ab247b9
commit 7b52887558
4 changed files with 127 additions and 81 deletions

View File

@@ -37,15 +37,16 @@ def ping():
name="enrich",
help="Load documents from data directory and store in vector database",
)
@click.option("--data-dir", default="../../../data", help="Path to the data directory")
@click.option(
"--collection-name",
default="documents_langchain",
help="Name of the vector store collection",
)
def enrich(data_dir, collection_name):
def enrich(collection_name):
"""Load documents from data directory and store in vector database"""
logger.info(f"Starting enrichment process for directory: {data_dir}")
logger.info(
f"Starting enrichment process. Enrichment source: {os.getenv('ENRICHMENT_SOURCE')}"
)
try:
# Import here to avoid circular dependencies
@@ -56,7 +57,7 @@ def enrich(data_dir, collection_name):
vector_store = initialize_vector_store(collection_name=collection_name)
# Run enrichment process
run_enrichment_process(vector_store, data_dir=data_dir)
run_enrichment_process(vector_store)
logger.info("Enrichment process completed successfully!")
click.echo("Documents have been successfully loaded into the vector store.")