Files
rag-solution/services/rag/langchain/cli.py

188 lines
5.6 KiB
Python
Raw Normal View History

2026-02-03 19:51:35 +03:00
import os
2026-03-11 22:30:02 +03:00
import csv
import json
2026-02-03 19:51:35 +03:00
from pathlib import Path
2026-02-03 22:55:12 +03:00
import click
from dotenv import load_dotenv
2026-02-03 22:55:12 +03:00
from loguru import logger
2026-02-05 00:08:59 +03:00
# Load environment variables at import time, before any command runs
# (e.g. `enrich` reads ENRICHMENT_SOURCE via os.getenv).
# NOTE(review): presumably sourced from a local .env file — confirm location.
load_dotenv()
2026-02-03 19:51:35 +03:00
# Configure logging to output to both file and stdout as specified in requirements
def setup_logging():
    """Make sure the ``logs`` directory exists and attach a file log sink.

    The sink is added alongside whatever sinks the logger already has.
    """
    # Relative path, so it resolves against the current working directory.
    Path("logs").mkdir(exist_ok=True)

    # File sink configured with a 10 MB rotation and a 10 day retention.
    logger.add(
        "logs/dev.log",
        rotation="10 MB",
        retention="10 days",
    )
2026-02-03 19:51:35 +03:00
@click.group()
def cli():
    """Main CLI group"""
    # NOTE: the docstring above doubles as the group's --help text (no
    # explicit help= is passed), so keep it short and user-facing.
    # Logging is configured here so every subcommand gets the file sink.
    setup_logging()
@cli.command(
    name="ping",
    help="Ping command that outputs pong",
)
def ping():
    """Log the invocation and print ``pong``."""
    logger.info("Ping command executed")
    click.echo("pong")
2026-02-03 22:55:12 +03:00
@cli.command(
    name="enrich",
    help="Load documents from data directory and store in vector database",
)
@click.option(
    "--collection-name",
    default="documents_langchain",
    help="Name of the vector store collection",
)
def enrich(collection_name):
    """Initialize the vector store and run the enrichment process on it.

    Args:
        collection_name: Passed to ``initialize_vector_store`` as
            ``collection_name``.

    Raises:
        click.ClickException: If initialization or enrichment fails. Click
            prints ``Error: <message>`` to stderr and exits with status 1,
            so a failed run is no longer reported to the shell as success.
    """
    logger.info(
        f"Starting enrichment process. Enrichment source: {os.getenv('ENRICHMENT_SOURCE')}"
    )

    try:
        # Imported lazily to avoid circular dependencies.
        from enrichment import run_enrichment_process
        from vector_storage import initialize_vector_store

        vector_store = initialize_vector_store(collection_name=collection_name)
        run_enrichment_process(vector_store)
    except Exception as e:
        # logger.exception records the traceback, not just the message.
        logger.exception(f"Error during enrichment process: {str(e)}")
        raise click.ClickException(str(e)) from e

    logger.info("Enrichment process completed successfully!")
    click.echo("Documents have been successfully loaded into the vector store.")
2026-02-03 23:25:24 +03:00
@cli.command(
    name="retrieve",
    help="Retrieve documents from vector database based on a query",
)
@click.argument("query")
@click.option(
    "--collection-name",
    default="documents_langchain",
    help="Name of the vector store collection",
)
@click.option(
    "--top-k",
    default=5,
    help="Number of documents to retrieve",
)
def retrieve(query, collection_name, top_k):
    """Log the query and print a notice that retrieval is disabled.

    ``collection_name`` and ``top_k`` are still accepted but unused: no
    retrieval is performed by this command.
    """
    disabled_notice = (
        "WARNING: Retrieval disabled, since it is no longer relevant for the "
        "testing of the retrieving feature. Use chat with agent instead. xoxo"
    )
    logger.info(f"Starting retrieval process for query: {query}")
    click.echo(disabled_notice)
2026-02-03 23:25:24 +03:00
@cli.command(
    name="chat",
    help="Start an interactive chat session with the RAG agent",
)
@click.option(
    "--collection-name",
    default="documents_langchain",
    help="Name of the vector store collection",
)
@click.option(
    "--model",
    default=None,
    help="Name of the Ollama model to use for chat",
)
def chat(collection_name, model):
    """Run the interactive chat loop of the RAG agent.

    Args:
        collection_name: Forwarded to ``run_chat_loop`` as ``collection_name``.
        model: Forwarded to ``run_chat_loop`` as ``llm_model``; ``None`` when
            ``--model`` is not given.

    Raises:
        click.ClickException: If the agent import or the chat loop fails.
            Click prints ``Error: <message>`` to stderr and exits with
            status 1, so a failed session is not reported as success.
    """
    logger.info("Starting chat session with RAG agent")

    try:
        # Imported lazily: avoids circular dependencies and loads the agent
        # only when this command is actually used.
        from agent import run_chat_loop

        click.echo("Initializing chat agent...")
        click.echo("Type 'quit' or 'exit' to end the conversation.\n")
        run_chat_loop(collection_name=collection_name, llm_model=model)
    except Exception as e:
        # logger.exception records the traceback, not just the message.
        logger.exception(f"Error during chat session: {str(e)}")
        raise click.ClickException(str(e)) from e

    logger.info("Chat session ended")
2026-03-11 22:30:02 +03:00
def _filter_supported_paths(raw_paths, supported_extensions):
    """Return unique, non-empty paths whose suffix is in *supported_extensions*.

    Each item is converted with ``str`` and stripped. The suffix is compared
    in lowercase, and the order of first occurrence is preserved.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    candidates = dict.fromkeys(str(item).strip() for item in raw_paths)
    return [
        path_str
        for path_str in candidates
        if path_str and Path(path_str).suffix.lower() in supported_extensions
    ]


@cli.command(
    name="export-supported-paths",
    help="Filter JSON paths by enrichment-supported extensions and export JSON/CSV",
)
@click.argument("input_json", type=click.Path(exists=True, dir_okay=False, path_type=Path))
def export_supported_paths(input_json: Path):
    """Export supported paths to yadisk_imported_paths.json and .csv.

    Reads a JSON array from ``input_json``, keeps the unique entries whose
    extension is in ``enrichment.SUPPORTED_EXTENSIONS``, and writes them to
    the current working directory as a JSON array and a one-column CSV with
    a ``path`` header.

    Args:
        input_json: Path to an existing JSON file containing an array.

    Raises:
        click.ClickException: If the input cannot be read or parsed, is not
            a JSON array, or the output files cannot be written. Click
            prints ``Error: <message>`` to stderr and exits with status 1,
            so a failed export is not reported as success.
    """
    logger.info(f"Filtering supported paths from input file: {input_json}")

    try:
        # Imported lazily so the enrichment module loads only when needed.
        from enrichment import SUPPORTED_EXTENSIONS

        with input_json.open("r", encoding="utf-8") as source_file:
            raw_data = json.load(source_file)

        if not isinstance(raw_data, list):
            raise ValueError("Input JSON must contain an array of file paths")

        filtered_paths = _filter_supported_paths(raw_data, SUPPORTED_EXTENSIONS)

        output_json = Path.cwd() / "yadisk_imported_paths.json"
        output_csv = Path.cwd() / "yadisk_imported_paths.csv"

        with output_json.open("w", encoding="utf-8") as output_json_file:
            json.dump(filtered_paths, output_json_file, ensure_ascii=False, indent=2)

        # newline="" lets the csv module control line endings itself.
        with output_csv.open("w", encoding="utf-8", newline="") as output_csv_file:
            writer = csv.writer(output_csv_file)
            writer.writerow(["path"])
            writer.writerows([path_item] for path_item in filtered_paths)
    except Exception as error:
        # logger.exception records the traceback, not just the message.
        logger.exception(f"Failed to export supported paths: {error}")
        raise click.ClickException(str(error)) from error

    click.echo(
        f"Export complete: {len(filtered_paths)} supported paths saved to {output_json.name} and {output_csv.name}"
    )
    logger.info(
        f"Exported {len(filtered_paths)} supported paths to {output_json} and {output_csv}"
    )
2026-02-03 19:51:35 +03:00
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    cli()