Evaluation for RAG systems

This commit is contained in:
2026-03-11 22:30:02 +03:00
parent 5721bad117
commit 6c953a327f
11 changed files with 31897 additions and 1 deletions

View File

@@ -1,4 +1,6 @@
import os
import csv
import json
from pathlib import Path
import click
@@ -126,5 +128,60 @@ def chat(collection_name, model):
click.echo(f"Error: {str(e)}")
@cli.command(
    name="export-supported-paths",
    help="Filter JSON paths by enrichment-supported extensions and export JSON/CSV",
)
@click.argument("input_json", type=click.Path(exists=True, dir_okay=False, path_type=Path))
def export_supported_paths(input_json: Path):
    """Filter a JSON array of paths down to supported extensions and export it.

    Reads ``input_json`` (expected to hold a JSON array), converts every entry
    to a stripped string, drops empty and duplicate entries, and keeps those
    whose lower-cased suffix is listed in ``enrichment.SUPPORTED_EXTENSIONS``.
    The surviving paths, in first-occurrence order, are written to
    ``yadisk_imported_paths.json`` and ``yadisk_imported_paths.csv`` in the
    current working directory.

    Any failure (unreadable input, non-array JSON, import or write errors) is
    logged and echoed to the user instead of being re-raised.
    """
    logger.info(f"Filtering supported paths from input file: {input_json}")

    try:
        # Imported inside the try block so an import failure is reported
        # through the same handler as every other error.
        from enrichment import SUPPORTED_EXTENSIONS

        with input_json.open("r", encoding="utf-8") as input_handle:
            entries = json.load(input_handle)

        if not isinstance(entries, list):
            raise ValueError("Input JSON must contain an array of file paths")

        # dict.fromkeys drops repeated paths while keeping the order in which
        # each path was first seen.
        normalized = (str(entry).strip() for entry in entries)
        supported = list(
            dict.fromkeys(
                candidate
                for candidate in normalized
                if candidate and Path(candidate).suffix.lower() in SUPPORTED_EXTENSIONS
            )
        )

        target_dir = Path.cwd()
        json_target = target_dir / "yadisk_imported_paths.json"
        csv_target = target_dir / "yadisk_imported_paths.csv"

        with json_target.open("w", encoding="utf-8") as json_handle:
            json.dump(supported, json_handle, ensure_ascii=False, indent=2)

        # newline="" lets the csv module control line endings itself.
        with csv_target.open("w", encoding="utf-8", newline="") as csv_handle:
            csv_writer = csv.writer(csv_handle)
            csv_writer.writerow(["path"])
            csv_writer.writerows([entry] for entry in supported)

        total = len(supported)
        click.echo(
            f"Export complete: {total} supported paths saved to {json_target.name} and {csv_target.name}"
        )
        logger.info(f"Exported {total} supported paths to {json_target} and {csv_target}")
    except Exception as error:
        logger.error(f"Failed to export supported paths: {error}")
        click.echo(f"Error: {error}")
# Run the click command group only when this file is executed as a script.
if __name__ == "__main__":
    cli()