Evaluation for RAG systems
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
import os
|
||||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
@@ -126,5 +128,60 @@ def chat(collection_name, model):
|
||||
click.echo(f"Error: {str(e)}")
|
||||
|
||||
|
||||
def _select_supported_paths(raw_paths, supported_extensions):
    """Return the unique supported path strings from *raw_paths*, in first-seen order.

    Args:
        raw_paths: Iterable of decoded JSON values; only ``str`` entries are
            considered, anything else is ignored.
        supported_extensions: Container of lower-case suffixes (as produced by
            ``Path.suffix.lower()``) that should be kept.

    Returns:
        A list of whitespace-stripped, non-empty path strings whose suffix is
        in *supported_extensions*, without duplicates.
    """
    # dict.fromkeys de-duplicates while preserving the order of first occurrence.
    unique_paths = dict.fromkeys(
        entry.strip() for entry in raw_paths if isinstance(entry, str)
    )
    return [
        path_str
        for path_str in unique_paths
        if path_str and Path(path_str).suffix.lower() in supported_extensions
    ]


@cli.command(
    name="export-supported-paths",
    help="Filter JSON paths by enrichment-supported extensions and export JSON/CSV",
)
@click.argument("input_json", type=click.Path(exists=True, dir_okay=False, path_type=Path))
def export_supported_paths(input_json: Path):
    """Export supported document paths into yadisk_imported_paths.json and yadisk_imported_paths.csv.

    Reads *input_json*, which must contain a JSON array of path strings, keeps
    the unique entries whose file extension is listed in
    ``enrichment.SUPPORTED_EXTENSIONS`` and writes them to two files in the
    current working directory: a JSON array and a single-column CSV with a
    ``path`` header.

    Args:
        input_json: Path to an existing JSON file (validated by click).

    Any failure (import error, invalid JSON, wrong top-level type, I/O error)
    is logged and echoed as ``Error: ...`` instead of being raised.
    """
    logger.info(f"Filtering supported paths from input file: {input_json}")

    try:
        # Imported inside the try block so that an import failure is reported
        # through the same handler as every other error of this command.
        from enrichment import SUPPORTED_EXTENSIONS

        with input_json.open("r", encoding="utf-8") as source_file:
            raw_data = json.load(source_file)

        if not isinstance(raw_data, list):
            raise ValueError("Input JSON must contain an array of file paths")

        # Non-string entries (null, numbers, objects) cannot be file paths;
        # skip them explicitly rather than coercing them with str().
        skipped_entries = sum(1 for item in raw_data if not isinstance(item, str))
        if skipped_entries:
            logger.warning(
                f"Skipped {skipped_entries} non-string entries in {input_json}"
            )

        filtered_paths = _select_supported_paths(raw_data, SUPPORTED_EXTENSIONS)

        output_json = Path.cwd() / "yadisk_imported_paths.json"
        output_csv = Path.cwd() / "yadisk_imported_paths.csv"

        with output_json.open("w", encoding="utf-8") as output_json_file:
            json.dump(filtered_paths, output_json_file, ensure_ascii=False, indent=2)

        # newline="" lets the csv module control line endings itself.
        with output_csv.open("w", encoding="utf-8", newline="") as output_csv_file:
            writer = csv.writer(output_csv_file)
            writer.writerow(["path"])
            writer.writerows([path_item] for path_item in filtered_paths)

        click.echo(
            f"Export complete: {len(filtered_paths)} supported paths saved to {output_json.name} and {output_csv.name}"
        )
        logger.info(
            f"Exported {len(filtered_paths)} supported paths to {output_json} and {output_csv}"
        )
    except Exception as error:
        # Command boundary: report the failure to both the log and the user.
        logger.error(f"Failed to export supported paths: {error}")
        click.echo(f"Error: {error}")
|
||||
|
||||
|
||||
# Invoke the CLI only when this file is executed directly as a script.
if __name__ == "__main__":
    cli()
|
||||
|
||||
Reference in New Issue
Block a user