Files
rag-solution/ext_stats.py

49 lines
1.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import argparse
import json
from collections import Counter
from pathlib import Path
def normalize_ext(path_str: str) -> str:
    """Return the lowercased final suffix of *path_str*.

    Only the last suffix is used, so ``"archive.tar.gz"`` yields ``".gz"``.
    When the path has no suffix the placeholder ``"(no_ext)"`` is returned
    instead of an empty string.
    """
    suffix = Path(path_str).suffix
    # An empty suffix is falsy, so `or` supplies the placeholder label.
    return suffix.lower() or "(no_ext)"
def load_paths(json_path: Path) -> list[str]:
    """Read a JSON file and return its content as a list of path strings.

    Args:
        json_path: Location of a UTF-8 encoded JSON document.

    Returns:
        The decoded list, unchanged.

    Raises:
        ValueError: If the top-level JSON value is not a list, or if any
            element of the list is not a string.
    """
    payload = json.loads(json_path.read_text(encoding="utf-8"))

    if not isinstance(payload, list):
        raise ValueError("Expected JSON to be a list of file paths")

    # Reject the whole document as soon as one non-string entry is seen.
    for entry in payload:
        if not isinstance(entry, str):
            raise ValueError("Expected JSON list to contain only strings")

    return payload
def main() -> int:
    """Print an extension frequency table for a JSON list of file paths.

    Parses one optional positional command-line argument, ``json_path``
    (default ``yadisk_files.json``), loads the list of path strings from that
    file, and prints one line per distinct extension in the form
    ``<extension><TAB><count>``, most frequent first.

    Returns:
        0 on success.

    Raises:
        SystemExit: With a one-line message when the file does not exist,
            cannot be read, or does not contain a JSON list of strings.
    """
    parser = argparse.ArgumentParser(
        description="Count file extensions from a JSON list of paths."
    )
    parser.add_argument(
        "json_path",
        nargs="?",
        default="yadisk_files.json",
        help="Path to JSON file (default: yadisk_files.json)",
    )
    args = parser.parse_args()

    json_path = Path(args.json_path)
    if not json_path.exists():
        raise SystemExit(f"JSON file not found: {json_path}")

    try:
        paths = load_paths(json_path)
    except (OSError, ValueError) as exc:
        # OSError: the path is a directory or is unreadable.
        # ValueError: malformed JSON (json.JSONDecodeError), undecodable
        # bytes (UnicodeDecodeError), or load_paths' own validation errors.
        # Exit with a short message instead of a traceback, matching the
        # missing-file case above.
        raise SystemExit(f"Cannot load {json_path}: {exc}") from exc

    counts = Counter(normalize_ext(p) for p in paths)
    for ext, count in counts.most_common():
        print(f"{ext}\t{count}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)