Add RAGFlow to the repository, together with a Codex-generated Yandex Disk plugin (just in case) and LlamaIndex enrichment using predefined Yandex Disk data
This commit is contained in:
48
ext_stats.py
Normal file
48
ext_stats.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def normalize_ext(path_str: str) -> str:
    """Return the lower-cased final extension of *path_str*.

    Only the last suffix is considered, so ``"a.tar.gz"`` yields ``".gz"``.
    Names without a suffix -- including dotfiles such as ``".bashrc"``, for
    which ``Path.suffix`` is empty -- are reported as ``"(no_ext)"``.

    Args:
        path_str: A file path or bare file name.

    Returns:
        The extension including its leading dot, in lower case, or the
        placeholder ``"(no_ext)"`` when the name has no extension.
    """
    ext = Path(path_str).suffix.lower()
    return ext if ext else "(no_ext)"
|
||||
|
||||
|
||||
def load_paths(json_path: Path) -> list[str]:
    """Load a JSON document that must be a flat list of path strings.

    Args:
        json_path: Location of the JSON file; it is read as UTF-8.

    Returns:
        The decoded list, unchanged.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass), if the top-level value is not a
            list, or if any element of the list is not a string.
    """
    with json_path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    # Validate the shape up front so callers can rely on list[str].
    if not isinstance(data, list):
        raise ValueError("Expected JSON to be a list of file paths")
    if not all(isinstance(item, str) for item in data):
        raise ValueError("Expected JSON list to contain only strings")
    return data
|
||||
|
||||
|
||||
def main() -> int:
    """Print a histogram of file extensions found in a JSON list of paths.

    The JSON file is taken from the optional positional command-line
    argument (default ``yadisk_files.json``). One line is printed per
    distinct extension as ``<ext>\\t<count>``, most frequent first.

    Returns:
        ``0`` on success, suitable for passing to ``SystemExit``.

    Raises:
        SystemExit: With an explanatory message if the file is missing,
            cannot be read, or does not contain a JSON list of strings.
    """
    parser = argparse.ArgumentParser(
        description="Count file extensions from a JSON list of paths."
    )
    parser.add_argument(
        "json_path",
        nargs="?",
        default="yadisk_files.json",
        help="Path to JSON file (default: yadisk_files.json)",
    )
    args = parser.parse_args()

    json_path = Path(args.json_path)
    # Attempt the read and handle failure rather than pre-checking existence:
    # this avoids a check-then-open race and also turns unreadable or
    # malformed input into a clean exit message instead of a traceback.
    try:
        paths = load_paths(json_path)
    except FileNotFoundError:
        raise SystemExit(f"JSON file not found: {json_path}") from None
    except (OSError, ValueError) as exc:
        raise SystemExit(f"Cannot load {json_path}: {exc}") from exc

    counts = Counter(normalize_ext(p) for p in paths)

    for ext, count in counts.most_common():
        print(f"{ext}\t{count}")
    return 0
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user