Adaptive Collection, and Phase 11 WIP

This commit is contained in:
2026-02-10 20:12:43 +03:00
parent 447ecaba39
commit 63c3e2c5c7
4 changed files with 56 additions and 2 deletions

View File

@@ -1,8 +1,10 @@
"""Helper utilities for metadata extraction from Russian text."""
import os
import re
from typing import List
from abc import abstractmethod
from pathlib import Path
from typing import Callable, List
_YEAR_PATTERN = re.compile(r"(?<!\d)(1\d{3}|20\d{2}|2100)(?!\d)")
@@ -105,3 +107,46 @@ def extract_russian_event_names(text: str) -> List[str]:
seen.add(quoted)
return events
class _AdaptiveFile:
extension: str # Format: .jpg
local_path: str
def __init__(self, extension: str, local_path: str):
self.extension = extension
self.local_path = local_path
# This method allows to work with file locally, and lambda should be provided for this.
# Why separate method? For possible cleanup after work is done. And to download file, if needed
# Lambda: first argument is a local path
@abstractmethod
def work_with_file_locally(self, func: Callable[[str], None]):
pass
class _AdaptiveCollection:
# Generator method with yield
@abstractmethod
def iterate(self, recursive: bool):
pass
class LocalFilesystemAdaptiveFile(_AdaptiveFile):
    """Adaptive file whose ``local_path`` is passed to the callback as-is."""

    def work_with_file_locally(self, func: Callable[[str], None]):
        """Call ``func`` with this file's ``local_path``.

        Args:
            func: Callback whose first argument is the local path of the file.
        """
        path_on_disk = self.local_path
        func(path_on_disk)
class LocalFilesystemAdaptiveCollection(_AdaptiveCollection):
    """Collection of the files found under a directory on the local filesystem."""

    base_dir: str  # Directory passed to os.walk as the starting point

    def __init__(self, base_dir: str):
        super().__init__()
        self.base_dir = base_dir

    def iterate(self, recursive: bool):
        """Yield a ``LocalFilesystemAdaptiveFile`` for each file under ``base_dir``.

        Args:
            recursive: When true, files in all nested subdirectories are
                included; when false, only files located directly in
                ``base_dir`` are yielded.

        Yields:
            ``LocalFilesystemAdaptiveFile`` objects whose ``extension`` is the
            path suffix (including the leading dot) and whose ``local_path``
            is the walked directory joined with the file name.
        """
        for root, dirs, files in os.walk(self.base_dir):
            if not recursive:
                # os.walk is top-down by default: emptying ``dirs`` in place
                # stops it from descending into subdirectories.
                dirs.clear()
            for file_name in files:
                full_path = os.path.join(root, file_name)
                # Yield the concrete local-file type; the base _AdaptiveFile
                # never invokes the callback in work_with_file_locally.
                yield LocalFilesystemAdaptiveFile(Path(full_path).suffix, full_path)