Adaptive Collection, and Phase 11 WIP
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
"""Helper utilities for metadata extraction from Russian text."""
|
||||
|
||||
import os
import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable, Iterator, List
|
||||
|
||||
_YEAR_PATTERN = re.compile(r"(?<!\d)(1\d{3}|20\d{2}|2100)(?!\d)")
|
||||
|
||||
@@ -105,3 +107,46 @@ def extract_russian_event_names(text: str) -> List[str]:
|
||||
seen.add(quoted)
|
||||
|
||||
return events
|
||||
|
||||
|
||||
class _AdaptiveFile(ABC):
    """Abstract handle to one file of an adaptive collection.

    Attributes:
        extension: File suffix including the leading dot, e.g. ``.jpg``.
        local_path: Path handed to the callback in
            ``work_with_file_locally``.
    """

    extension: str  # Format: .jpg
    local_path: str

    def __init__(self, extension: str, local_path: str):
        self.extension = extension
        self.local_path = local_path

    @abstractmethod
    def work_with_file_locally(self, func: Callable[[str], None]) -> None:
        """Run *func* against a local copy of the file.

        This is a separate method (rather than exposing the path directly)
        so that an implementation can download the file first if needed and
        clean up after the work is done.

        Args:
            func: Callback whose first argument is the local path.
        """


class _AdaptiveCollection(ABC):
    """Abstract source of ``_AdaptiveFile`` objects."""

    @abstractmethod
    def iterate(self, recursive: bool) -> Iterator[_AdaptiveFile]:
        """Yield the files of the collection (generator method).

        Args:
            recursive: Whether files in nested containers are included.
        """


class LocalFilesystemAdaptiveFile(_AdaptiveFile):
    """Adaptive file that already lives on the local filesystem."""

    def work_with_file_locally(self, func: Callable[[str], None]) -> None:
        """Call *func* with ``local_path``; nothing to fetch or clean up."""
        func(self.local_path)


class LocalFilesystemAdaptiveCollection(_AdaptiveCollection):
    """Adaptive collection backed by a directory on the local filesystem.

    Attributes:
        base_dir: Directory whose files are enumerated by ``iterate``.
    """

    base_dir: str

    def __init__(self, base_dir: str):
        super().__init__()

        self.base_dir = base_dir

    def iterate(self, recursive: bool) -> Iterator[LocalFilesystemAdaptiveFile]:
        """Yield a ``LocalFilesystemAdaptiveFile`` for each file found.

        Args:
            recursive: When true, walk the whole tree below ``base_dir``;
                when false, yield only files directly inside ``base_dir``.

        Yields:
            One file object per regular directory entry, carrying the
            suffix of the file name and its joined path.
        """
        for root, dirs, files in os.walk(self.base_dir):
            if not recursive:
                # os.walk (top-down) descends only into what is left in
                # ``dirs``; emptying it in place stops after the top level.
                dirs.clear()
            for file_name in files:
                full_path = os.path.join(root, file_name)
                # Yield the concrete subclass: the abstract base has no
                # usable work_with_file_locally implementation.
                yield LocalFilesystemAdaptiveFile(
                    Path(full_path).suffix, full_path
                )
|
||||
|
||||
Reference in New Issue
Block a user