Prep for Phase 12 of loading files for enrichment through the adaptive collections
This commit is contained in:
@@ -116,10 +116,11 @@ def extract_russian_event_names(text: str) -> List[str]:
|
||||
class _AdaptiveFile(ABC):
|
||||
extension: str # Format: .jpg
|
||||
local_path: str
|
||||
filename: str
|
||||
|
||||
def __init__(self, extension: str, local_path: str):
|
||||
def __init__(self, filename: str, extension: str):
|
||||
self.filename = filename
|
||||
self.extension = extension
|
||||
self.local_path = local_path
|
||||
|
||||
# This method lets the caller work with the file locally; a lambda must be provided for this.
|
||||
# Why a separate method? To allow cleanup after the work is done, and to download the file first if needed.
|
||||
@@ -137,6 +138,12 @@ class _AdaptiveCollection(ABC):
|
||||
|
||||
|
||||
class LocalFilesystemAdaptiveFile(_AdaptiveFile):
|
||||
local_path: str
|
||||
|
||||
def __init__(self, filename: str, extension: str, local_path: str):
|
||||
super().__init__(filename, extension)
|
||||
self.local_path = local_path
|
||||
|
||||
def work_with_file_locally(self, func: Callable[[str], None]):
|
||||
func(self.local_path)
|
||||
|
||||
@@ -153,7 +160,8 @@ class LocalFilesystemAdaptiveCollection(_AdaptiveCollection):
|
||||
for root, dirs, files in os.walk(self.base_dir):
|
||||
for file in files:
|
||||
full_path = os.path.join(root, file)
|
||||
yield LocalFilesystemAdaptiveFile(Path(full_path).suffix, full_path)
|
||||
p = Path(full_path)
|
||||
yield LocalFilesystemAdaptiveFile(p.name, p.suffix, full_path)
|
||||
|
||||
if not recursive:
|
||||
break
|
||||
@@ -162,16 +170,19 @@ class LocalFilesystemAdaptiveCollection(_AdaptiveCollection):
|
||||
class YandexDiskAdaptiveFile(_AdaptiveFile):
|
||||
"""Adaptive file representation for Yandex Disk resources."""
|
||||
|
||||
def __init__(self, extension: str, local_path: str, token: str):
|
||||
super().__init__(extension, local_path)
|
||||
remote_path: str
|
||||
|
||||
def __init__(self, filename: str, extension: str, remote_path: str, token: str):
|
||||
super().__init__(filename, extension)
|
||||
self.token = token
|
||||
self.remote_path = remote_path
|
||||
|
||||
def _download_to_temp_file(self) -> str:
|
||||
headers = {"Authorization": f"OAuth {self.token}"}
|
||||
response = requests.get(
|
||||
"https://cloud-api.yandex.net/v1/disk/resources/download",
|
||||
headers=headers,
|
||||
params={"path": self.local_path},
|
||||
params={"path": self.remote_path},
|
||||
timeout=30,
|
||||
)
|
||||
response.raise_for_status()
|
||||
@@ -180,7 +191,8 @@ class YandexDiskAdaptiveFile(_AdaptiveFile):
|
||||
file_response = requests.get(href, timeout=120)
|
||||
file_response.raise_for_status()
|
||||
|
||||
suffix = Path(self.local_path).suffix
|
||||
p = Path(self.remote_path)
|
||||
suffix = p.suffix
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
||||
temp_file.write(file_response.content)
|
||||
return temp_file.name
|
||||
@@ -249,7 +261,8 @@ class YandexDiskAdaptiveCollection(_AdaptiveCollection):
|
||||
if root_info.get("type") == "file":
|
||||
path = root_info["path"]
|
||||
logger.info(f"Found file on Yandex Disk: {path}")
|
||||
yield YandexDiskAdaptiveFile(Path(path).suffix, path, self.token)
|
||||
p = Path(path)
|
||||
yield YandexDiskAdaptiveFile(p.name, p.suffix, path, self.token)
|
||||
return
|
||||
|
||||
directories = [root_path]
|
||||
@@ -257,11 +270,12 @@ class YandexDiskAdaptiveCollection(_AdaptiveCollection):
|
||||
current_dir = directories.pop(0)
|
||||
for item in self._iter_children(current_dir):
|
||||
item_type = item.get("type")
|
||||
item_path = item.get("path")
|
||||
item_path = str(item.get("path"))
|
||||
if item_type == "file":
|
||||
logger.info(f"Found file on Yandex Disk: {item_path}")
|
||||
p = Path(item_path)
|
||||
yield YandexDiskAdaptiveFile(
|
||||
Path(item_path).suffix, item_path, self.token
|
||||
p.name, p.suffix, item_path, self.token
|
||||
)
|
||||
elif recursive and item_type == "dir":
|
||||
directories.append(item_path)
|
||||
|
||||
Reference in New Issue
Block a user