Enrichment is now processed in chunks. 2 documents -> into the vector storage. Also guessing the source from the file extension

This commit is contained in:
2026-02-11 11:23:50 +03:00
parent 1e6ab247b9
commit 7b52887558
4 changed files with 127 additions and 81 deletions

View File

@@ -115,13 +115,11 @@ def extract_russian_event_names(text: str) -> List[str]:
class _AdaptiveFile(ABC):
extension: str # Format: .jpg
local_path: str
filename: str
def __init__(self, filename: str, extension: str, local_path: str):
def __init__(self, filename: str, extension: str):
self.filename = filename
self.extension = extension
self.local_path = local_path
# This method allows to work with file locally, and lambda should be provided for this.
# Why separate method? For possible cleanup after work is done. And to download file, if needed
@@ -139,8 +137,11 @@ class _AdaptiveCollection(ABC):
class LocalFilesystemAdaptiveFile(_AdaptiveFile):
local_path: str
def __init__(self, filename: str, extension: str, local_path: str):
super().__init__(filename, extension, local_path)
super().__init__(filename, extension)
self.local_path = local_path
def work_with_file_locally(self, func: Callable[[str], None]):
func(self.local_path)
@@ -171,7 +172,7 @@ class YandexDiskAdaptiveFile(_AdaptiveFile):
remote_path: str
def __init__(self, filename: str, extension: str, remote_path: str, token: str):
super().__init__(filename, extension, remote_path)
super().__init__(filename, extension)
self.token = token
self.remote_path = remote_path