Check the file's source properly for metadata, using isinstance

This commit is contained in:
2026-02-11 16:23:27 +03:00
parent f5659675ec
commit 93d538ecc6

View File

@@ -118,7 +118,7 @@ def try_guess_file_type(extension: str) -> str:
def identify_adaptive_file_source(adaptive_file: _AdaptiveFile) -> str:
if adaptive_file is YandexDiskAdaptiveFile:
if isinstance(adaptive_file, YandexDiskAdaptiveFile):
return "Яндекс Диск"
else:
return "Локальный Файл"
@@ -279,9 +279,11 @@ class DocumentEnricher:
file_hash = self._get_file_hash(local_file_path)
if self._is_document_hash_processed(file_hash):
logger.info(
f"Skipping already processed document hash for: {source_identifier}"
f"SKIPPING already processed document hash for: {source_identifier}"
)
return
else:
logger.info("Document is not processed! Doing it")
loader = self._get_loader_for_extension(local_file_path)
if loader is None:
@@ -326,6 +328,7 @@ class DocumentEnricher:
self.ADAPTIVE_FILES_QUEUE.put(adaptive_file)
logger.debug("ADAPTIVE COLLECTION DEPLETED!")
self.collection_finished.set()
# Phase 13 API: reads adaptive files and writes processed docs into PROCESSED_DOCUMENTS_QUEUE
@@ -368,8 +371,9 @@ class DocumentEnricher:
processed_record[0], processed_record[1]
)
except Exception as error:
logger.error(f"Error uploading processed documents: {error}")
raise
logger.error(
f"Error uploading processed documents: {error}. But swallowing error. NOT raising."
)
finally:
self.PROCESSED_DOCUMENTS_QUEUE.task_done()