Properly check the file's source for metadata, using isinstance
This commit is contained in:
@@ -118,7 +118,7 @@ def try_guess_file_type(extension: str) -> str:
|
||||
|
||||
|
||||
def identify_adaptive_file_source(adaptive_file: _AdaptiveFile) -> str:
|
||||
if adaptive_file is YandexDiskAdaptiveFile:
|
||||
if isinstance(adaptive_file, YandexDiskAdaptiveFile):
|
||||
return "Яндекс Диск"
|
||||
else:
|
||||
return "Локальный Файл"
|
||||
@@ -279,9 +279,11 @@ class DocumentEnricher:
|
||||
file_hash = self._get_file_hash(local_file_path)
|
||||
if self._is_document_hash_processed(file_hash):
|
||||
logger.info(
|
||||
f"Skipping already processed document hash for: {source_identifier}"
|
||||
f"SKIPPING already processed document hash for: {source_identifier}"
|
||||
)
|
||||
return
|
||||
else:
|
||||
logger.info("Document is not processed! Doing it")
|
||||
|
||||
loader = self._get_loader_for_extension(local_file_path)
|
||||
if loader is None:
|
||||
@@ -326,6 +328,7 @@ class DocumentEnricher:
|
||||
|
||||
self.ADAPTIVE_FILES_QUEUE.put(adaptive_file)
|
||||
|
||||
logger.debug("ADAPTIVE COLLECTION DEPLETED!")
|
||||
self.collection_finished.set()
|
||||
|
||||
# Phase 13 API: reads adaptive files and writes processed docs into PROCESSED_DOCUMENTS_QUEUE
|
||||
@@ -368,8 +371,9 @@ class DocumentEnricher:
|
||||
processed_record[0], processed_record[1]
|
||||
)
|
||||
except Exception as error:
|
||||
logger.error(f"Error uploading processed documents: {error}")
|
||||
raise
|
||||
logger.error(
|
||||
f"Error uploading processed documents: {error}. But swallowing error. NOT raising."
|
||||
)
|
||||
finally:
|
||||
self.PROCESSED_DOCUMENTS_QUEUE.task_done()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user