Properly check the source of the file for metadata, using isinstance
This commit is contained in:
@@ -118,7 +118,7 @@ def try_guess_file_type(extension: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def identify_adaptive_file_source(adaptive_file: _AdaptiveFile) -> str:
|
def identify_adaptive_file_source(adaptive_file: _AdaptiveFile) -> str:
|
||||||
if adaptive_file is YandexDiskAdaptiveFile:
|
if isinstance(adaptive_file, YandexDiskAdaptiveFile):
|
||||||
return "Яндекс Диск"
|
return "Яндекс Диск"
|
||||||
else:
|
else:
|
||||||
return "Локальный Файл"
|
return "Локальный Файл"
|
||||||
@@ -279,9 +279,11 @@ class DocumentEnricher:
|
|||||||
file_hash = self._get_file_hash(local_file_path)
|
file_hash = self._get_file_hash(local_file_path)
|
||||||
if self._is_document_hash_processed(file_hash):
|
if self._is_document_hash_processed(file_hash):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Skipping already processed document hash for: {source_identifier}"
|
f"SKIPPING already processed document hash for: {source_identifier}"
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
else:
|
||||||
|
logger.info("Document is not processed! Doing it")
|
||||||
|
|
||||||
loader = self._get_loader_for_extension(local_file_path)
|
loader = self._get_loader_for_extension(local_file_path)
|
||||||
if loader is None:
|
if loader is None:
|
||||||
@@ -326,6 +328,7 @@ class DocumentEnricher:
|
|||||||
|
|
||||||
self.ADAPTIVE_FILES_QUEUE.put(adaptive_file)
|
self.ADAPTIVE_FILES_QUEUE.put(adaptive_file)
|
||||||
|
|
||||||
|
logger.debug("ADAPTIVE COLLECTION DEPLETED!")
|
||||||
self.collection_finished.set()
|
self.collection_finished.set()
|
||||||
|
|
||||||
# Phase 13 API: reads adaptive files and writes processed docs into PROCESSED_DOCUMENTS_QUEUE
|
# Phase 13 API: reads adaptive files and writes processed docs into PROCESSED_DOCUMENTS_QUEUE
|
||||||
@@ -368,8 +371,9 @@ class DocumentEnricher:
|
|||||||
processed_record[0], processed_record[1]
|
processed_record[0], processed_record[1]
|
||||||
)
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logger.error(f"Error uploading processed documents: {error}")
|
logger.error(
|
||||||
raise
|
f"Error uploading processed documents: {error}. But swallowing error. NOT raising."
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
self.PROCESSED_DOCUMENTS_QUEUE.task_done()
|
self.PROCESSED_DOCUMENTS_QUEUE.task_done()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user