Enrich document chunks with extracted years and Russian event names as metadata
This commit is contained in:
@@ -39,6 +39,8 @@ from sqlalchemy.orm import sessionmaker
|
||||
from loguru import logger
|
||||
import sqlite3
|
||||
|
||||
from helpers import extract_russian_event_names, extract_years_from_text
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
@@ -189,6 +191,13 @@ class DocumentEnricher:
|
||||
# Split documents if they are too large
|
||||
split_docs = self.text_splitter.split_documents(docs)
|
||||
|
||||
# Extract additional metadata from each chunk.
|
||||
for chunk in split_docs:
|
||||
years = extract_years_from_text(chunk.page_content)
|
||||
events = extract_russian_event_names(chunk.page_content)
|
||||
chunk.metadata["years"] = years
|
||||
chunk.metadata["events"] = events
|
||||
|
||||
# Add to the collection
|
||||
all_docs.extend(split_docs)
|
||||
|
||||
@@ -277,4 +286,4 @@ if __name__ == "__main__":
|
||||
vector_store = initialize_vector_store()
|
||||
|
||||
# Run enrichment process
|
||||
run_enrichment_process(vector_store)
|
||||
run_enrichment_process(vector_store)
|
||||
|
||||
Reference in New Issue
Block a user