Removed test retrieval feature. Off you go.
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
# Load environment variables
|
# Load environment variables
|
||||||
@@ -85,37 +85,10 @@ def retrieve(query, collection_name, top_k):
|
|||||||
"""Retrieve documents from vector database based on a query"""
|
"""Retrieve documents from vector database based on a query"""
|
||||||
logger.info(f"Starting retrieval process for query: {query}")
|
logger.info(f"Starting retrieval process for query: {query}")
|
||||||
|
|
||||||
try:
|
click.echo(
|
||||||
# Import here to avoid circular dependencies
|
"WARNING: Retrieval disabled, since it is no longer relevant for the testing of the retrieving feature. Use chat with agent instead. xoxo"
|
||||||
from retrieval import search_documents_with_metadata
|
|
||||||
|
|
||||||
# Perform retrieval
|
|
||||||
results = search_documents_with_metadata(
|
|
||||||
query=query,
|
|
||||||
collection_name=collection_name,
|
|
||||||
top_k=top_k
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not results:
|
|
||||||
click.echo("No relevant documents found for the query.")
|
|
||||||
return
|
|
||||||
|
|
||||||
click.echo(f"Found {len(results)} relevant documents:\n")
|
|
||||||
|
|
||||||
for i, result in enumerate(results, 1):
|
|
||||||
click.echo(f"{i}. Source: {result['source']}")
|
|
||||||
click.echo(f" Filename: {result['filename']}")
|
|
||||||
click.echo(f" Page: {result['page_number']}")
|
|
||||||
click.echo(f" File Extension: {result['file_extension']}")
|
|
||||||
click.echo(f" Content Preview: {result['content'][:200]}...")
|
|
||||||
click.echo(f" Metadata: {result['metadata']}\n")
|
|
||||||
|
|
||||||
logger.info("Retrieval process completed successfully!")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during retrieval process: {str(e)}")
|
|
||||||
click.echo(f"Error: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command(
|
@cli.command(
|
||||||
name="chat",
|
name="chat",
|
||||||
@@ -143,10 +116,7 @@ def chat(collection_name, model):
|
|||||||
click.echo("Type 'quit' or 'exit' to end the conversation.\n")
|
click.echo("Type 'quit' or 'exit' to end the conversation.\n")
|
||||||
|
|
||||||
# Run the interactive chat loop
|
# Run the interactive chat loop
|
||||||
run_chat_loop(
|
run_chat_loop(collection_name=collection_name, llm_model=model)
|
||||||
collection_name=collection_name,
|
|
||||||
llm_model=model
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info("Chat session ended")
|
logger.info("Chat session ended")
|
||||||
|
|
||||||
|
|||||||
@@ -2,10 +2,11 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from langchain_core.retrievers import BaseRetriever
|
|
||||||
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
|
from langchain_core.retrievers import BaseRetriever
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from vector_storage import initialize_vector_store
|
from vector_storage import initialize_vector_store
|
||||||
@@ -60,7 +61,9 @@ def create_retriever(collection_name: str = "documents_langchain", top_k: int =
|
|||||||
Returns:
|
Returns:
|
||||||
VectorStoreRetriever instance
|
VectorStoreRetriever instance
|
||||||
"""
|
"""
|
||||||
logger.info(f"Initializing vector store for retrieval from collection: {collection_name}")
|
logger.info(
|
||||||
|
f"Initializing vector store for retrieval from collection: {collection_name}"
|
||||||
|
)
|
||||||
|
|
||||||
# Initialize the vector store
|
# Initialize the vector store
|
||||||
vector_store = initialize_vector_store(collection_name=collection_name)
|
vector_store = initialize_vector_store(collection_name=collection_name)
|
||||||
@@ -71,35 +74,8 @@ def create_retriever(collection_name: str = "documents_langchain", top_k: int =
|
|||||||
return retriever
|
return retriever
|
||||||
|
|
||||||
|
|
||||||
def search_documents(query: str, collection_name: str = "documents_langchain", top_k: int = 5) -> List[Document]:
|
|
||||||
"""
|
|
||||||
Search for documents in the vector store based on the query.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: The query string to search for
|
|
||||||
collection_name: Name of the Qdrant collection to use
|
|
||||||
top_k: Number of documents to retrieve
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of documents with metadata
|
|
||||||
"""
|
|
||||||
logger.info(f"Starting document search for query: {query}")
|
|
||||||
|
|
||||||
# Create the retriever
|
|
||||||
retriever = create_retriever(collection_name=collection_name, top_k=top_k)
|
|
||||||
|
|
||||||
# Perform the search
|
|
||||||
results = retriever.invoke(query)
|
|
||||||
|
|
||||||
logger.info(f"Search completed, returned {len(results)} documents")
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def search_documents_with_metadata(
|
def search_documents_with_metadata(
|
||||||
query: str,
|
query: str, collection_name: str = "documents_langchain", top_k: int = 5
|
||||||
collection_name: str = "documents_langchain",
|
|
||||||
top_k: int = 5
|
|
||||||
) -> List[dict]:
|
) -> List[dict]:
|
||||||
"""
|
"""
|
||||||
Search for documents and return them with detailed metadata.
|
Search for documents and return them with detailed metadata.
|
||||||
@@ -129,30 +105,20 @@ def search_documents_with_metadata(
|
|||||||
"metadata": doc.metadata,
|
"metadata": doc.metadata,
|
||||||
"source": doc.metadata.get("source", "Unknown"),
|
"source": doc.metadata.get("source", "Unknown"),
|
||||||
"filename": doc.metadata.get("filename", "Unknown"),
|
"filename": doc.metadata.get("filename", "Unknown"),
|
||||||
"page_number": doc.metadata.get("page_number", doc.metadata.get("page", "N/A")),
|
"page_number": doc.metadata.get(
|
||||||
|
"page_number", doc.metadata.get("page", "N/A")
|
||||||
|
),
|
||||||
"file_extension": doc.metadata.get("file_extension", "N/A"),
|
"file_extension": doc.metadata.get("file_extension", "N/A"),
|
||||||
"file_size": doc.metadata.get("file_size", "N/A")
|
"file_size": doc.metadata.get("file_size", "N/A"),
|
||||||
}
|
}
|
||||||
formatted_results.append(formatted_result)
|
formatted_results.append(formatted_result)
|
||||||
|
|
||||||
logger.info(f"Metadata search completed, returned {len(formatted_results)} documents")
|
logger.info(
|
||||||
|
f"Metadata search completed, returned {len(formatted_results)} documents"
|
||||||
|
)
|
||||||
|
|
||||||
return formatted_results
|
return formatted_results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error during document search with metadata: {str(e)}")
|
logger.error(f"Error during document search with metadata: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Example usage
|
|
||||||
query = "What is the main topic discussed in the documents?"
|
|
||||||
results = search_documents_with_metadata(query, top_k=5)
|
|
||||||
|
|
||||||
print(f"Found {len(results)} documents:")
|
|
||||||
for i, result in enumerate(results, 1):
|
|
||||||
print(f"\n{i}. Source: {result['source']}")
|
|
||||||
print(f" Filename: {result['filename']}")
|
|
||||||
print(f" Page: {result['page_number']}")
|
|
||||||
print(f" Content preview: {result['content'][:200]}...")
|
|
||||||
print(f" Metadata: {result['metadata']}")
|
|
||||||
Reference in New Issue
Block a user