diff --git a/services/rag/.DS_Store b/services/rag/.DS_Store index 5bd78c6..5c7d7c4 100644 Binary files a/services/rag/.DS_Store and b/services/rag/.DS_Store differ diff --git a/services/rag/llamaindex/.env.dist b/services/rag/llamaindex/.env.dist index 2d5a3bb..d6a3dd5 100644 --- a/services/rag/llamaindex/.env.dist +++ b/services/rag/llamaindex/.env.dist @@ -14,6 +14,7 @@ QDRANT_GRPC_PORT=6334 # OpenAI Configuration (for reference - uncomment and configure when using OpenAI strategy) # OPENAI_CHAT_URL=https://api.openai.com/v1 # OPENAI_CHAT_KEY=your_openai_api_key_here +# OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL=false # OPENAI_EMBEDDING_MODEL=text-embedding-3-small # OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1 # OPENAI_EMBEDDING_API_KEY=your_openai_api_key_here diff --git a/services/rag/llamaindex/PLANNING.md b/services/rag/llamaindex/PLANNING.md index 65cad9a..e84cfcd 100644 --- a/services/rag/llamaindex/PLANNING.md +++ b/services/rag/llamaindex/PLANNING.md @@ -93,3 +93,10 @@ Chosen data folder: relatve ./../../../data - from the current folder - optionally filter low-information chunks (headers/footers) - [x] Add optional metadata-aware retrieval improvements (years/events/keywords) parity with LangChain approach (folder near current folder), if feasible in the chosen LlamaIndex primitives. - [x] Update `server.py` endpoint to use the new agent-like chat path (keep simple retrieval endpoint available as fallback or debug mode). + +# Phase 13 (make a wrapper around the LlamaIndex OpenAI class, or install a library that helps use any OpenAI-compatible models) + +Inside the config.py file, an object is created for use as the chat model. Unfortunately, it allows only "supported" models as the value for the "model" argument in the constructor. + +- [x] Search online for a library or plugin for llamaindex that fixes the OpenAI class behaviour, or provides a replacement that will allow any models. 
If found, install it in the project venv, then update requirements.txt and replace usage of OpenAI with the new one in the code. +- [x] Fallback option: create a wrapper of the OpenAI class that will inherit it and override the methods/features that check for "registered" models. Then use the replacement in the code. diff --git a/services/rag/llamaindex/config.py b/services/rag/llamaindex/config.py index 9348aa7..e948e90 100644 --- a/services/rag/llamaindex/config.py +++ b/services/rag/llamaindex/config.py @@ -93,18 +93,30 @@ def get_llm_model(): return llm elif strategy == "openai": - from llama_index.llms.openai import OpenAI + from llama_index.llms.openai_like import OpenAILike + # from helpers.openai_compatible_llm import OpenAICompatibleLLM openai_chat_url = os.getenv("OPENAI_CHAT_URL", "https://api.openai.com/v1") openai_chat_key = os.getenv("OPENAI_CHAT_KEY", "dummy_key_for_template") openai_chat_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo") + openai_is_fc_model = ( + os.getenv("OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL", "false").lower() + == "true" + ) # Set the API key in environment for OpenAI os.environ["OPENAI_API_KEY"] = openai_chat_key - logger.info(f"Initializing OpenAI chat model: {openai_chat_model}") + logger.info( + f"Initializing OpenAI-compatible chat model: {openai_chat_model} " + f"(base={openai_chat_url}, function_calling={openai_is_fc_model})" + ) - llm = OpenAI(model=openai_chat_model, api_base=openai_chat_url) + llm = OpenAILike( + model=openai_chat_model, + api_base=openai_chat_url, + api_key=openai_chat_key, + ) return llm diff --git a/services/rag/llamaindex/helpers/openai_compatible_llm.py b/services/rag/llamaindex/helpers/openai_compatible_llm.py new file mode 100644 index 0000000..a8778cf --- /dev/null +++ b/services/rag/llamaindex/helpers/openai_compatible_llm.py @@ -0,0 +1,35 @@ +""" +OpenAI-compatible LLM wrapper for LlamaIndex chat models. + +This wrapper is used as a fallback/replacement for strict OpenAI model validation paths. 
+It relies on LlamaIndex `OpenAILike`, which supports arbitrary model names for +OpenAI-compatible endpoints. +""" + +from llama_index.llms.openai_like import OpenAILike + + +class OpenAICompatibleLLM(OpenAILike): + """ + Thin wrapper over OpenAILike with chat-friendly defaults. + """ + + def __init__( + self, + model: str, + api_base: str, + api_key: str, + temperature: float = 0.1, + timeout: float = 120.0, + is_function_calling_model: bool = False, + ): + super().__init__( + model=model, + api_base=api_base, + api_key=api_key, + temperature=temperature, + timeout=timeout, + # Explicitly avoid "registered model only" assumptions. + is_chat_model=True, + is_function_calling_model=is_function_calling_model, + ) diff --git a/services/rag/llamaindex/requirements.txt b/services/rag/llamaindex/requirements.txt index 3196401..9b08e74 100644 --- a/services/rag/llamaindex/requirements.txt +++ b/services/rag/llamaindex/requirements.txt @@ -74,6 +74,7 @@ llama-index-indices-managed-llama-cloud==0.9.4 llama-index-instrumentation==0.4.2 llama-index-llms-ollama==0.9.1 llama-index-llms-openai==0.6.17 +llama-index-llms-openai-like==0.6.0 llama-index-readers-file==0.5.6 llama-index-readers-llama-parse==0.5.1 llama-index-vector-stores-qdrant==0.9.1