Use OpenAILike for LlamaIndex, which allows any OpenAI-compatible model to be used via the API, not just the models hardcoded in LlamaIndex's default OpenAI class

2026-03-09 09:59:27 +03:00
parent 6b3fa1cfaa
commit 969d25209c
6 changed files with 59 additions and 3 deletions
--- a/services/rag/.DS_Store
+++ b/services/rag/.DS_Store
--- a/services/rag/llamaindex/.env.dist
+++ b/services/rag/llamaindex/.env.dist
@@ -14,6 +14,7 @@ QDRANT_GRPC_PORT=6334
 # OpenAI Configuration (for reference - uncomment and configure when using OpenAI strategy)
 # OPENAI_CHAT_URL=https://api.openai.com/v1
 # OPENAI_CHAT_KEY=your_openai_api_key_here
 # OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL=false
 # OPENAI_EMBEDDING_MODEL=text-embedding-3-small
 # OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1
 # OPENAI_EMBEDDING_API_KEY=your_openai_api_key_here
--- a/services/rag/llamaindex/PLANNING.md
+++ b/services/rag/llamaindex/PLANNING.md
@@ -93,3 +93,10 @@ Chosen data folder: relatve ./../../../data - from the current folder
  - optionally filter low-information chunks (headers/footers)
 - [x] Add optional metadata-aware retrieval improvements (years/events/keywords) parity with LangChain approach (folder near current folder), if feasible in the chosen LlamaIndex primitives.
 - [x] Update `server.py` endpoint to use the new agent-like chat path (keep simple retrieval endpoint available as fallback or debug mode).
 # Phase 13 (make wrapper around OpenAI llamaindex class or install library to help use any openai compatible models)
 Inside the config.py file, an object is created for use as the Chat Model. Unfortunately, its constructor allows only "supported" models as the value of the "model" argument.
 - [x] Search online for a library or plugin for LlamaIndex that fixes the OpenAI class behaviour or provides a replacement that allows any model. If found, install it in the project venv, then update requirements.txt and replace the usage of OpenAI with the new class in the code.
 - [x] Fallback option: create a wrapper around the OpenAI class that inherits from it and replaces the methods/features that check for "registered" models. Then use the replacement in the code.
--- a/services/rag/llamaindex/config.py
+++ b/services/rag/llamaindex/config.py
@@ -93,18 +93,30 @@ def get_llm_model():
        return llm
    elif strategy == "openai":
-        from llama_index.llms.openai import OpenAI
+        from llama_index.llms.openai_like import OpenAILike
        # from helpers.openai_compatible_llm import OpenAICompatibleLLM
        openai_chat_url = os.getenv("OPENAI_CHAT_URL", "https://api.openai.com/v1")
        openai_chat_key = os.getenv("OPENAI_CHAT_KEY", "dummy_key_for_template")
        openai_chat_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")
        openai_is_fc_model = (
            os.getenv("OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL", "false").lower()
            == "true"
        )
        # Set the API key in environment for OpenAI
        os.environ["OPENAI_API_KEY"] = openai_chat_key
-        logger.info(f"Initializing OpenAI chat model: {openai_chat_model}")
+        logger.info(
            f"Initializing OpenAI-compatible chat model: {openai_chat_model} "
            f"(base={openai_chat_url}, function_calling={openai_is_fc_model})"
        )
-        llm = OpenAI(model=openai_chat_model, api_base=openai_chat_url)
+        llm = OpenAILike(
            model=openai_chat_model,
            api_base=openai_chat_url,
            api_key=openai_chat_key,
        )
        return llm
--- a/services/rag/llamaindex/helpers/openai_compatible_llm.py
+++ b/services/rag/llamaindex/helpers/openai_compatible_llm.py
@@ -0,0 +1,35 @@
 """
 OpenAI-compatible LLM wrapper for LlamaIndex chat models.
 This wrapper is used as a fallback/replacement for strict OpenAI model validation paths.
 It relies on LlamaIndex `OpenAILike`, which supports arbitrary model names for
 OpenAI-compatible endpoints.
 """
 from llama_index.llms.openai_like import OpenAILike
 class OpenAICompatibleLLM(OpenAILike):
    """
    Thin wrapper over OpenAILike with chat-friendly defaults.

    The wrapper supplies a default temperature of 0.1 and a default timeout
    of 120.0, and marks the model as a chat model unless the caller passes
    an explicit ``is_chat_model`` value. Any additional keyword arguments
    are forwarded unchanged to ``OpenAILike.__init__`` so that base-class
    options remain configurable through this wrapper.
    """

    def __init__(
        self,
        model: str,
        api_base: str,
        api_key: str,
        temperature: float = 0.1,
        timeout: float = 120.0,
        is_function_calling_model: bool = False,
        **kwargs,
    ) -> None:
        """
        Build the underlying OpenAILike instance.

        Args:
            model: Model name sent to the endpoint; passed through as-is.
            api_base: Base URL of the OpenAI-compatible API.
            api_key: API key forwarded to OpenAILike.
            temperature: Sampling temperature forwarded to OpenAILike.
            timeout: Request timeout forwarded to OpenAILike
                (presumably seconds -- confirm against the OpenAILike docs).
            is_function_calling_model: Forwarded to OpenAILike under the
                same name.
            **kwargs: Any other OpenAILike constructor options. Passing
                ``is_chat_model`` here overrides the default of ``True``.
        """
        # Explicitly avoid "registered model only" assumptions: default to
        # chat mode, but use setdefault so a caller-supplied value wins
        # instead of raising a duplicate-keyword TypeError.
        kwargs.setdefault("is_chat_model", True)
        super().__init__(
            model=model,
            api_base=api_base,
            api_key=api_key,
            temperature=temperature,
            timeout=timeout,
            is_function_calling_model=is_function_calling_model,
            **kwargs,
        )
--- a/services/rag/llamaindex/requirements.txt
+++ b/services/rag/llamaindex/requirements.txt
@@ -74,6 +74,7 @@ llama-index-indices-managed-llama-cloud==0.9.4
 llama-index-instrumentation==0.4.2
 llama-index-llms-ollama==0.9.1
 llama-index-llms-openai==0.6.17
 llama-index-llms-openai-like==0.6.0
 llama-index-readers-file==0.5.6
 llama-index-readers-llama-parse==0.5.1
 llama-index-vector-stores-qdrant==0.9.1