Fix the OpenAI-compatible model wrapper to pass the needed parameters (temperature, max_tokens, reasoning_effort)
This commit is contained in:
@@ -14,6 +14,9 @@ QDRANT_GRPC_PORT=6334
|
|||||||
# OpenAI Configuration (for reference - uncomment and configure when using OpenAI strategy)
|
# OpenAI Configuration (for reference - uncomment and configure when using OpenAI strategy)
|
||||||
# OPENAI_CHAT_URL=https://api.openai.com/v1
|
# OPENAI_CHAT_URL=https://api.openai.com/v1
|
||||||
# OPENAI_CHAT_KEY=your_openai_api_key_here
|
# OPENAI_CHAT_KEY=your_openai_api_key_here
|
||||||
|
# OPENAI_CHAT_TEMPERATURE=0.1
|
||||||
|
# OPENAI_CHAT_MAX_TOKENS=1024
|
||||||
|
# OPENAI_CHAT_REASONING_EFFORT=low
|
||||||
# OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL=false
|
# OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL=false
|
||||||
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
|
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
|
||||||
# OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1
|
# OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from typing import Any, Iterable, List
|
|||||||
|
|
||||||
from llama_index.core import PromptTemplate
|
from llama_index.core import PromptTemplate
|
||||||
from llama_index.core.agent import AgentWorkflow
|
from llama_index.core.agent import AgentWorkflow
|
||||||
|
from llama_index.core.base.llms.types import ChatMessage, MessageRole
|
||||||
from llama_index.core.retrievers import VectorIndexRetriever
|
from llama_index.core.retrievers import VectorIndexRetriever
|
||||||
from llama_index.core.schema import NodeWithScore
|
from llama_index.core.schema import NodeWithScore
|
||||||
from llama_index.core.tools import FunctionTool
|
from llama_index.core.tools import FunctionTool
|
||||||
@@ -230,6 +231,19 @@ def synthesize_answer(query: str, sources: list[dict[str, Any]], agent_draft: st
|
|||||||
context_json=context_json,
|
context_json=context_json,
|
||||||
)
|
)
|
||||||
logger.info("Synthesizing grounded answer from retrieved sources")
|
logger.info("Synthesizing grounded answer from retrieved sources")
|
||||||
|
# Prefer the chat API for chat-capable models; fall back to completion if chat is unavailable or the call fails.
|
||||||
|
try:
|
||||||
|
if hasattr(llm, "chat"):
|
||||||
|
chat_response = llm.chat(
|
||||||
|
[
|
||||||
|
ChatMessage(role=MessageRole.SYSTEM, content="You answer with grounded citations only."),
|
||||||
|
ChatMessage(role=MessageRole.USER, content=prompt),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return _normalize_text(getattr(chat_response, "message", chat_response).content)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"LLM chat synthesis failed, falling back to completion: {e}")
|
||||||
|
|
||||||
response = llm.complete(prompt)
|
response = llm.complete(prompt)
|
||||||
return _normalize_text(getattr(response, "text", response))
|
return _normalize_text(getattr(response, "text", response))
|
||||||
|
|
||||||
|
|||||||
@@ -93,12 +93,19 @@ def get_llm_model():
|
|||||||
return llm
|
return llm
|
||||||
|
|
||||||
elif strategy == "openai":
|
elif strategy == "openai":
|
||||||
from llama_index.llms.openai_like import OpenAILike
|
from helpers.openai_compatible_llm import OpenAICompatibleLLM
|
||||||
# from helpers.openai_compatible_llm import OpenAICompatibleLLM
|
|
||||||
|
|
||||||
openai_chat_url = os.getenv("OPENAI_CHAT_URL", "https://api.openai.com/v1")
|
openai_chat_url = os.getenv("OPENAI_CHAT_URL", "https://api.openai.com/v1")
|
||||||
openai_chat_key = os.getenv("OPENAI_CHAT_KEY", "dummy_key_for_template")
|
openai_chat_key = os.getenv("OPENAI_CHAT_KEY", "dummy_key_for_template")
|
||||||
openai_chat_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")
|
openai_chat_model = os.getenv("OPENAI_CHAT_MODEL", "gpt-3.5-turbo")
|
||||||
|
openai_chat_temperature = float(os.getenv("OPENAI_CHAT_TEMPERATURE", "0.1"))
|
||||||
|
openai_chat_max_tokens_env = os.getenv("OPENAI_CHAT_MAX_TOKENS", "").strip()
|
||||||
|
openai_chat_max_tokens = (
|
||||||
|
int(openai_chat_max_tokens_env) if openai_chat_max_tokens_env else 1024
|
||||||
|
)
|
||||||
|
openai_reasoning_effort = (
|
||||||
|
os.getenv("OPENAI_CHAT_REASONING_EFFORT", "").strip() or None
|
||||||
|
)
|
||||||
openai_is_fc_model = (
|
openai_is_fc_model = (
|
||||||
os.getenv("OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL", "false").lower()
|
os.getenv("OPENAI_CHAT_IS_FUNCTION_CALLING_MODEL", "false").lower()
|
||||||
== "true"
|
== "true"
|
||||||
@@ -109,13 +116,19 @@ def get_llm_model():
|
|||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Initializing OpenAI-compatible chat model: {openai_chat_model} "
|
f"Initializing OpenAI-compatible chat model: {openai_chat_model} "
|
||||||
f"(base={openai_chat_url}, function_calling={openai_is_fc_model})"
|
f"(base={openai_chat_url}, max_tokens={openai_chat_max_tokens}, "
|
||||||
|
f"reasoning_effort={openai_reasoning_effort}, function_calling={openai_is_fc_model})"
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILike(
|
llm = OpenAICompatibleLLM(
|
||||||
model=openai_chat_model,
|
model=openai_chat_model,
|
||||||
api_base=openai_chat_url,
|
api_base=openai_chat_url,
|
||||||
api_key=openai_chat_key,
|
api_key=openai_chat_key,
|
||||||
|
temperature=openai_chat_temperature,
|
||||||
|
max_tokens=openai_chat_max_tokens,
|
||||||
|
reasoning_effort=openai_reasoning_effort,
|
||||||
|
timeout=120.0,
|
||||||
|
is_function_calling_model=openai_is_fc_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
return llm
|
return llm
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ class OpenAICompatibleLLM(OpenAILike):
|
|||||||
api_key: str,
|
api_key: str,
|
||||||
temperature: float = 0.1,
|
temperature: float = 0.1,
|
||||||
timeout: float = 120.0,
|
timeout: float = 120.0,
|
||||||
|
max_tokens: int | None = None,
|
||||||
|
reasoning_effort: str | None = None,
|
||||||
is_function_calling_model: bool = False,
|
is_function_calling_model: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
@@ -29,7 +31,10 @@ class OpenAICompatibleLLM(OpenAILike):
|
|||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
reasoning_effort=reasoning_effort,
|
||||||
# Explicitly avoid "registered model only" assumptions.
|
# Explicitly avoid "registered model only" assumptions.
|
||||||
is_chat_model=True,
|
is_chat_model=True,
|
||||||
is_function_calling_model=is_function_calling_model,
|
is_function_calling_model=is_function_calling_model,
|
||||||
|
should_use_structured_outputs=False,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user