feat: Introduce OpenAI LLM provider and update API key handling

This commit integrates OpenAI as a new Large Language Model (LLM) provider,
expanding the available options for content refinement.

Key changes include:
- Added `set_openai_api_key` to handle OpenAI API key retrieval from
  `config.ini` or environment variables.
- Modified `set_api_key` to dynamically read the LLM provider from `config.ini`
  instead of receiving it as an argument from the caller.
This commit is contained in:
2025-11-12 02:51:18 +11:00
parent ae7c579580
commit 1a867844ce
6 changed files with 446 additions and 77 deletions

View File

@@ -13,6 +13,7 @@ from docling_core.types.doc.base import ImageRefMode
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from llm import set_api_key, get_model_name, get_temperature
from io import BytesIO
from pathlib import Path
@@ -120,7 +121,7 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
config.read("config.ini")
provider = config.get("llm", "PROVIDER", fallback="gemini")
set_api_key(provider)
set_api_key()
try:
if provider == "gemini":
@@ -135,6 +136,11 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
num_ctx=256000,
num_predict=-1,
)
elif provider == "openai":
llm = ChatOpenAI(
model=get_model_name(),
temperature=get_temperature(),
)
else:
raise ValueError(f"Unsupported LLM provider: {provider}")
except Exception as e:
@@ -147,21 +153,12 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
# 添加 Markdown
human_message_parts = []
if provider == "gemini":
human_message_parts.append(
{
"type": "media",
"mime_type": "text/markdown",
"data": base64.b64encode(md.encode("UTF-8")).decode("utf-8"),
}
)
elif provider == "ollama":
human_message_parts.append(
{
"type": "text",
"text": md,
}
)
human_message_parts.append(
{
"type": "text",
"text": md,
}
)
# 添加图片
for image_name in images.keys():
@@ -171,21 +168,14 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
"text": f"This is image: '{image_name}':\n",
}
)
if provider == "gemini":
human_message_parts.append(
{
"type": "media",
"mime_type": "image/png",
"data": base64.b64encode(images[image_name]).decode("utf-8"),
}
)
if provider == "ollama":
human_message_parts.append(
{
"type": "image_url",
"image_url": f"data:image/png;base64,{base64.b64encode(images[image_name]).decode('utf-8')}",
}
)
human_message_parts.append(
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64.b64encode(images[image_name]).decode('utf-8')}"
},
}
)
# 添加 PDF
if provider == "gemini":
@@ -202,6 +192,18 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
},
]
)
if provider == "openai":
human_message_parts.extend(
[
{
"type": "file",
"file": {
"filename": "origin.pdf",
"file_data": f"data:application/pdf;base64,{base64.b64encode(pdf).decode('utf-8')}",
},
},
]
)
if provider == "ollama":
doc = fitz.open(stream=pdf, filetype="pdf")
for page_num in range(doc.page_count):