Files
slide-translate/refine.py
nite 1a867844ce feat: Introduce OpenAI LLM provider and update API key handling
This commit integrates OpenAI as a new Large Language Model (LLM) provider,
expanding the available options for content refinement.

Key changes include:
- Added `set_openai_api_key` to handle OpenAI API key retrieval from
  `config.ini` or environment variables.
- Modified `set_api_key` to dynamically read the LLM provider from `config.ini`
2025-11-12 02:51:18 +11:00

44 lines
1.5 KiB
Python

import argparse
from pdf_convertor import load_md_file, save_md_images, refine_content
from pathlib import Path
def _pdf_path_for(md_path: Path) -> Path:
    """Return the source PDF path that corresponds to *md_path*.

    The PDF is located by the name of the directory that holds the Markdown
    file: ``output/<name>/index.md`` maps to ``input/<name>.pdf``. The
    ``input`` directory is relative to the current working directory.
    """
    # Append ".pdf" rather than using with_suffix() so directory names that
    # already contain dots (e.g. "deck.v2") are kept intact.
    return Path("input").joinpath(md_path.parent.name + ".pdf")


def _refine_markdown_file(md_path: Path) -> None:
    """Refine a single Markdown file and save the result beside it.

    Loads the Markdown and its images with ``load_md_file``, reads the bytes
    of the matching PDF, passes all three to ``refine_content`` and stores the
    returned Markdown through ``save_md_images`` as ``index_refined.md`` in
    the directory of *md_path*.

    Args:
        md_path: Path to the Markdown file to refine.

    Raises:
        OSError: If the matching PDF cannot be read (e.g. it does not exist).
            Errors raised by the ``pdf_convertor`` helpers are not caught
            either and propagate to the caller.
    """
    md_path = Path(md_path)
    output = md_path.parent
    output.mkdir(parents=True, exist_ok=True)
    md, images = load_md_file(md_path)
    pdf = _pdf_path_for(md_path).read_bytes()
    md = refine_content(md, images, pdf)
    save_md_images(output, md, images, md_name="index_refined.md")


def main() -> None:
    """Parse the command line and refine the selected Markdown file(s).

    Exactly one of ``--md-path`` or ``--all`` must be given. With ``--all``
    every ``output/*/index.md`` file is processed; otherwise only the file
    named by ``--md-path`` is.
    """
    parser = argparse.ArgumentParser(description="Refine Markdown content from PDF.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--md-path", type=str, help="Path to the input Markdown file.")
    group.add_argument(
        "--all",
        action="store_true",
        help="Process all markdown files in the output directory.",
    )
    args = parser.parse_args()

    if args.all:
        # glob() yields entries in arbitrary order; sort so that runs are
        # reproducible and a failure always happens at the same file.
        md_paths = sorted(Path("output").glob("*/index.md"))
    else:
        md_paths = [Path(args.md_path)]

    # Both modes share one pipeline so they cannot drift apart.
    for md_path in md_paths:
        _refine_markdown_file(md_path)


if __name__ == "__main__":
    main()