feat: Introduce OpenAI LLM provider and update API key handling

This commit integrates OpenAI as a new Large Language Model (LLM) provider, expanding the available options for content refinement. Key changes include:

- Added `set_openai_api_key` to handle OpenAI API key retrieval from `config.ini` or environment variables.
- Modified `set_api_key` to dynamically read the LLM provider from `config.ini`.
2025-11-12 02:51:18 +11:00
parent ae7c579580
commit 1a867844ce
6 changed files with 446 additions and 77 deletions
--- a/refine.py
+++ b/refine.py
@@ -1,23 +1,43 @@
-import argparse  # New import
+import argparse
 from pdf_convertor import load_md_file, save_md_images, refine_content
 from pathlib import Path

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Refine Markdown content from PDF.")
-    parser.add_argument(
-        "--md-path", type=str, required=True, help="Path to the input Markdown file."
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--md-path", type=str, help="Path to the input Markdown file.")
+    group.add_argument(
+        "--all",
+        action="store_true",
+        help="Process all markdown files in the output directory.",
    )
    args = parser.parse_args()

-    md_path = args.md_path
-    pdf_path = Path("input").joinpath(Path(args.md_path).parent.name + ".pdf")
+    if args.all:
+        output_dir = Path("output")
+        for md_file_path in output_dir.glob("*/index.md"):
+            md_path = md_file_path
+            pdf_path = Path("input").joinpath(md_file_path.parent.name + ".pdf")

-    output = Path(md_path).parent
-    output.mkdir(parents=True, exist_ok=True)
+            output = md_file_path.parent
+            output.mkdir(parents=True, exist_ok=True)

-    md, images = load_md_file(md_path)
-    with open(pdf_path, "rb") as pdf_file:
-        pdf = pdf_file.read()
-    md = refine_content(md, images, pdf)
+            md, images = load_md_file(md_path)
+            with open(pdf_path, "rb") as pdf_file:
+                pdf = pdf_file.read()
+            md = refine_content(md, images, pdf)

-    save_md_images(output, md, images, md_name="index_refined.md")
+            save_md_images(output, md, images, md_name="index_refined.md")
+    else:
+        md_path = Path(args.md_path)
+        pdf_path = Path("input").joinpath(md_path.parent.name + ".pdf")
+
+        output = md_path.parent
+        output.mkdir(parents=True, exist_ok=True)
+
+        md, images = load_md_file(md_path)
+        with open(pdf_path, "rb") as pdf_file:
+            pdf = pdf_file.read()
+        md = refine_content(md, images, pdf)
+
+        save_md_images(output, md, images, md_name="index_refined.md")