feat: Introduce OpenAI LLM provider and update API key handling

This commit integrates OpenAI as a new Large Language Model (LLM) provider, expanding the available options for content refinement. Key changes include:
- Added `set_openai_api_key` to handle OpenAI API key retrieval from `config.ini` or environment variables.
- Modified `set_api_key` to dynamically read the LLM provider from `config.ini`.
2025-11-12 02:51:18 +11:00
parent ae7c579580
commit 1a867844ce
6 changed files with 446 additions and 77 deletions
--- a/convert.py
+++ b/convert.py
@@ -0,0 +1,30 @@
+import argparse
+import os
+from pdf_convertor import save_md_images, convert_pdf_to_markdown
+from pathlib import Path
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert PDF to Markdown.")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--pdf-path", type=str, help="Path to the input PDF file.")
+    group.add_argument("--all", action="store_true", help="Process all pdf.")
+    args = parser.parse_args()
+
+    if args.all:
+        input_dir = Path("input")
+        for filename in os.listdir(input_dir):
+            if not filename.endswith(".pdf"):
+                continue
+            pdf_path = input_dir.joinpath(filename)
+            with open(pdf_path, "rb") as pdf_file:
+                pdf = pdf_file.read()
+            md, images = convert_pdf_to_markdown(pdf)
+            output = Path("output").joinpath(Path(pdf_path).name.removesuffix(".pdf"))
+            save_md_images(output, md, images)
+    else:
+        pdf_path = Path(args.pdf_path)
+        with open(pdf_path, "rb") as pdf_file:
+            pdf = pdf_file.read()
+        md, images = convert_pdf_to_markdown(pdf)
+        output = Path("output").joinpath(pdf_path.name.removesuffix(".pdf"))
+        save_md_images(output, md, images)