feat: Introduce OpenAI LLM provider and update API key handling
This commit integrates OpenAI as a new Large Language Model (LLM) provider, expanding the available options for content refinement. Key changes include:
- Added `set_openai_api_key` to handle OpenAI API key retrieval from `config.ini` or environment variables.
- Modified `set_api_key` to dynamically read the LLM provider from `config.ini`
This commit is contained in:
44
refine.py
44
refine.py
@@ -1,23 +1,43 @@
|
||||
import argparse # New import
|
||||
import argparse
|
||||
from pdf_convertor import load_md_file, save_md_images, refine_content
|
||||
from pathlib import Path
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Refine Markdown content from PDF.")
|
||||
parser.add_argument(
|
||||
"--md-path", type=str, required=True, help="Path to the input Markdown file."
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument("--md-path", type=str, help="Path to the input Markdown file.")
|
||||
group.add_argument(
|
||||
"--all",
|
||||
action="store_true",
|
||||
help="Process all markdown files in the output directory.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
md_path = args.md_path
|
||||
pdf_path = Path("input").joinpath(Path(args.md_path).parent.name + ".pdf")
|
||||
if args.all:
|
||||
output_dir = Path("output")
|
||||
for md_file_path in output_dir.glob("*/index.md"):
|
||||
md_path = md_file_path
|
||||
pdf_path = Path("input").joinpath(md_file_path.parent.name + ".pdf")
|
||||
|
||||
output = Path(md_path).parent
|
||||
output.mkdir(parents=True, exist_ok=True)
|
||||
output = md_file_path.parent
|
||||
output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
md, images = load_md_file(md_path)
|
||||
with open(pdf_path, "rb") as pdf_file:
|
||||
pdf = pdf_file.read()
|
||||
md = refine_content(md, images, pdf)
|
||||
md, images = load_md_file(md_path)
|
||||
with open(pdf_path, "rb") as pdf_file:
|
||||
pdf = pdf_file.read()
|
||||
md = refine_content(md, images, pdf)
|
||||
|
||||
save_md_images(output, md, images, md_name="index_refined.md")
|
||||
save_md_images(output, md, images, md_name="index_refined.md")
|
||||
else:
|
||||
md_path = Path(args.md_path)
|
||||
pdf_path = Path("input").joinpath(md_path.parent.name + ".pdf")
|
||||
|
||||
output = md_path.parent
|
||||
output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
md, images = load_md_file(md_path)
|
||||
with open(pdf_path, "rb") as pdf_file:
|
||||
pdf = pdf_file.read()
|
||||
md = refine_content(md, images, pdf)
|
||||
|
||||
save_md_images(output, md, images, md_name="index_refined.md")
|
||||
|
||||
Reference in New Issue
Block a user