This commit integrates OpenAI as a new Large Language Model (LLM) provider, expanding the available options for content refinement. Key changes include:
- Added `set_openai_api_key` to handle OpenAI API key retrieval from `config.ini` or environment variables.
- Modified `set_api_key` to dynamically read the LLM provider from `config.ini`.
44 lines
1.5 KiB
Python
44 lines
1.5 KiB
Python
import argparse
import sys
from pathlib import Path

from pdf_convertor import load_md_file, save_md_images, refine_content
|
|
|
|
def build_parser():
    """Build the command-line parser for the refinement script.

    Exactly one of ``--md-path`` (refine a single Markdown file) or ``--all``
    (refine every ``output/*/index.md``) must be supplied.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Refine Markdown content from PDF.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--md-path", type=str, help="Path to the input Markdown file.")
    group.add_argument(
        "--all",
        action="store_true",
        help="Process all markdown files in the output directory.",
    )
    return parser


def pdf_path_for(md_path):
    """Return the ``input/<name>.pdf`` path paired with a Markdown file.

    ``<name>`` is the name of the directory that contains *md_path*.

    Args:
        md_path: Path (``str`` or ``Path``) of the Markdown file.

    Returns:
        A relative ``Path`` of the form ``input/<name>.pdf``.
    """
    parent = Path(md_path).parent
    name = parent.name
    if name in ("", ".."):
        # A bare "index.md" or "../index.md" has no usable directory name and
        # would otherwise yield "input/.pdf" / "input/...pdf"; resolve the
        # directory to obtain its real name instead.
        name = parent.resolve().name
    return Path("input").joinpath(name + ".pdf")


def refine_file(md_path):
    """Refine one Markdown file and save the result next to it.

    Loads the Markdown and its images, reads the paired PDF as bytes, passes
    all three to ``refine_content`` and writes the returned Markdown plus the
    images into the Markdown file's own directory as ``index_refined.md``.

    Args:
        md_path: Path (``str`` or ``Path``) of the Markdown file to refine.

    Raises:
        FileNotFoundError: If the paired PDF does not exist.
    """
    md_path = Path(md_path)
    pdf_path = pdf_path_for(md_path)

    output = md_path.parent
    output.mkdir(parents=True, exist_ok=True)

    md, images = load_md_file(md_path)
    pdf = pdf_path.read_bytes()
    md = refine_content(md, images, pdf)

    save_md_images(output, md, images, md_name="index_refined.md")


def main(argv=None):
    """Parse the command line and refine the requested Markdown file(s).

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` if ``--all`` had to skip at least one file
        because its source PDF was missing.
    """
    args = build_parser().parse_args(argv)

    if not args.all:
        refine_file(Path(args.md_path))
        return 0

    skipped = 0
    # Sorted so that batch runs visit the files in a reproducible order.
    for md_path in sorted(Path("output").glob("*/index.md")):
        pdf_path = pdf_path_for(md_path)
        if not pdf_path.is_file():
            # One missing source PDF must not abort the rest of the batch;
            # report it and signal the failure through the exit status.
            print(f"Skipping {md_path}: missing {pdf_path}", file=sys.stderr)
            skipped += 1
            continue
        refine_file(md_path)
    return 1 if skipped else 0


if __name__ == "__main__":
    sys.exit(main())