# slide-translate/convert.py

import argparse
import os
from pdf_convertor import save_md_images, convert_pdf_to_markdown
from pathlib import Path

_PDF_SUFFIX = ".pdf"


def _output_path_for(pdf_path: Path) -> Path:
    """Return the location under ``output/`` that corresponds to *pdf_path*.

    The result is ``output/<file name>`` with a trailing ``.pdf`` removed.
    The suffix is matched case-insensitively so ``deck.PDF`` and ``deck.pdf``
    both map to ``output/deck``. A name without that suffix is kept whole.
    """
    name = pdf_path.name
    if name.lower().endswith(_PDF_SUFFIX):
        name = name[: -len(_PDF_SUFFIX)]
    return Path("output").joinpath(name)


def _find_pdfs(input_dir: Path) -> list[Path]:
    """Return the PDF files directly inside *input_dir*, sorted by path.

    Only regular files are returned: a directory that merely has a ``.pdf``
    name cannot be read as a PDF and would otherwise abort the whole batch.
    Sorting makes the processing order reproducible between runs.

    Raises:
        FileNotFoundError: if *input_dir* does not exist.
    """
    return sorted(
        entry
        for entry in input_dir.iterdir()
        if entry.is_file() and entry.name.lower().endswith(_PDF_SUFFIX)
    )


def _convert_file(pdf_path: Path) -> None:
    """Convert one PDF and store the result.

    The file's raw bytes are passed to ``convert_pdf_to_markdown``; the
    markdown and images it returns are handed to ``save_md_images`` together
    with the output location derived from the file name.
    """
    md, images = convert_pdf_to_markdown(pdf_path.read_bytes())
    save_md_images(_output_path_for(pdf_path), md, images)


def main(argv=None) -> None:
    """Command-line entry point: convert one PDF, or every PDF in ``input/``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry uses.

    Exactly one of ``--pdf-path PATH`` or ``--all`` must be supplied;
    argparse exits with a usage error otherwise.
    """
    parser = argparse.ArgumentParser(description="Convert PDF to Markdown.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--pdf-path", type=str, help="Path to the input PDF file.")
    group.add_argument("--all", action="store_true", help="Process all pdf.")
    args = parser.parse_args(argv)

    if args.all:
        targets = _find_pdfs(Path("input"))
    else:
        targets = [Path(args.pdf_path)]

    for pdf_path in targets:
        _convert_file(pdf_path)


if __name__ == "__main__":
    main()