"""Command-line entry point that converts PDF files to Markdown.

Either a single PDF (``--pdf-path``) or every ``.pdf`` file found directly
inside the ``input`` directory (``--all``) is converted. Results are handed
to ``save_md_images`` with a target path under ``output`` named after the
PDF file without its ``.pdf`` suffix.
"""
import argparse
import os
from pathlib import Path

from pdf_convertor import save_md_images, convert_pdf_to_markdown


def _convert_file(pdf_path):
    """Convert one PDF file and save the result under ``output``.

    Args:
        pdf_path: ``pathlib.Path`` of the PDF file to read.

    Raises:
        OSError: If ``pdf_path`` cannot be read.
    """
    # The converter is given the raw file content as bytes.
    pdf = pdf_path.read_bytes()
    md, images = convert_pdf_to_markdown(pdf)
    # Output target is "output/<file name without the .pdf suffix>".
    output = Path("output").joinpath(pdf_path.name.removesuffix(".pdf"))
    save_md_images(output, md, images)


def main():
    """Parse command-line arguments and run the requested conversion(s)."""
    parser = argparse.ArgumentParser(description="Convert PDF to Markdown.")
    # Exactly one of --pdf-path / --all must be supplied.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--pdf-path", type=str, help="Path to the input PDF file.")
    group.add_argument("--all", action="store_true", help="Process all pdf.")
    args = parser.parse_args()

    if not args.all:
        _convert_file(Path(args.pdf_path))
        return

    input_dir = Path("input")
    # Sorted so that batch runs process files in a deterministic order.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(".pdf"):
            continue
        pdf_path = input_dir.joinpath(filename)
        # A directory whose name happens to end in ".pdf" cannot be read as
        # a file; skip it instead of aborting the whole batch.
        if not pdf_path.is_file():
            continue
        _convert_file(pdf_path)


if __name__ == "__main__":
    main()