import os
from pathlib import Path

from pdf_convertor import (
    convert_pdf_to_markdown,
    save_md_images,
    refine_content,
)


def main() -> None:
    """Convert every PDF file found directly inside ``input/``.

    For each PDF, a sub-directory named after the file (without its
    extension) is created under ``output/``. The PDF bytes are passed to
    ``convert_pdf_to_markdown``, the resulting markdown and images are passed
    through ``refine_content``, and the result is handed to
    ``save_md_images`` together with that sub-directory.

    Raises:
        FileNotFoundError: If the ``input`` directory does not exist.
    """
    input_dir = Path("input")
    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort them so that the
    # processing order (and the log output) is reproducible between runs.
    for filename in sorted(os.listdir(input_dir)):
        # Match the extension case-insensitively so "REPORT.PDF" is picked up.
        if not filename.lower().endswith(".pdf"):
            continue

        pdf_path = input_dir / filename
        # A directory whose name happens to end in ".pdf" cannot be read as a
        # document; skip it instead of aborting the whole batch.
        if not pdf_path.is_file():
            continue

        current_output_dir = output_dir / pdf_path.stem
        current_output_dir.mkdir(parents=True, exist_ok=True)

        print(f"Processing {pdf_path} -> {current_output_dir}")

        pdf_content = pdf_path.read_bytes()
        md, images = convert_pdf_to_markdown(pdf_content)
        md = refine_content(md, images, pdf_content)
        save_md_images(current_output_dir, md, images)


if __name__ == "__main__":
    main()