u

2025-11-07 04:03:57 +11:00
parent 40ff3756a5
commit e05c15db16
6 changed files with 557 additions and 406 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ wheels/
 .venv
 input/
 output/
-config.ini
+config.ini
+test.py
--- a/llm.py
+++ b/llm.py
@@ -21,3 +21,8 @@ def get_model_name() -> str:
    config = configparser.ConfigParser()
    config.read("config.ini")
    return config.get("llm", "MODEL_NAME", fallback="gemini-2.5-flash")
+
+def get_temperature() -> float:
+    config = configparser.ConfigParser()
+    config.read("config.ini")
+    return float(config.get("llm", "TEMPERATURE", fallback=0.7))
--- a/main.py
+++ b/main.py
@@ -29,7 +29,11 @@ def main():
        with open(pdf_path, "rb") as pdf_file:
            pdf_content = pdf_file.read()
            md, images = convert_pdf_to_markdown(pdf_content)
-            md = refine_content(md, images, pdf_content)
+            try:
+                md = refine_content(md, images, pdf_content)
+            except BaseException:
+                continue
+
            save_md_images(current_output_dir, md, images)


--- a/pdf_convertor.py
+++ b/pdf_convertor.py
@@ -115,26 +115,14 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
    set_gemini_api_key()

    try:
-        llm = ChatGoogleGenerativeAI(model=get_model_name(), temperature=0)
+        llm = ChatGoogleGenerativeAI(model=get_model_name(), temperature=0.7)
    except Exception as e:
        raise BaseException(
            f"Error initializing LLM. Make sure your Google API key is set correctly. Error: {e}"
        )

-    prompt = """
-    You are a professional technical document editor. Your task is to polish a Markdown text automatically converted from an accompanying PDF document. Please use the original PDF as the source of truth for layout, images, and context.
-
-    Please perform the following operations based on the provided Markdown and PDF:
-
-    1.  **Clean up extraneous characters**: Review the Markdown text and remove any conversion artifacts or strange formatting that do not exist in the original PDF.
-    2.  **Explain image content**: Refer to charts, diagrams, and images in the PDF, and add descriptions after image citations so that complete information can be obtained through text descriptions even without the images.
-    3.  **Correct list formatting**: The conversion may have flattened nested lists. Analyze the list structure in the PDF and restore the correct multi-level indentation in the Markdown.
-    4.  **Correct mathematical formulas and symbols**: Convert plain text formulas into proper formula notation, for example, `Kmin` should be `$K_{min}`, and `E = hc/λ` should be `$E = \\frac{hc}{\\lambda}`.
-    5.  **Adjust headings**: Rename headings that have the same name according to the different content within the subsections to avoid duplicate same-name headings and ensure the outline is clear.
-    6.  **Translate**: Translate the content into Simplified Chinese. Proper nouns should retain their original expression during translation, for example, `Magnetic resonance imaging` should be translated as `磁共振成像(Magnetic resonance imaging, MRI)`. If the term appears multiple times, the original expression should be appended each time.
-
-    Only output the adjusted Markdown text, without any other text content.
-    """
+    with open("pdf_convertor_prompt.md", "r") as f:
+        prompt = f.read()

    human_message_parts = [
        {
@@ -187,4 +175,19 @@ def refine_content(md: str, images: dict[str, bytes], pdf: bytes) -> str:
    except Exception as e:
        raise BaseException(f"An error occurred while invoking the LLM: {e}")

-    return str(refined_content)
+    if str(refined_content) == "":
+        raise BaseException("Response of Gemini is empty")
+
+    return fix_output(str(refined_content))
+
+
+def fix_output(md: str) -> str:
+    if not md.startswith("['") or not md.endswith("']"):
+        return md
+    md = md.removeprefix("['")
+    md = md.removesuffix("']")
+    md = md.replace("\\n", "\n")
+    md = md.replace("\", '", "\n\n")
+    md = md.replace("', \"", "\n\n")
+    md = md.replace("', '", "\n\n")
+    return md
--- a/pdf_convertor_prompt.md
+++ b/pdf_convertor_prompt.md
@@ -0,0 +1,69 @@
+You are a professional technical document editor. Your task is to polish a Markdown text that has been automatically converted from a corresponding PDF document. Please use the original PDF as the sole reference for layout, images, and context.
+
+Please perform the following operations based on the provided Markdown and PDF:
+
+1.  **Clean up extraneous characters**: Check the Markdown text and delete any conversion artifacts or strange formatting that does not exist in the original PDF.
+2.  **Explain image content**: Refer to the charts, diagrams, and images in the PDF, and add a description after the image reference so that the full information can be obtained from the text description even without the image.
+
+    - Add a blank line after the image reference to control line breaks.
+
+      For example:
+
+      ```
+      ![Brief description of the image](./image.png)
+
+      A detailed explanation of the image, detailed enough to replace the image and help the reader understand the content.
+      ```
+
+3.  **Correct list formatting**: The conversion process may flatten nested lists. Please analyze the list structure in the PDF and restore the correct multi-level indentation in Markdown.
+4.  **Correct mathematical formulas and symbols**: Convert plain text formulas into correct formula notation, for example, `Kmin` should be `$K_{min}$`, and `E = hc/λ` should be `$E = \frac{hc}{\lambda}$`.
+5.  **Adjust headings**: Based on the different content within sub-chapters, rename headings with the same name to avoid duplicate headings and ensure a clear outline.
+6.  **Clean up redundant headings**: If there is no content between adjacent headings of the same level, merge or re-level those headings so that every heading is followed either by content or by a lower-level subheading.
+
+    - For example, the following is incorrectly formatted because there is no content between multiple headings of the same level:
+
+      ```
+      ## Convolutional Neural Networks: Weight Sharing with Multiple Filters
+
+      ## Weight sharing
+
+      Multiple filters can be applied to detect the spatial distributions of multiple visual patterns.
+
+      ![Image](images/66.png)
+
+      This diagram consists of two parts. The left part illustrates how multiple filters (represented by connections of different colors) are applied to an input image, with each filter detecting a different pattern. The right part shows how a single filter (hidden unit / filter response) is convolved over the input to produce a feature map.
+
+      ## Convolutional Neural Networks: Weight Sharing and Translation Invariance
+
+      ## Weight sharing
+
+      ## Translation invariance:
+
+      *   Captures statistics in local patches, and they are independent of location.
+      ```
+
+      It can be changed to
+
+      ```
+      ## Convolutional Neural Networks
+
+      ### Weight Sharing with Multiple Filters
+
+      Multiple filters can be applied to detect the spatial distributions of multiple visual patterns.
+
+      ![Image](images/66.png)
+
+      This diagram consists of two parts. The left part illustrates how multiple filters (represented by connections of different colors) are applied to an input image, with each filter detecting a different pattern. The right part shows how a single filter (hidden unit / filter response) is convolved over the input to produce a feature map.
+
+      ## Convolutional Neural Networks
+
+      ### Weight Sharing and Translation Invariance
+
+      #### Translation invariance:
+
+      *   Captures statistics in local patches, and they are independent of location.
+      ```
+
+7.  **Translate**: Translate the content into Simplified Chinese. When translating, proper nouns should retain their original expression, for example, `Magnetic resonance imaging` should be translated as `磁共振成像(Magnetic resonance imaging, MRI)`. If the term appears multiple times, the original expression should be included each time.
+
+Only output the adjusted Markdown text, without any other text content. Do not output in JSON format, and do not add ` ``` ` or ` ```markdown ` at the beginning or end of the Markdown.
--- a/uv.lock
+++ b/uv.lock