Mirror of https://github.com/ThePhaseless/Byparr.git (synced 2025-03-15 09:50:20 +08:00)

Commit 73b3967618 (parent a35167e533): add healthcheck
Dockerfile

@@ -27,4 +27,5 @@ COPY pyproject.toml poetry.lock ./
 RUN poetry install
 
 COPY . .
+HEALTHCHECK --interval=60s --timeout=30s --start-period=5s --retries=3 CMD [ "curl", "http://localhost:8191/health" ]
 CMD ["./cmd.sh"]
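Worth noting: plain `curl URL` exits 0 even when the server answers with an HTTP error status, so this HEALTHCHECK only fails when the endpoint is unreachable; failing on a non-200 response would need `curl -f` or an explicit status check. A minimal sketch of such a probe in Python (the endpoint and port come from the Dockerfile; everything else is illustrative):

import sys
import urllib.request

try:
    # urlopen raises for non-2xx responses, which the except turns into exit 1
    with urllib.request.urlopen("http://localhost:8191/health", timeout=30) as resp:
        sys.exit(0 if resp.status == 200 else 1)
except Exception:
    sys.exit(1)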
@@ -1,65 +0,0 @@
-# https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/1954
-# Fix for nodriver in .venv/lib/python3.11/site-packages/nodriver/core/browser.py
-from __future__ import annotations
-
-import logging
-import os
-from pathlib import Path
-from platform import python_version
-
-env_path = os.getenv("VIRTUAL_ENV")
-if env_path is None:
-    env_path = Path(os.__file__).parent.parent.parent.as_posix()
-python_version = python_version().split(".")[0:2]
-nodriver_path = Path(env_path + f"/lib/python{'.'.join(python_version)}/site-packages/nodriver/cdp/network.py")
-if not nodriver_path.exists():
-    msg = f"{nodriver_path} not found"
-    raise FileNotFoundError(msg)
-
-new_cookie_partition_key = """\
-        if isinstance(json, str):
-            return cls(top_level_site=json, has_cross_site_ancestor=False)
-        elif isinstance(json, dict):
-            return cls(
-                top_level_site=str(json["topLevelSite"]),
-                has_cross_site_ancestor=bool(json["hasCrossSiteAncestor"]),
-            )
-"""
-
-logger = logging.getLogger(__name__)
-handler = logging.StreamHandler()
-logger.addHandler(handler)
-logger.setLevel(logging.INFO)
-logger.info(f"Fixing nodriver in {nodriver_path}")
-# delete CookiePartitionKey declaration
-with nodriver_path.open("r+") as f:
-    lines = f.readlines()
-found_def = False
-found_body = False
-i = -1
-while i < len(lines):
-    i += 1
-    line = lines[i]
-    strip_line = line.strip("\n")
-    if not found_def and line.startswith("class CookiePartitionKey:"):
-        logger.info(f"Found line {i}: {strip_line}")
-        found_def = True
-        continue
-    if found_def:
-        if line.startswith("    def from_json"):
-            logger.info(f"Found line {i}: {strip_line}")
-            found_body = True
-            continue
-        if found_body:
-            if line.startswith(("\t\t", "        ")):
-                logger.info(f"Removing line {i}: {strip_line}")
-                lines.pop(i)
-                i -= 1
-                continue
-            else:
-                lines = lines[:i] + [new_cookie_partition_key] + lines[i:]
-                break
-
-
-with nodriver_path.open("w") as f:
-    f.writelines(lines)
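For context, the deleted script monkey-patched nodriver's generated CDP bindings so that `CookiePartitionKey.from_json` accepts both the older string form and the newer object form of the partition key. A self-contained sketch of what the patched class ends up looking like (the dataclass shape is an assumption; the method body is taken verbatim from the `new_cookie_partition_key` string above):

from dataclasses import dataclass


@dataclass
class CookiePartitionKey:
    top_level_site: str
    has_cross_site_ancestor: bool

    @classmethod
    def from_json(cls, json):
        # Older Chrome builds send a bare string; newer ones send a dict.
        if isinstance(json, str):
            return cls(top_level_site=json, has_cross_site_ancestor=False)
        return cls(
            top_level_site=str(json["topLevelSite"]),
            has_cross_site_ancestor=bool(json["hasCrossSiteAncestor"]),
        )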
main.py (36 lines changed)
@@ -30,9 +30,16 @@ def read_root():
 async def health_check():
     """Health check endpoint."""
     logger.info("Health check")
-    # browser: Chrome = await new_browser()
-    # browser.get("https://google.com")
-    # browser.stop()
+    health_check_request = read_item(
+        LinkRequest.model_construct(url="https://prowlarr.servarr.com/v1/ping")
+    )
+
+    if health_check_request.solution.status != 200:
+        raise HTTPException(
+            status_code=500,
+            detail="Health check failed",
+        )
+
     return {"status": "ok"}
 
 
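The endpoint now performs a real end-to-end fetch through `read_item` against the Prowlarr ping URL instead of the commented-out browser smoke test, so a 200 from the health route means the whole bypass stack works, not just that the process is alive. A quick manual check, assuming the FastAPI `app` object is importable from `main` and the route is `/health` (per the Dockerfile HEALTHCHECK):

from fastapi.testclient import TestClient

from main import app  # assumed import path

client = TestClient(app)
response = client.get("/health")
print(response.status_code, response.json())  # expect: 200 {'status': 'ok'}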
@@ -45,25 +52,29 @@ def read_item(request: LinkRequest):
     response: LinkResponse
 
     # start_time = int(time.time() * 1000)
-    with SB(uc=True, locale_code="en", test=False, xvfb=True, ad_block=True) as sb:
-        sb: BaseCase
-        sb.uc_open_with_reconnect(request.url)
-        sb.uc_gui_click_captcha()
-        logger.info(f"Got webpage: {request.url}")
-        sb.save_screenshot("screenshot.png")
-        logger.info(f"Got webpage: {request.url}")
-
-        source = sb.get_page_source()
-        source_bs = BeautifulSoup(source, "html.parser")
-        title_tag = source_bs.title
-        if title_tag is None:
-            raise HTTPException(status_code=500, detail="Title tag not found")
-
-        if title_tag.string in src.utils.consts.CHALLENGE_TITLES:
-            raise HTTPException(status_code=500, detail="Could not bypass challenge")
-
-        title = title_tag.string
-        logger.info(f"Title: {title}")
-        response = LinkResponse(
-            message="Success",
-            solution=Solution(
+    try:
+        with SB(uc=True, locale_code="en", test=False, xvfb=True, ad_block=True) as sb:
+            sb: BaseCase
+            sb.uc_open_with_reconnect(request.url)
+            source = sb.get_page_source()
+            source_bs = BeautifulSoup(source, "html.parser")
+            title_tag = source_bs.title
+            logger.info(f"Got webpage: {request.url}")
+            if title_tag in src.utils.consts.CHALLENGE_TITLES:
+                logger.info("Challenge detected")
+                sb.uc_gui_click_captcha()
+                logger.info("Clicked captcha")
+                sb.save_screenshot("screenshot.png")
+
+            source = sb.get_page_source()
+            source_bs = BeautifulSoup(source, "html.parser")
+            title_tag = source_bs.title
+
+            if title_tag and title_tag.string in src.utils.consts.CHALLENGE_TITLES:
+                raise HTTPException(
+                    status_code=500, detail="Could not bypass challenge"
+                )
+
+            response = LinkResponse(
+                message="Success",
+                solution=Solution(
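The `title_tag and title_tag.string in ...` guard replaces the separate `is None` check: BeautifulSoup's `.title` is `None` for pages without a `<title>`, and the old code turned that into a hard 500 before the challenge check ever ran. A small illustration (`CHALLENGE_TITLES` here is an assumed subset of `src.utils.consts.CHALLENGE_TITLES`):

from bs4 import BeautifulSoup

CHALLENGE_TITLES = ["Just a moment..."]  # assumed subset, for illustration only

pages = [
    "<html><head></head><body>no title at all</body></html>",
    "<html><head><title>Just a moment...</title></head></html>",
]
for html in pages:
    title_tag = BeautifulSoup(html, "html.parser").title
    # The guard short-circuits on None instead of raising AttributeError
    blocked = bool(title_tag and title_tag.string in CHALLENGE_TITLES)
    print(title_tag, blocked)  # None False, then <title>Just a moment...</title> True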
@@ -76,6 +87,9 @@ def read_item(request: LinkRequest):
                 ),
                 startTimestamp=start_time,
             )
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        raise HTTPException(status_code=500, detail="Unknown error, check logs") from e
 
     return response
 
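The new `except` collapses any failure into a generic 500, but `raise ... from e` keeps the original exception chained, so the real cause still shows up in the logs behind the generic message. A tiny standalone demonstration:

try:
    try:
        raise ValueError("browser crashed")  # stand-in for a bypass failure
    except Exception as e:
        raise RuntimeError("Unknown error, check logs") from e
except RuntimeError as err:
    print(repr(err.__cause__))  # ValueError('browser crashed') stays attached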
@@ -4,13 +4,13 @@ import time
 from http import HTTPStatus
 from typing import Any
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 
 class LinkRequest(BaseModel):
-    cmd: str
+    cmd: str = "get"
     url: str
-    maxTimeout: int  # noqa: N815 # Ignore to preserve compatibility
+    max_timeout: int = Field(30, alias="maxTimeout")
 
 
 class ProtectionTriggeredError(Exception):
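The `Field(30, alias="maxTimeout")` change keeps the FlareSolverr-style camelCase key on the wire while the Python side uses snake_case, and gives the field a default. Under pydantic v2 defaults, input keys are matched against the alias only; a sketch (the model mirrors the diff, the calls are illustrative):

from pydantic import BaseModel, Field


class LinkRequest(BaseModel):
    cmd: str = "get"
    url: str
    max_timeout: int = Field(30, alias="maxTimeout")


req = LinkRequest.model_validate({"url": "https://example.com", "maxTimeout": 60})
print(req.max_timeout)                # 60: populated via the alias
print(req.model_dump())               # {'cmd': 'get', 'url': ..., 'max_timeout': 60}
print(req.model_dump(by_alias=True))  # {'cmd': 'get', 'url': ..., 'maxTimeout': 60}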
@@ -39,7 +39,9 @@ def test_bypass(website: str):
 
     response = client.post(
         "/v1",
-        json=LinkRequest(url=website, maxTimeout=30, cmd="request.get").model_dump(),
+        json=LinkRequest.model_construct(
+            url=website, max_timeout=30, cmd="request.get"
+        ).model_dump(),
     )
 
     assert response.status_code == HTTPStatus.OK
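The switch to `model_construct` matters because of the alias above: with `populate_by_name` unset, the validating constructor only recognizes `maxTimeout`, so a `max_timeout=` keyword would be ignored as an extra key and the default would win. `model_construct` bypasses validation and assigns fields by name directly (model as in the diff, calls illustrative):

from pydantic import BaseModel, Field


class LinkRequest(BaseModel):
    cmd: str = "get"
    url: str
    max_timeout: int = Field(30, alias="maxTimeout")


validated = LinkRequest(url="https://example.com", max_timeout=99)
print(validated.max_timeout)  # 30: the keyword doesn't match the alias, default wins

constructed = LinkRequest.model_construct(url="https://example.com", max_timeout=99)
print(constructed.max_timeout)  # 99: fields set directly, no validation at all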