Byparr/src/utils/browser.py

import asyncio

import nodriver as webdriver
from nodriver.core.element import Element

from src.utils import logger
from src.utils.consts import CHALLENGE_TITLES
from src.utils.extentions import download_extentions

# Evaluated once, at import time. The result is comma-joined into the
# --load-extension argument in new_browser(), so it is presumably an iterable
# of extension path strings — confirm against src.utils.extentions.
downloaded_extentions = download_extentions()


async def new_browser():
    """
    Start a browser with the sandbox disabled and the downloaded extensions loaded.

    Returns
    -------
        The browser instance obtained by awaiting ``webdriver.start``.

    Raises
    ------
        Whatever ``webdriver.Config`` or ``webdriver.start`` raise; nothing is
        caught here.

    """
    browser_config: webdriver.Config = webdriver.Config()
    browser_config.sandbox = False

    # All extensions go into a single comma-separated --load-extension flag.
    extension_list = ",".join(downloaded_extentions)
    browser_config.add_argument(f"--load-extension={extension_list}")

    browser = await webdriver.start(config=browser_config)
    return browser


async def bypass_cloudflare(page: webdriver.Tab):
    """
    Wait out, and if needed click through, a Cloudflare challenge on the page.

    Args:
    ----
        page (webdriver.Tab): The tab whose title is compared against
            CHALLENGE_TITLES and in which the challenge widget is searched.

    Returns:
    -------
        bool: True if a challenge title was seen at least once before the page
        moved on, False if the page never showed a challenge title.

    Raises:
    ------
        InvalidElementError: If the challenge text resolves to something that is
        not an Element, if that element has no parent, or if the expected chain
        of nested divs below the parent is missing.

    Notes:
    -----
        The loop runs until the page title is no longer one of CHALLENGE_TITLES;
        there is no overall deadline, so callers that need one must impose it
        themselves. On each pass the challenge text is looked up (3 second
        timeout). When it is found, the function walks from the text's parent
        through three nested first-div children to the element expected to host
        the shadow root and clicks the first child of that shadow root. When the
        text or the checkbox cannot be found, the loop simply tries again.

    """
    challenged = False
    while True:
        await page
        logger.debug(f"Current page: {page.target.title}")
        if page.target.title not in CHALLENGE_TITLES:
            return challenged
        if not challenged:
            logger.info("Found challenge")
            challenged = True

        # Reset on every pass: if the lookup times out, `elem` must not be
        # unbound (first pass) or still point at an element from an earlier pass.
        elem = None
        try:
            elem = await page.find(
                "Verify you are human by completing the action below.",
                timeout=3,
            )
        # If challenge solves by itself
        except asyncio.TimeoutError:
            if page.target.title not in CHALLENGE_TITLES:
                return challenged

        if elem is None:
            logger.debug("Couldn't find the title, trying other method...")
            continue

        if not isinstance(elem, Element):
            logger.fatal("Element is a string, please report this to Byparr dev")
            raise InvalidElementError
        elem = elem.parent
        # Get the element containing the shadow root
        for _ in range(3):
            if elem is not None:
                elem = get_first_div(elem)
            else:
                raise InvalidElementError

        if isinstance(elem, Element) and elem.shadow_roots:
            shadow_children = Element(elem.shadow_roots[0], page, elem.tree).children
            # An empty shadow root would make `[0]` raise IndexError; presumably
            # the widget just has not rendered yet, so retry like the other
            # "checkbox not found" case.
            if not shadow_children:
                logger.warn("Coulnd't find checkbox, trying again...")
                continue
            inner_elem = shadow_children[0]
            if isinstance(inner_elem, Element):
                logger.debug("Clicking element")
                await inner_elem.mouse_click()
            else:
                logger.warn(
                    "Element is a string, please report this to Byparr dev"
                )  # I really hope this never happens
        else:
            logger.warn("Coulnd't find checkbox, trying again...")


def get_first_div(elem):
    """
    Return the first direct child of ``elem`` whose tag name is ``"div"``.

    Args:
    ----
        elem: The parent element; its ``children`` are scanned in order.

    Returns:
    -------
        The first child with ``tag_name == "div"``.

    Raises:
    ------
        InvalidElementError: If none of the children is a div.

    """
    first_div = next(
        (node for node in elem.children if node.tag_name == "div"),
        None,
    )
    if first_div is None:
        raise InvalidElementError
    return first_div


class InvalidElementError(Exception):
    """Raised when the expected challenge element structure is not found."""
remove screenshots, improve logging and add timeout for button 2024-07-25 00:39:29 +00:00			`import asyncio`
added screenshots and debug env 2024-07-24 20:38:23 +00:00
first commit 2024-07-24 13:57:40 +00:00			`import nodriver as webdriver`
			`from nodriver.core.element import Element`

			`from src.utils import logger`
use const for logging level 2024-07-25 00:06:56 +00:00			`from src.utils.consts import CHALLENGE_TITLES`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`from src.utils.extentions import download_extentions`

			`downloaded_extentions = download_extentions()`
first commit 2024-07-24 13:57:40 +00:00

			`async def new_browser():`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`"""`
			`Create a new browser instance with the specified configuration.`

			`Returns`
			`-------`
			`A coroutine that resolves to the newly created browser instance.`

			`Raises`
			`------`
			`Any exceptions that may occur during the creation of the browser instance.`

			`"""`
first commit 2024-07-24 13:57:40 +00:00			`config: webdriver.Config = webdriver.Config()`
			`config.sandbox = False`
added default logger 2024-07-24 15:16:46 +00:00			`config.add_argument(f"--load-extension={','.join(downloaded_extentions)}")`
first commit 2024-07-24 13:57:40 +00:00
			`return await webdriver.start(config=config)`


			`async def bypass_cloudflare(page: webdriver.Tab):`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`"""`
			`Asynchronously bypasses Cloudflare challenges on the given web page.`

			`Args:`
			`----`
			`page (webdriver.Tab): The web page to bypass Cloudflare challenges on.`

			`Returns:`
			`-------`
			`bool: True if the page was successfully bypassed, False otherwise.`

			`Raises:`
			`------`
			`Exception: If the element containing the Cloudflare challenge could not be found.`

			`Notes:`
			`-----`
			`This function repeatedly checks the title of the page until it is not in the`
			`list of known Cloudflare challenge titles. Once a challenge is found, it attempts`
			`to locate the element containing the challenge and click it. If the element cannot`
			`be found within a certain time limit, the function will retry. If the element is`
			`found, it will be clicked. If the element cannot be found at all, an exception will`
			`be raised.`

			`"""`
first commit 2024-07-24 13:57:40 +00:00			`challenged = False`
			`while True:`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`await page`
remove screenshots, improve logging and add timeout for button 2024-07-25 00:39:29 +00:00			`logger.debug(f"Current page: {page.target.title}")`
first commit 2024-07-24 13:57:40 +00:00			`if page.target.title not in CHALLENGE_TITLES:`
			`return challenged`
added default logger 2024-07-24 15:16:46 +00:00			`if not challenged:`
			`logger.info("Found challenge")`
			`challenged = True`
remove screenshots, improve logging and add timeout for button 2024-07-25 00:39:29 +00:00			`try:`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`elem = await page.find(`
			`"Verify you are human by completing the action below.",`
			`timeout=3,`
remove screenshots, improve logging and add timeout for button 2024-07-25 00:39:29 +00:00			`)`
use shadow roots and fix HTTP request failed: [500:InternalServerError] #2 2024-08-25 15:24:27 +00:00			`# If challenge solves by itself`
remove screenshots, improve logging and add timeout for button 2024-07-25 00:39:29 +00:00			`except asyncio.TimeoutError:`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`if page.target.title not in CHALLENGE_TITLES:`
			`return challenged`

			`if elem is None:`
tweaks 2024-09-13 19:00:20 +00:00			`logger.debug("Couldn't find the title, trying other method...")`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`continue`

tweaks 2024-09-13 19:00:20 +00:00			`if not isinstance(elem, Element):`
			`logger.fatal("Element is a string, please report this to Byparr dev")`
			`raise InvalidElementError`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`elem = elem.parent`
use shadow roots and fix HTTP request failed: [500:InternalServerError] #2 2024-08-25 15:24:27 +00:00			`# Get the element containing the shadow root`
fix new issue 2024-09-13 18:04:22 +00:00			`for _ in range(3):`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`if elem is not None:`
use shadow roots and fix HTTP request failed: [500:InternalServerError] #2 2024-08-25 15:24:27 +00:00			`elem = get_first_div(elem)`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`else:`
			`raise InvalidElementError`

use shadow roots and fix HTTP request failed: [500:InternalServerError] #2 2024-08-25 15:24:27 +00:00			`if isinstance(elem, Element) and elem.shadow_roots:`
			`inner_elem = Element(elem.shadow_roots[0], page, elem.tree).children[0]`
			`if isinstance(inner_elem, Element):`
			`logger.debug("Clicking element")`
			`await inner_elem.mouse_click()`
			`else:`
			`logger.warn(`
			`"Element is a string, please report this to Byparr dev"`
			`) # I really hope this never happens`
refactor + docstrings 2024-07-25 21:00:31 +00:00			`else:`
			`logger.warn("Coulnd't find checkbox, trying again...")`


use shadow roots and fix HTTP request failed: [500:InternalServerError] #2 2024-08-25 15:24:27 +00:00			`def get_first_div(elem):`
			`"""`
			`Retrieve the first div element from the given element's children.`

			`Args:`
			`----`
			`elem: The parent element to search for a div child.`

			`Returns:`
			`-------`
			`The first div element found, or the original element if no div is found.`

			`"""`
fix new issue 2024-09-13 18:04:22 +00:00			`for child in elem.children:`
			`if child.tag_name == "div":`
			`return child`
			`raise InvalidElementError`
use shadow roots and fix HTTP request failed: [500:InternalServerError] #2 2024-08-25 15:24:27 +00:00

refactor + docstrings 2024-07-25 21:00:31 +00:00			`class InvalidElementError(Exception):`
			`pass`