Byparr/src/utils/browser.py

127 lines
3.8 KiB
Python
Raw Normal View History

import asyncio
2024-07-24 20:38:23 +00:00
2024-07-24 13:57:40 +00:00
import nodriver as webdriver
from nodriver.core.element import Element
from src.utils import logger
2024-07-25 00:06:56 +00:00
from src.utils.consts import CHALLENGE_TITLES
2024-07-25 21:00:31 +00:00
from src.utils.extentions import download_extentions
# Evaluated once, when this module is imported. new_browser() joins these
# entries with commas into the browser's --load-extension argument
# (presumably paths to unpacked extensions -- confirm in src.utils.extentions).
downloaded_extentions = download_extentions()
2024-07-24 13:57:40 +00:00
async def new_browser():
    """
    Start a browser with the sandbox disabled and the downloaded extensions loaded.

    Returns
    -------
        The browser instance produced by awaiting ``webdriver.start``.

    Raises
    ------
        Whatever ``webdriver.Config`` or ``webdriver.start`` raise; nothing is
        caught here.

    """
    browser_config: webdriver.Config = webdriver.Config()
    browser_config.sandbox = False

    # All extensions are handed over in one comma-separated switch value.
    extension_list = ",".join(downloaded_extentions)
    browser_config.add_argument(f"--load-extension={extension_list}")

    browser = await webdriver.start(config=browser_config)
    return browser
async def bypass_cloudflare(page: webdriver.Tab):
    """
    Wait for, and if needed click through, a Cloudflare challenge on a page.

    Args:
    ----
        page (webdriver.Tab): The tab to bypass the Cloudflare challenge on.

    Returns:
    -------
        bool: True if a challenge title was seen at least once before the page
        title left CHALLENGE_TITLES, False if no challenge title was ever seen.

    Raises:
    ------
        InvalidElementError: If the challenge text lookup yields something that
        is not an Element, or if the chain of nested divs below its parent
        cannot be followed.

    Notes:
    -----
        The function loops until the page title is no longer one of
        CHALLENGE_TITLES. On every pass it looks up the challenge prompt text
        (3 second timeout). When the lookup finds nothing or times out, the
        loop starts over. Otherwise it walks from the prompt's parent through
        three levels of "first div child" to the element expected to own a
        shadow root, and clicks the first child inside that shadow root.

    """
    challenged = False
    while True:
        # presumably refreshes page.target (and so the title) -- confirm in nodriver
        await page
        logger.debug(f"Current page: {page.target.title}")

        if page.target.title not in CHALLENGE_TITLES:
            return challenged

        if not challenged:
            logger.info("Found challenge")
            challenged = True

        # Reset on every pass: if the lookup times out, `elem` must neither be
        # unbound (first pass) nor still hold the element of an earlier pass.
        elem = None
        try:
            elem = await page.find(
                "Verify you are human by completing the action below.",
                timeout=3,
            )
        except asyncio.TimeoutError:
            # If challenge solves by itself
            if page.target.title not in CHALLENGE_TITLES:
                return challenged

        if elem is None:
            logger.debug("Couldn't find the title, trying other method...")
            continue

        if not isinstance(elem, Element):
            logger.fatal("Element is a string, please report this to Byparr dev")
            raise InvalidElementError

        elem = elem.parent
        # Get the element containing the shadow root
        for _ in range(3):
            if elem is not None:
                elem = get_first_div(elem)
            else:
                raise InvalidElementError

        if isinstance(elem, Element) and elem.shadow_roots:
            # Wrap the first shadow root as an Element to reach its first child.
            inner_elem = Element(elem.shadow_roots[0], page, elem.tree).children[0]
            if isinstance(inner_elem, Element):
                logger.debug("Clicking element")
                await inner_elem.mouse_click()
            else:
                logger.warn(
                    "Element is a string, please report this to Byparr dev"
                )  # I really hope this never happens
        else:
            logger.warn("Coulnd't find checkbox, trying again...")
def get_first_div(elem):
    """
    Return the first direct child of the given element whose tag is ``div``.

    Args:
    ----
        elem: The element whose ``children`` are scanned in order.

    Returns:
    -------
        The first child whose ``tag_name`` equals ``"div"``.

    Raises:
    ------
        InvalidElementError: If none of the children is a div.

    """
    first_div = next(
        (node for node in elem.children if node.tag_name == "div"),
        None,
    )
    if first_div is None:
        raise InvalidElementError
    return first_div
2024-07-25 21:00:31 +00:00
class InvalidElementError(Exception):
    """Raised when a page element is missing or is not of the expected kind."""