2024-07-25 00:39:29 +00:00
|
|
|
import asyncio
|
2024-07-24 20:38:23 +00:00
|
|
|
|
2024-07-24 13:57:40 +00:00
|
|
|
import nodriver as webdriver
|
|
|
|
from nodriver.core.element import Element
|
|
|
|
|
|
|
|
from src.utils import logger
|
2024-07-25 00:06:56 +00:00
|
|
|
from src.utils.consts import CHALLENGE_TITLES
|
2024-07-25 21:00:31 +00:00
|
|
|
from src.utils.extentions import download_extentions
|
|
|
|
|
|
|
|
# Evaluated once, when this module is imported. new_browser() joins the
# entries with commas to build Chromium's --load-extension argument.
downloaded_extentions = download_extentions()
|
2024-07-24 13:57:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
async def new_browser():
    """
    Start a Chromium browser with the downloaded extensions loaded.

    The browser is configured to use the executable at ``/usr/bin/chromium``
    with ``sandbox=True``. Every entry of the module-level
    ``downloaded_extentions`` is handed to it through one comma-separated
    ``--load-extension`` argument.

    Returns
    -------
        The result of awaiting ``webdriver.start`` with that configuration.

    Raises
    ------
        Nothing is caught here, so any error from building the configuration
        or starting the browser propagates to the caller.

    """
    browser_config: webdriver.Config = webdriver.Config(
        browser_executable_path="/usr/bin/chromium",
        sandbox=True,
    )

    # Chromium takes all unpacked extensions in a single comma-separated flag.
    extension_paths = ",".join(downloaded_extentions)
    browser_config.add_argument(f"--load-extension={extension_paths}")

    browser = await webdriver.start(config=browser_config)
    return browser
|
|
|
|
|
|
|
|
|
|
|
|
async def bypass_cloudflare(page: webdriver.Tab) -> bool:
    """
    Wait for, and try to click through, a Cloudflare challenge on the given tab.

    Args:
    ----
        page (webdriver.Tab): The tab whose title is polled and whose challenge
            widget is located and clicked.

    Returns:
    -------
        bool: True if a title from ``CHALLENGE_TITLES`` was seen at least once
            before the title left that list, False if the very first poll
            already showed a non-challenge title.

    Raises:
    ------
        InvalidElementError: If the ``lds-ring`` lookup times out or yields
            ``None``, or if its parent is not an ``Element`` with attributes.

    Notes:
    -----
        The title is polled once per second, with no upper bound on the number
        of iterations; the function only returns once the title is no longer in
        ``CHALLENGE_TITLES``.

        For challenge titles other than "Just a moment..." nothing is clicked:
        the function sleeps three more seconds and polls again.

        For "Just a moment..." the function waits until the parent of the
        ``lds-ring`` element carries the hidden style value, then looks up the
        ``input`` element, takes its parent, and clicks the first child of that
        parent's first shadow root. A missing input, shadow root or shadow-root
        child is logged as a warning and retried on the next iteration rather
        than raised.

    """
    challenged = False
    while True:
        await asyncio.sleep(1)

        # Read the title once per iteration so every check below sees the
        # same value (there is no await between the original reads).
        title = page.target.title
        logger.debug(f"Current page: {title}")

        if title not in CHALLENGE_TITLES:
            return challenged

        if not challenged:
            logger.info("Found challenge")
            challenged = True

        if title != "Just a moment...":  # If not in cloudflare, wait for autobypass
            await asyncio.sleep(3)
            logger.debug("Waiting for challenge to complete")
            continue

        try:
            spinner = await page.find("lds-ring")
        except asyncio.TimeoutError as e:
            logger.error(
                "Couldn't find lds-ring, probably not a cloudflare challenge, trying again..."
            )
            raise InvalidElementError from e
        if spinner is None:
            logger.error("elem is None")
            logger.debug(spinner)
            raise InvalidElementError

        parent = spinner.parent
        if not isinstance(parent, Element) or parent.attributes is None:
            logger.error("parent is not an element or has no attributes")
            logger.debug(parent)
            raise InvalidElementError

        # The challenge counts as loaded once any attribute entry of the
        # spinner's parent equals the hidden style value.
        # NOTE(review): presumably this means the loading spinner is hidden.
        loaded = any(
            attr == "display: none; visibility: hidden;"
            for attr in parent.attributes
        )
        if not loaded:
            logger.debug("Challenge still loading")
            continue
        logger.info("Page loaded")

        # NOTE(review): unlike the lds-ring lookup above, a TimeoutError from
        # this lookup is not caught; confirm whether page.find can raise here.
        checkbox = await page.find("input")
        # A missing input must fall through to the retry branch below instead
        # of crashing with AttributeError on `None.parent`.
        elem = checkbox.parent if checkbox is not None else None

        # Get the element containing the shadow root
        if isinstance(elem, Element) and elem.shadow_roots:
            logger.info("Found shadow root")
            shadow_children = Element(elem.shadow_roots[0], page, elem.tree).children
            # An empty shadow root is handled like any other unusable child:
            # warn and retry, rather than raising IndexError.
            inner_elem = shadow_children[0] if shadow_children else None
            if isinstance(inner_elem, Element):
                logger.info("Found elem inside shadow root")
                logger.debug("Clicking element")
                await inner_elem.mouse_click()
                # Give the challenge time to react before polling the title again.
                await asyncio.sleep(3)
            else:
                logger.warning(
                    "Couldn't find element containing shadow root, trying again..."
                )
                logger.debug(inner_elem)
        else:
            logger.warning("Coulnd't find checkbox, trying again...")
            logger.debug(elem)
|
2024-08-25 15:24:27 +00:00
|
|
|
|
|
|
|
|
2024-07-25 21:00:31 +00:00
|
|
|
class InvalidElementError(Exception):
    """Raised by bypass_cloudflare when a challenge element is missing or unusable."""
|