refactor and test in dockerfile

This commit is contained in:
Thephaseless 2024-10-19 20:35:11 +00:00
parent 9dbd088e64
commit 9fbe751dd8
9 changed files with 99 additions and 57 deletions

View File

@ -24,7 +24,6 @@
}
}
},
"postStartCommand": "./entrypoint.sh",
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.

View File

@ -55,4 +55,5 @@ RUN poetry install
COPY fix_nodriver.py ./
RUN . /app/.venv/bin/activate && python fix_nodriver.py
COPY . .
RUN ./run_vnc.sh && . /app/.venv/bin/activate && poetry run pytest -n auto
CMD ["./entrypoint.sh"]

View File

@ -1,19 +1,6 @@
#!/bin/sh
rm -f /tmp/.X0-lock
# Run Xvfb on display 0.
Xvfb :0 -screen 0 1280x720x16 &
# Run fluxbox windows manager on display 0.
fluxbox -display :0 &
# Run x11vnc on display 0
x11vnc -display :0 -forever -ncache 10 &
# Add delay
sleep 5
./run_vnc.sh
# Activate virtual environment
export DISPLAY=:0
. .venv/bin/activate && python3 main.py

View File

@ -31,6 +31,7 @@ async def read_item(request: LinkRequest):
logger.info(f"Request: {request}")
start_time = int(time.time() * 1000)
browser = await new_browser()
await asyncio.sleep(1)
page = await browser.get(request.url)
await page.bring_to_front()
timeout = request.maxTimeout
@ -38,9 +39,10 @@ async def read_item(request: LinkRequest):
timeout = None
try:
challenged = await asyncio.wait_for(bypass_cloudflare(page), timeout=timeout)
except asyncio.TimeoutError:
logger.info("Timed out bypassing Cloudflare")
except Exception as e:
logger.error(await page.get_content())
logger.fatal("Element is a string, please report this to Byparr dev")
browser.stop()
raise HTTPException(detail="Couldn't bypass", status_code=408) from e
logger.info(f"Got webpage: {request.url}")

36
poetry.lock generated
View File

@ -233,6 +233,20 @@ files = [
dnspython = ">=2.0.0"
idna = ">=2.0.0"
[[package]]
name = "execnet"
version = "2.1.1"
description = "execnet: rapid multi-Python deployment"
optional = false
python-versions = ">=3.8"
files = [
{file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"},
{file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"},
]
[package.extras]
testing = ["hatch", "pre-commit", "pytest", "tox"]
[[package]]
name = "fastapi"
version = "0.111.1"
@ -771,6 +785,26 @@ pytest = ">=8.2,<9"
docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
[[package]]
name = "pytest-xdist"
version = "3.6.1"
description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
optional = false
python-versions = ">=3.8"
files = [
{file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"},
{file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"},
]
[package.dependencies]
execnet = ">=2.1"
pytest = ">=7.0.0"
[package.extras]
psutil = ["psutil (>=3.0)"]
setproctitle = ["setproctitle"]
testing = ["filelock"]
[[package]]
name = "python-dotenv"
version = "1.0.1"
@ -1329,4 +1363,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "9481a9ebbb507cb1adaf9a980e4b701afaf26fb8c323c914d8349f741ab4e805"
content-hash = "fca1ae6988ae2e5b1d223ff008ed938c2daa58082b5996c2db3c33c8214df482"

View File

@ -14,6 +14,7 @@ nodriver = "^0.34"
requests = "^2.32.3"
httpx = "^0.27.2"
pytest-asyncio = "^0.24.0"
pytest-xdist = "^3.6.1"
[build-system]

16
run_vnc.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/sh
# Start the X display stack on display :0: Xvfb, the fluxbox window manager
# and x11vnc. Each service is launched with `&`, so this script does not
# block on them; it only waits for the fixed delay at the end.
export DISPLAY=:0
# Remove the lock file for display :0 (-f: no error if it is absent);
# presumably a stale lock would otherwise stop Xvfb from starting.
rm -f /tmp/.X0-lock
# Run Xvfb on display 0 (screen 0 at 1280x720, 16-bit depth).
Xvfb :0 -screen 0 1280x720x16 &
# Run fluxbox window manager on display 0.
fluxbox -display :0 &
# Run x11vnc on display 0
x11vnc -display :0 -forever -ncache 10 &
# Fixed 5-second delay; presumably gives the background services time to
# come up before the caller continues. Nothing here checks readiness.
sleep 5

View File

@ -59,69 +59,70 @@ async def bypass_cloudflare(page: webdriver.Tab):
"""
challenged = False
while True:
await page
await asyncio.sleep(1)
logger.debug(f"Current page: {page.target.title}")
if page.target.title not in CHALLENGE_TITLES:
return challenged
if not challenged:
logger.info("Found challenge")
challenged = True
if (
page.target.title != "Just a moment..."
): # If not in cloudflare, wait for autobypass
await asyncio.sleep(3)
logger.debug("Waiting for challenge to complete")
continue
loaded = False
try:
elem = await page.find("lds-ring", timeout=3)
parent = elem.parent
if not isinstance(parent, Element) or parent.attributes is None:
continue
for attr in parent.attributes:
if attr == "display: none; visibility: hidden;":
loaded = True
elem = await page.find("lds-ring")
except asyncio.TimeoutError as e:
logger.error(
"Couldn't find lds-ring, probably not a cloudflare challenge, trying again..."
)
raise InvalidElementError from e
if elem is None:
logger.error("elem is None")
logger.debug(elem)
raise InvalidElementError
except asyncio.TimeoutError:
logger.debug("Challenge loaded")
else:
if not loaded:
logger.debug("Challenge still loading")
continue
parent = elem.parent
if not isinstance(parent, Element) or parent.attributes is None:
logger.error("parent is not an element or has no attributes")
logger.debug(parent)
raise InvalidElementError
for attr in parent.attributes:
if attr == "display: none; visibility: hidden;":
loaded = True
logger.info("Page loaded")
if not loaded:
logger.debug("Challenge still loading")
continue
await page
logger.debug("Couldn't find the title, trying other method...")
elem = await page.find("input")
elem = elem.parent
# Get the element containing the shadow root
if isinstance(elem, Element) and elem.shadow_roots:
logger.info("Found shadow root")
inner_elem = Element(elem.shadow_roots[0], page, elem.tree).children[0]
if isinstance(inner_elem, Element):
logger.info("Found elem inside shadow root")
logger.debug("Clicking element")
await inner_elem.mouse_click()
await asyncio.sleep(3)
else:
logger.warning(
"Element is a string, please report this to Byparr dev"
) # I really hope this never happens
logger.warning(inner_elem)
"Couldn't find element containing shadow root, trying again..."
)
logger.debug(inner_elem)
else:
logger.warning("Coulnd't find checkbox, trying again...")
def get_first_div(elem):
    """
    Retrieve the first div element from the given element's children.

    Only the direct children of ``elem`` are inspected; the search does not
    descend into grandchildren.

    Args:
    ----
        elem: The parent element whose children are searched for a div.

    Returns:
    -------
        The first child whose ``tag_name`` equals ``"div"``.

    Raises:
    ------
        InvalidElementError: If none of the children is a div.

    """
    for child in elem.children:
        if child.tag_name == "div":
            return child
    # No div among the direct children: there is no fallback value.
    raise InvalidElementError
logger.debug(elem)
class InvalidElementError(Exception):

View File

@ -15,6 +15,7 @@ test_websites = [
"https://extratorrent.st/", # github is blocking these
"https://idope.se/", # github is blocking these
"https://www.ygg.re/",
"https://speed.cd/browse/freeleech",
]