Version 0.6.2: code cleanup

This commit is contained in:
Niken
2025-11-23 23:17:00 +03:00
parent b6c1c60609
commit 7495062a8a
15 changed files with 183 additions and 97 deletions
+61 -45
View File
@@ -1,11 +1,8 @@
import requests
import asyncio
from random import randint, choice
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright
# Root of the target site; relative post links are resolved against it.
BASE_URL = "https://rule34.xxx"
# HTTP headers carrying a desktop browser User-Agent string.
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}
# Post listing page; `tags` and `pid` query parameters are appended to it.
URL = f"{BASE_URL}/index.php?page=post&s=list"
# Upper bound (inclusive) for the randomly chosen `pid` query parameter.
MAXIMUM = 999
@@ -25,49 +22,68 @@ def get_tags_str() -> str:
def get_tags() -> str:
    """Return the entries of ``TAGS`` joined with ``+``.

    An empty string is returned when ``TAGS`` is empty (or otherwise falsy),
    so callers can test the result for truthiness before adding it to a URL.
    """
    if not TAGS:
        return ""
    return "+".join(TAGS)
async def get_url():
    """Return the image URL of a randomly chosen post.

    Launches a headless Firefox through Playwright, opens a listing page with
    a random ``pid`` (filtered by ``get_tags()`` when tags are configured),
    picks a random post link from the ``.image-list`` block, opens that post
    and reads the ``src`` attribute of the ``<img>`` inside the ``.flexi``
    container.

    Whenever an expected element is missing, or any exception is raised while
    navigating, the attempt is abandoned and a new random page is tried. The
    coroutine therefore only returns once a non-empty URL has been found.

    Returns:
        The ``src`` attribute value of the selected post's image.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        try:
            page = await browser.new_page(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
            )

            while True:
                try:
                    tags = get_tags()
                    pid = randint(1, MAXIMUM)

                    # Build the listing URL; the tags filter is optional.
                    if tags:
                        url_page = f"{URL}&tags={tags}&pid={pid}"
                    else:
                        url_page = f"{URL}&pid={pid}"

                    await page.goto(url_page, timeout=5000)

                    # Locate the block that holds the post thumbnails.
                    block = await page.query_selector(".image-list")
                    if not block:
                        continue

                    spans = await block.query_selector_all("span")
                    if not spans:
                        continue

                    link_el = await choice(spans).query_selector("a")
                    if not link_el:
                        continue

                    href = await link_el.get_attribute("href")
                    if not href:
                        continue

                    # urljoin copes with hrefs both with and without a
                    # leading slash, unlike plain string concatenation.
                    post_url = urljoin(f"{BASE_URL}/", href)
                    await page.goto(post_url, timeout=30000)

                    flexi = await page.query_selector(".flexi")
                    if not flexi:
                        continue

                    img_el = await flexi.query_selector("img")
                    if not img_el:
                        continue

                    url = await img_el.get_attribute("src")
                    if not url:
                        continue

                    return url
                except Exception as e:
                    # Best-effort scraping: report the failure and retry
                    # with another random page.
                    print(f"[get_url ERROR] {e}")
        finally:
            # Close the browser on every exit path (success, cancellation,
            # or an error raised outside the retry loop).
            await browser.close()
# Usage example
async def main():
    """Fetch a single image URL via ``get_url`` and print it."""
    image_url = await get_url()
    print("Result URL:", image_url)


if __name__ == "__main__":
    asyncio.run(main())