Version 0.6.2: code cleanup

2025-11-23 23:17:00 +03:00
parent b6c1c60609
commit 7495062a8a
15 changed files with 183 additions and 97 deletions
@@ -1,11 +1,8 @@
-import requests
+import asyncio
 from random import randint, choice
-from bs4 import BeautifulSoup
+from playwright.async_api import async_playwright

 BASE_URL = "https://rule34.xxx"
-HEADERS = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
-}
 URL = f"{BASE_URL}/index.php?page=post&s=list"
 MAXIMUM = 999

@@ -25,49 +22,68 @@ def get_tags_str() -> str:
 def get_tags() -> str:
    return "+".join(TAGS) if TAGS else ""

-def get_url():
-    while True:
-        try:
-            tags = get_tags()
-            pid = randint(1, MAXIMUM)

-            # Формируем корректный URL с query‑параметрами
-            if tags:
-                url_page = f"{URL}&tags={tags}&pid={pid}"
-            else:
-                url_page = f"{URL}&pid={pid}"
+async def get_url():
+    async with async_playwright() as p:
+        browser = await p.firefox.launch(headless=True)
+        page = await browser.new_page(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64)")

-            r = requests.get(url_page, headers=HEADERS, timeout=5)
-            soup = BeautifulSoup(r.text, 'lxml')
-            block = soup.find(class_="image-list")
-            if not block:
-                continue
-            block = block.find_all("span")
-            if not block:
+        while True:
+            try:
+                tags = get_tags()
+                pid = randint(1, MAXIMUM)
+
+                # Build a well-formed URL
+                if tags:
+                    url_page = f"{URL}&tags={tags}&pid={pid}"
+                else:
+                    url_page = f"{URL}&pid={pid}"
+
+                await page.goto(url_page, timeout=5000)
+
+                # Look for the block containing the images
+                block = await page.query_selector(".image-list")
+                if not block:
+                    continue
+
+                spans = await block.query_selector_all("span")
+                if not spans:
+                    continue
+
+                link_el = await choice(spans).query_selector("a")
+                if not link_el:
+                    continue
+
+                href = await link_el.get_attribute("href")
+                if not href:
+                    continue
+
+                await page.goto(f"{BASE_URL}{href}", timeout=30000)
+
+                flexi = await page.query_selector(".flexi")
+                if not flexi:
+                    continue
+
+                img_el = await flexi.query_selector("img")
+                if not img_el:
+                    continue
+
+                url = await img_el.get_attribute("src")
+                if not url:
+                    continue
+
+                await browser.close()
+                return url
+
+            except Exception as e:
+                print(f"[get_url ERROR] {e}")
                continue

-            link = choice(block).find("a")
-            if not link:
-                continue

-            r2 = requests.get(f"{BASE_URL}{link.get('href')}", headers=HEADERS, timeout=10)
-            soup_two = BeautifulSoup(r2.text, 'lxml')
-
-            flexi = soup_two.find(class_="flexi")
-            if not flexi:
-                continue
-
-            img = flexi.find("img")
-            if not img:
-                continue
-
-            url = img.get("src")
-            if not url:
-                continue
-
-            return url
-
-        except Exception as e:
-            print(f"[get_url ERROR] {e}")
-            continue
+# Usage example
+async def main():
+    result = await get_url()
+    print("Result URL:", result)

+if __name__ == "__main__":
+    asyncio.run(main())