74 lines
1.8 KiB
Python
74 lines
1.8 KiB
Python
from random import randint, choice
from typing import Optional
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup
|
|
|
|
# Root of the site that is scraped.
BASE_URL = "https://rule34.xxx"

# Post-listing endpoint; further query parameters are appended to it.
URL = BASE_URL + "/index.php?page=post&s=list"

# Browser-like User-Agent header sent along with the HTTP requests.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Inclusive upper bound for the randomly chosen "pid" query value.
MAXIMUM = 999

# In-memory tag storage.
TAGS: set[str] = set()
|
|
|
|
def add_tags(tags: list[str]) -> None:
    """Add every tag in *tags* to the module-level ``TAGS`` set.

    Tags that are already stored are left as they are (set semantics).
    """
    for tag in tags:
        TAGS.add(tag)
|
|
|
|
def del_tags(tags: list[str]) -> None:
    """Remove every tag in *tags* from the module-level ``TAGS`` set.

    Tags that are not currently stored are ignored silently.
    """
    TAGS.difference_update(tags)
|
|
|
|
def get_tags_str() -> str:
    """Return the stored tags as a "+"-joined string for display.

    When no tags are stored, a human-readable placeholder is returned
    instead of an empty string. ``TAGS`` is a set, so the order of the
    tags in the result is arbitrary.
    """
    if not TAGS:
        return "(нет тегов)"
    return "+".join(TAGS)
|
|
|
|
def get_tags() -> str:
    """Return the stored tags as a "+"-joined string for use in a query.

    Yields an empty string when no tags are stored (joining an empty
    set already produces ``""``). ``TAGS`` is a set, so the order of the
    tags in the result is arbitrary.
    """
    return "+".join(TAGS)
|
|
|
|
def _build_list_url() -> str:
    """Return a listing-page URL with a random ``pid`` and the current tags."""
    tags = get_tags()
    pid = randint(1, MAXIMUM)
    if not tags:
        return f"{URL}&pid={pid}"
    # Percent-encode everything except the "+" separators so that a tag
    # containing "&", "#", "%" or non-ASCII text cannot corrupt the query.
    return f"{URL}&tags={quote(tags, safe='+')}&pid={pid}"


def _scrape_image_url():
    """Make a single attempt to pick a random image from a listing page.

    Returns:
        The ``src`` of the ``<img>`` found on a randomly chosen post page,
        or ``None`` when any expected element is missing from the HTML.

    Raises:
        Whatever ``requests`` or ``BeautifulSoup`` raise (network errors,
        timeouts, parser problems); the caller decides how to handle them.
    """
    listing = requests.get(_build_list_url(), headers=HEADERS, timeout=5)
    image_list = BeautifulSoup(listing.text, "lxml").find(class_="image-list")
    if not image_list:
        return None

    thumbs = image_list.find_all("span")
    if not thumbs:
        return None

    link = choice(thumbs).find("a")
    href = link.get("href") if link else None
    if not href:
        # An <a> without href would otherwise produce a request to
        # "<BASE_URL>None".
        return None

    # urljoin handles root-relative, bare-relative and absolute hrefs alike,
    # unlike plain string concatenation with BASE_URL.
    post = requests.get(urljoin(BASE_URL, href), headers=HEADERS, timeout=10)
    flexi = BeautifulSoup(post.text, "lxml").find(class_="flexi")
    if not flexi:
        return None

    img = flexi.find("img")
    if not img:
        return None

    return img.get("src") or None


def get_url(max_attempts: Optional[int] = None) -> str:
    """Return the URL of a random image matching the stored tags.

    Each attempt fetches a random listing page, picks a random post from
    it and extracts the image ``src`` from the post page. Attempts that
    fail (missing markup or any exception) are retried.

    Args:
        max_attempts: Maximum number of attempts before giving up. The
            default ``None`` keeps the original behaviour of retrying
            indefinitely until an image URL is found.

    Returns:
        The image URL as found in the ``src`` attribute.

    Raises:
        RuntimeError: Only when ``max_attempts`` is given and every
            attempt failed.
    """
    attempt = 0
    while max_attempts is None or attempt < max_attempts:
        attempt += 1
        try:
            url = _scrape_image_url()
        except Exception as e:
            # Deliberately broad: this is a best-effort retry boundary, so
            # any failure of a single attempt is reported and retried.
            print(f"[get_url ERROR] {e}")
            continue
        if url:
            return url

    raise RuntimeError(f"get_url: no image found after {max_attempts} attempts")
|
|
|