Compare commits

..

6 Commits

17 changed files with 546 additions and 236 deletions
+10
View File
@@ -0,0 +1,10 @@
TELEGRAM_BOT_TOKEN=your_token_here
# Полное отключение логов и любого хранения (БД, файлы логов, сохранение message_id)
# DISABLE_PERSISTENCE=1
# Или по отдельности:
# DISABLE_LOGGING=1 — нет логов в консоль и в файл
# DISABLE_STORAGE=1 — нет SQLite, /del не работает, группы /set не сохраняются
SCHEDULE_DRIVE_FOLDER_ID=1WhUFHGkS4qC_e84KRArF4ooXHJr8mL5T
+2 -2
View File
@@ -20,10 +20,10 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot):
async def send_welcome(message: Message): async def send_welcome(message: Message):
# Создаём инлайн-кнопку для открытия Web App # Создаём инлайн-кнопку для открытия Web App
keyboard = InlineKeyboardMarkup(inline_keyboard=[ keyboard = InlineKeyboardMarkup(inline_keyboard=[
[InlineKeyboardButton(text="Открыть мини-приложение", web_app=WebAppInfo(url="https://overfit-percussively-nicolas.ngrok-free.dev"))] [InlineKeyboardButton(text="Открыть", web_app=WebAppInfo(url="https://mukhyil.duckdns.org/"))]
]) ])
await message.answer( await message.answer(
f"Расписание на {get_day()} число месяца:", f"Мой сайт для видео",
reply_markup=keyboard reply_markup=keyboard
) )
+8 -2
View File
@@ -12,6 +12,8 @@ logger = getLogger(__name__)
def register_handlers(dp: Dispatcher, state: BotState, bot: Bot) -> int: def register_handlers(dp: Dispatcher, state: BotState, bot: Bot) -> int:
async def init_db(): async def init_db():
if Config.DISABLE_STORAGE:
return
async with aiosqlite.connect(Config.DAYS_TO_DB_PATH) as db: async with aiosqlite.connect(Config.DAYS_TO_DB_PATH) as db:
await db.execute(""" await db.execute("""
CREATE TABLE IF NOT EXISTS days_to_new_year ( CREATE TABLE IF NOT EXISTS days_to_new_year (
@@ -24,6 +26,8 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot) -> int:
logger.info("База данных инициализирована") logger.info("База данных инициализирована")
async def save_days_to_db(user_id: int, days: int): async def save_days_to_db(user_id: int, days: int):
if Config.DISABLE_STORAGE:
return
logger.debug(f"Сохраняем user_id={user_id}, days={days}") logger.debug(f"Сохраняем user_id={user_id}, days={days}")
async with aiosqlite.connect(Config.DAYS_TO_DB_PATH) as db: async with aiosqlite.connect(Config.DAYS_TO_DB_PATH) as db:
await db.execute(""" await db.execute("""
@@ -34,6 +38,8 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot) -> int:
logger.info(f"Запись сохранена: user_id={user_id}, days={days}") logger.info(f"Запись сохранена: user_id={user_id}, days={days}")
async def get_last_days(user_id: int) -> int | None: async def get_last_days(user_id: int) -> int | None:
if Config.DISABLE_STORAGE:
return None
async with aiosqlite.connect(Config.DAYS_TO_DB_PATH) as db: async with aiosqlite.connect(Config.DAYS_TO_DB_PATH) as db:
async with db.execute( async with db.execute(
"SELECT days FROM days_to_new_year WHERE user_id = ?", (int(user_id),) "SELECT days FROM days_to_new_year WHERE user_id = ?", (int(user_id),)
@@ -110,8 +116,8 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot) -> int:
save_message(msg.chat.id, msg.message_id) save_message(msg.chat.id, msg.message_id)
except Exception as e: except Exception as e:
logger.exception(f"Ошибка при обработке uid={uid}: {e}") logger.exception(f"Ошибка при обработке uid={uid}: {e}")
logger.info("Завершён цикл periodic_task, спим 6 часов") logger.info("Завершён цикл periodic_task, спим 24 часов")
await asyncio.sleep(21600) # каждые 6 часов await asyncio.sleep(86400) # каждые 24 часов
asyncio.create_task(periodic_task()) asyncio.create_task(periodic_task())
return 0 return 0
+1 -1
View File
@@ -17,7 +17,7 @@ class TelegramBot:
# Регистрируем обработчики из разных модулей # Регистрируем обработчики из разных модулей
admin.register_handlers(self.dp, self.state, self.bot) admin.register_handlers(self.dp, self.state, self.bot)
# schedule.register_handlers(self.dp, self.state) schedule.register_handlers(self.dp, self.state)
# media.register_handlers(self.dp, self.state, self.bot) # media.register_handlers(self.dp, self.state, self.bot)
# common.register_handlers(self.dp, self.state, self.bot) # common.register_handlers(self.dp, self.state, self.bot)
+33 -5
View File
@@ -1,11 +1,20 @@
import os import os
from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
from typing import Dict from typing import Dict
load_dotenv()
def _env_bool(name: str, default: bool = False) -> bool:
raw = os.getenv(name)
if raw is None:
return default
return raw.strip().lower() in ("1", "true", "yes", "on")
class Config: class Config:
# Загружаем .env
load_dotenv()
# API # API
API_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN") API_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
@@ -33,11 +42,30 @@ class Config:
# Settings # Settings
ANTISPAM_DELAY = 20 ANTISPAM_DELAY = 20
WATCHER_BASE_DELAY = 30 WATCHER_INTERVAL_SEC = 600
WATCHER_RANDOM_DELAY_MIN = 1
WATCHER_RANDOM_DELAY_MAX = 120
SCHEDULE_DRIVE_FOLDER_ID = os.getenv(
"SCHEDULE_DRIVE_FOLDER_ID", "1WhUFHGkS4qC_e84KRArF4ooXHJr8mL5T"
)
# Отключение логов и хранения (см. .env.example)
# DISABLE_PERSISTENCE=1 — выключает и логи, и все БД/файлы сразу
_NO_PERSISTENCE = _env_bool("DISABLE_PERSISTENCE")
DISABLE_LOGGING = (
_env_bool("DISABLE_LOGGING")
if os.getenv("DISABLE_LOGGING") is not None
else _NO_PERSISTENCE
)
DISABLE_STORAGE = (
_env_bool("DISABLE_STORAGE")
if os.getenv("DISABLE_STORAGE") is not None
else _NO_PERSISTENCE
)
# Пути # Пути
LOG_FILE = "storage/log/bot.log" LOG_FILE = Path("storage/log/bot.log")
DAYS_TO_DB_PATH = "addons/x_days_to/days_to_new_year.db" DAYS_TO_DB_PATH = Path("addons/x_days_to/days_to_new_year.db")
if __name__ == "__main__": if __name__ == "__main__":
+46 -7
View File
@@ -17,8 +17,14 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot):
@saving @saving
@admin_required(3) @admin_required(3)
async def send_log(message: Message): async def send_log(message: Message):
if Config.DISABLE_LOGGING:
await message.answer("📝 Логирование отключено (DISABLE_LOGGING=1).")
return
if Config.DISABLE_STORAGE:
await message.answer("📝 Файл логов не ведётся (DISABLE_STORAGE=1).")
return
try: try:
log_file = types.FSInputFile(Config.LOG_FILE) log_file = types.FSInputFile(str(Config.LOG_FILE))
await message.answer_document(log_file, caption="📑 Логи бота") await message.answer_document(log_file, caption="📑 Логи бота")
except FileNotFoundError: except FileNotFoundError:
await message.answer("Файл логов пока не создан.") await message.answer("Файл логов пока не создан.")
@@ -31,7 +37,12 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot):
from utils.mac_metrics import get_macbook_battery_level, get_process_usage from utils.mac_metrics import get_macbook_battery_level, get_process_usage
try: try:
stats = analyze_bot_logs(Config.LOG_FILE) if Config.DISABLE_LOGGING or Config.DISABLE_STORAGE:
await message.answer(
"📊 Аналитика по логам недоступна: логирование или хранение отключено в .env"
)
return
stats = analyze_bot_logs(str(Config.LOG_FILE))
batt = await get_macbook_battery_level() batt = await get_macbook_battery_level()
usage = await get_process_usage() usage = await get_process_usage()
status_text = ( status_text = (
@@ -53,7 +64,12 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot):
async def stat(message: Message): async def stat(message: Message):
from utils.analytics import analyze_bot_logs from utils.analytics import analyze_bot_logs
stats = analyze_bot_logs(Config.LOG_FILE) if Config.DISABLE_LOGGING or Config.DISABLE_STORAGE:
await message.answer(
"📊 Аналитика по логам недоступна: логирование или хранение отключено в .env"
)
return
stats = analyze_bot_logs(str(Config.LOG_FILE))
await message.answer( await message.answer(
create_statistics_text(stats), reply_to_message_id=message.message_id create_statistics_text(stats), reply_to_message_id=message.message_id
) )
@@ -61,6 +77,12 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot):
@dp.message(Command("del")) @dp.message(Command("del"))
@admin_required(1) @admin_required(1)
async def delete_all_messages(message: Message): async def delete_all_messages(message: Message):
if Config.DISABLE_STORAGE:
await message.answer(
"📭 Хранение сообщений отключено (DISABLE_STORAGE=1).",
reply_to_message_id=message.message_id,
)
return
messages = load_messages() messages = load_messages()
if not messages: if not messages:
sent = await message.answer( sent = await message.answer(
@@ -91,16 +113,33 @@ def register_handlers(dp: Dispatcher, state: BotState, bot: Bot):
async def power_control(message: types.Message): async def power_control(message: types.Message):
args = message.text.split() args = message.text.split()
if len(args) < 2: if len(args) < 2:
days = state.watcher_days_ahead
status = "включена" if state.watcher_work else "выключена" status = "включена" if state.watcher_work else "выключена"
await message.answer(f"⏱️ Слежка расписания: {status}") await message.answer(f"⏱️ Слежка расписания: {status} (на {days} дн.)")
return return
command = args[1].lower() command = args[1].lower()
watcher_service = WatcherService(state, bot) watcher_service = WatcherService(state, bot)
if command == "on" and not state.watcher_work: if command == "on":
await watcher_service.start() # Проверяем, есть ли параметр количества дней
await message.answer("✅ Слежка расписания включена") days = 1
if len(args) > 2:
try:
days = int(args[2])
if days < 1:
await message.answer("❌ Количество дней должно быть >= 1")
return
except ValueError:
await message.answer("❌ Неверный формат дней. Используйте: /power on 3")
return
state.watcher_days_ahead = days
if not state.watcher_work:
await watcher_service.start()
await message.answer(f"✅ Слежка расписания включена (на {days} дн.)")
else:
await message.answer(f"✅ Количество дней изменено на {days} дн.")
elif command == "off" and state.watcher_work: elif command == "off" and state.watcher_work:
await watcher_service.stop() await watcher_service.stop()
await message.answer("❌ Слежка расписания выключена") await message.answer("❌ Слежка расписания выключена")
+1 -1
View File
@@ -36,7 +36,7 @@ def register_handlers(dp: Dispatcher, state: BotState):
schedule_service = ScheduleService() schedule_service = ScheduleService()
text, url, day, month = await schedule_service.get_schedule(group, day_offset) text, url, day, month = await schedule_service.get_schedule(group, day_offset)
msg = await message.answer(text, parse_mode="Markdownv2") msg = await message.answer(text, parse_mode="HTML")
save_message(msg.chat.id, msg.message_id) save_message(msg.chat.id, msg.message_id)
@dp.message(Command("prasp")) @dp.message(Command("prasp"))
+4 -10
View File
@@ -1,17 +1,11 @@
from asyncio import run from asyncio import run
from logging import basicConfig, FileHandler, StreamHandler, INFO, getLogger from logging import getLogger
from bot.core import TelegramBot from bot.core import TelegramBot
from config import Config from config import Config
from utils.logging_config import setup_logging
# Настройка логирования setup_logging()
basicConfig(
level=INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
handlers=[FileHandler(Config.LOG_FILE, encoding="utf-8"), StreamHandler()],
force=True,
)
logger = getLogger(__name__) logger = getLogger(__name__)
+1
View File
@@ -11,6 +11,7 @@ class BotState:
last_pinned: Dict[str, int] = None last_pinned: Dict[str, int] = None
watcher_work: bool = False watcher_work: bool = False
watcher_task: Optional[Task] = None watcher_task: Optional[Task] = None
watcher_days_ahead: int = 1
file_id_cache: Dict[str, str] = None file_id_cache: Dict[str, str] = None
last_day: Dict[str, int] = None last_day: Dict[str, int] = None
last_clip_hash: Dict[str, str] = None last_clip_hash: Dict[str, str] = None
+2
View File
@@ -53,6 +53,8 @@
ply==3.11 ply==3.11
propcache==0.3.2 propcache==0.3.2
pycparser==2.23 pycparser==2.23
pymupdf==1.27.2.3
pypdf==6.11.0
pydantic==2.11.10 pydantic==2.11.10
pydantic_core==2.33.2 pydantic_core==2.33.2
pyee==13.0.0 pyee==13.0.0
+109
View File
@@ -0,0 +1,109 @@
from __future__ import annotations
import logging
import re
import ssl
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional
import aiohttp
import certifi
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
DRIVE_FOLDER_EMBED = (
"https://drive.google.com/embeddedfolderview?id={folder_id}#list"
)
DRIVE_DOWNLOAD_URL = "https://drive.google.com/uc?export=download&id={file_id}"
USER_AGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
def _drive_connector() -> aiohttp.TCPConnector:
    """Build a fresh aiohttp connector that verifies TLS against certifi's CA bundle."""
    return aiohttp.TCPConnector(
        ssl=ssl.create_default_context(cafile=certifi.where())
    )
# Immutable record describing one schedule PDF found in the Drive folder listing.
@dataclass(frozen=True)
class DriveScheduleFile:
    # Drive file id; this is the value substituted into the download URL.
    file_id: str
    # File title exactly as it appears in the folder listing.
    name: str
    # Date the schedule is for, as parsed from the file title.
    schedule_date: datetime
class DriveScheduleSource:
    """Find and download student-schedule PDFs in a public Google Drive folder.

    The folder listing is scraped from Drive's embedded folder view and kept in
    an in-memory cache on the instance; PDFs are downloaded by file id.
    """

    # Accepted file titles: "DD.MM.YY по учащимся.pdf" (case-insensitive, any
    # amount of whitespace between the words).
    _FILENAME_RE = re.compile(
        r"^(\d{2})\.(\d{2})\.(\d{2})\s+по\s+учащимся\.pdf$",
        re.IGNORECASE,
    )

    def __init__(self, folder_id: str):
        """Remember the Drive folder id; the listing itself is fetched lazily."""
        self.folder_id = folder_id
        # None means "never fetched"; an empty list is a valid cached result.
        self._files_cache: Optional[List[DriveScheduleFile]] = None

    @staticmethod
    def _parse_filename_date(name: str) -> Optional[datetime]:
        """Extract the schedule date from a file title.

        Returns None when the title does not follow the expected pattern or
        when its digits do not form a real calendar date (e.g. "31.02.25"),
        so a single malformed title cannot abort the whole folder listing.
        """
        match = DriveScheduleSource._FILENAME_RE.match(name.strip())
        if not match:
            return None
        day, month, year = map(int, match.groups())
        try:
            # Two-digit years are interpreted as 20YY.
            return datetime(2000 + year, month, day)
        except ValueError:
            return None

    @staticmethod
    def _find_in(files, target) -> Optional[DriveScheduleFile]:
        """Return the last entry in *files* whose date equals *target*'s date."""
        wanted = (target.year, target.month, target.day)
        for item in reversed(files):
            found = item.schedule_date
            if (found.year, found.month, found.day) == wanted:
                return item
        return None

    async def list_student_schedules(self, force_refresh: bool = False) -> List[DriveScheduleFile]:
        """Return the student-schedule files in the folder, oldest first.

        The result is cached on the instance; pass ``force_refresh=True`` to
        fetch the folder page again. Entries whose title does not parse as a
        schedule file name are skipped.

        Raises:
            aiohttp.ClientResponseError: if Drive answers with an error status.
        """
        if self._files_cache is not None and not force_refresh:
            return self._files_cache

        url = DRIVE_FOLDER_EMBED.format(folder_id=self.folder_id)
        async with aiohttp.ClientSession(
            headers={"User-Agent": USER_AGENT},
            connector=_drive_connector(),
        ) as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                html = await resp.text()

        soup = BeautifulSoup(html, "html.parser")
        files: List[DriveScheduleFile] = []
        for entry in soup.select("div.flip-entry"):
            # Each listing row carries the file id in its element id: "entry-<file_id>".
            entry_id = entry.get("id", "")
            if not entry_id.startswith("entry-"):
                continue
            file_id = entry_id.removeprefix("entry-")
            title_el = entry.select_one(".flip-entry-title")
            if not title_el:
                continue
            name = title_el.get_text(strip=True)
            schedule_date = self._parse_filename_date(name)
            if schedule_date is None:
                continue
            files.append(DriveScheduleFile(file_id=file_id, name=name, schedule_date=schedule_date))

        files.sort(key=lambda item: item.schedule_date)
        self._files_cache = files
        return files

    async def find_for_date(self, target: datetime) -> Optional[DriveScheduleFile]:
        """Return the schedule file for *target*'s calendar date, or None.

        Only year, month and day of *target* are compared. If the lookup misses
        against a listing that was already cached before this call, the listing
        is fetched once more: files are published over time, and a caller that
        keeps one instance alive while polling would otherwise never see them.
        """
        had_cache = self._files_cache is not None
        found = self._find_in(await self.list_student_schedules(), target)
        if found is None and had_cache:
            refreshed = await self.list_student_schedules(force_refresh=True)
            found = self._find_in(refreshed, target)
        return found

    async def download_pdf(self, file_id: str) -> bytes:
        """Download the file with the given Drive id and return its raw bytes.

        Raises:
            aiohttp.ClientResponseError: if Drive answers with an error status.
        """
        url = DRIVE_DOWNLOAD_URL.format(file_id=file_id)
        async with aiohttp.ClientSession(
            headers={"User-Agent": USER_AGENT},
            connector=_drive_connector(),
        ) as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                return await resp.read()
+181 -141
View File
@@ -1,177 +1,217 @@
from datetime import datetime, timedelta from __future__ import annotations
from typing import Optional, Tuple
from playwright.async_api import async_playwright import io
import logging import logging
import aiohttp
from bs4 import BeautifulSoup
import ssl
import certifi
import re import re
from datetime import datetime, timedelta
from html import escape
from typing import List, Optional, Tuple
import fitz
from pypdf import PdfReader
from config import Config
from services.drive_schedule_source import DriveScheduleSource
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
BOUNDARY = r"[^0-9A-Za-zА-Яа-яЁё]" BOUNDARY = r"[^0-9A-Za-zА-Яа-яЁё]"
class ScheduleService: class ScheduleService:
def __init__(self): def __init__(self):
self.base_url = ( folder_id = getattr(Config, "SCHEDULE_DRIVE_FOLDER_ID", None) or (
"https://college.by/accounts/raspis/{mouth:02d}/{day:02d}-PODNAM.htm" "1WhUFHGkS4qC_e84KRArF4ooXHJr8mL5T"
) )
self.drive = DriveScheduleSource(folder_id)
self._pdf_cache: dict[str, bytes] = {}
def _make_url(self, day: int = 0) -> Tuple[str, int, int]: def _resolve_target_date(self, day_offset: int = 0) -> datetime:
"""Генерация URL для расписания""" target = datetime.now()
d = datetime.now() if day_offset == 0:
if day == 0: if target.hour >= 12:
if d.hour >= 12: target += timedelta(days=1)
d += timedelta(days=1) if target.weekday() == 6:
if d.weekday() == 6: target += timedelta(days=1)
d += timedelta(days=1)
return self.base_url.format(day=d.day, mouth=d.month), d.day, d.month
else: else:
return ( target = target.replace(day=int(day_offset))
self.base_url.format(day=int(day), mouth=d.month), return target.replace(hour=0, minute=0, second=0, microsecond=0)
int(day),
int(d.month),
def _next_target_date(self, day_offset: int = 0) -> datetime:
return (datetime.now() + timedelta(days=day_offset)).replace(hour=0, minute=0, second=0, microsecond=0)
async def _load_pdf_for_date(
self, day_offset: int = 0
) -> Tuple[Optional[bytes], Optional[str], int, int]:
target = self._resolve_target_date(day_offset)
day, month = target.day, target.month
drive_file = await self.drive.find_for_date(target)
if not drive_file:
return None, None, day, month
if drive_file.file_id not in self._pdf_cache:
self._pdf_cache[drive_file.file_id] = await self.drive.download_pdf(
drive_file.file_id
) )
import re url = f"https://drive.google.com/file/d/{drive_file.file_id}/view"
return self._pdf_cache[drive_file.file_id], url, day, month
async def get_schedule( async def _load_pdf_for_watcher(
self, group: str, day_offset: int = 0 self, day_offset: int = 1
) -> Tuple[str, str, int, int]: ) -> Tuple[Optional[bytes], Optional[str], int, int]:
"""Получение текста расписания (аналог Rust parse_schedule)""" target = self._next_target_date(day_offset)
url, day, month = self._make_url(day_offset) day, month = target.day, target.month
ssl_context = ssl.create_default_context(cafile=certifi.where()) drive_file = await self.drive.find_for_date(target)
ssl_context.check_hostname = False if not drive_file:
ssl_context.verify_mode = ssl.CERT_NONE return None, None, day, month
connector = aiohttp.TCPConnector(ssl=ssl_context) if drive_file.file_id not in self._pdf_cache:
self._pdf_cache[drive_file.file_id] = await self.drive.download_pdf(
drive_file.file_id
)
headers = { url = f"https://drive.google.com/file/d/{drive_file.file_id}/view"
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36" return self._pdf_cache[drive_file.file_id], url, day, month
}
async with aiohttp.ClientSession(
connector=connector, headers=headers
) as session:
async with session.get(url) as resp:
raw_bytes = await resp.read()
decoded = raw_bytes.decode("cp1251", errors="ignore")
document = BeautifulSoup(decoded, "html.parser")
elements = document.select("p.MsoPlainText b")
found_group = False
schedule_lines = []
# регулярка: ищем точное совпадение группы как отдельного слова
group_pattern = re.compile(rf"\b{re.escape(group)}\b", re.IGNORECASE)
for el in elements:
text = el.get_text(strip=True)
if not found_group:
if group_pattern.search(text):
found_group = True
schedule_lines.append(text)
else:
if "-----" in text or "+----" in text:
break
schedule_lines.append(text)
if not schedule_lines:
result = f"Расписание для группы {group} на {day} число не найдено"
else:
result = f"📅 Расписание для {day} числа:\n```\n"
for line in schedule_lines:
formatted = line.replace("¦", "").replace(" ", " ").strip()
if formatted:
result += f"{formatted}\n"
result += "```"
return result, url, day, month
@staticmethod @staticmethod
def exact_group_regex(group: str) -> re.Pattern: def exact_group_regex(group: str) -> re.Pattern:
# ищем как отдельный токен: граница слева/справа или начало/конец
pattern = rf"(^|{BOUNDARY}){re.escape(group)}({BOUNDARY}|$)" pattern = rf"(^|{BOUNDARY}){re.escape(group)}({BOUNDARY}|$)"
return re.compile(pattern) return re.compile(pattern, re.IGNORECASE)
@staticmethod
def _extract_pdf_lines(pdf_bytes: bytes) -> List[str]:
    """Return every non-blank text line of the PDF, stripped, in page order."""
    pages = PdfReader(io.BytesIO(pdf_bytes)).pages
    return [
        stripped
        for page in pages
        # extract_text() may yield nothing for a page; treat that as empty text.
        for stripped in map(str.strip, (page.extract_text() or "").splitlines())
        if stripped
    ]
@staticmethod
def _parse_group_lines(lines: List[str], group: str) -> List[str]:
    """Cut one group's section out of the extracted schedule lines.

    The section starts at the first line that mentions *group* as a separate
    token and runs up to (not including) the next line containing a table
    separator ("-----" or "+----"). Returns an empty list if the group is
    never mentioned.
    """
    group_re = ScheduleService.exact_group_regex(group)
    remaining = iter(lines)

    # Phase 1: skip ahead to the first line that mentions the group.
    header = next((ln for ln in remaining if group_re.search(ln)), None)
    if header is None:
        return []

    # Phase 2: keep collecting from the same iterator until a separator row.
    collected: List[str] = [header]
    for ln in remaining:
        if "-----" in ln or "+----" in ln:
            break
        collected.append(ln)
    return collected
@staticmethod
def is_schedule_missing(text: str) -> bool:
lowered = text.lower()
return "не найдено" in lowered or "не опубликовано" in lowered
@staticmethod
def _format_schedule_html(day: int, schedule_lines: List[str]) -> str:
body_lines = []
for line in schedule_lines:
formatted = line.replace("¦", "").replace(" ", " ").strip()
if formatted:
body_lines.append(formatted)
body = escape("\n".join(body_lines))
return f"📅 Расписание для {day} числа:\n<pre>{body}</pre>"
async def is_published_for(self, day_offset: int = 0) -> bool:
    """Report whether the Drive folder has a schedule file for today + *day_offset* days."""
    drive_file = await self.drive.find_for_date(self._next_target_date(day_offset))
    return drive_file is not None
async def get_schedule(
    self, group: str, day_offset: int = 0
) -> Tuple[str, str, int, int]:
    """Build the HTML schedule message for *group*.

    Returns a tuple ``(text, url, day, month)``. ``text`` is either the
    formatted schedule or a warning that the PDF is not published yet / the
    group is not present in it. ``url`` points at the PDF when one was found
    and at the Drive folder otherwise.
    """
    pdf_bytes, url, day, month = await self._load_pdf_for_date(day_offset)
    folder_url = "https://drive.google.com/drive/folders/" + self.drive.folder_id

    # No PDF for that date: answer with a link to the folder instead.
    if not pdf_bytes:
        not_published = (
            f"⚠️ Расписание на {day:02d}.{month:02d} ещё не опубликовано "
            f"в <a href=\"{folder_url}\">Google Drive</a>"
        )
        return not_published, folder_url, day, month

    all_lines = self._extract_pdf_lines(pdf_bytes)
    group_lines = self._parse_group_lines(all_lines, group)
    if group_lines:
        text = self._format_schedule_html(day, group_lines)
    else:
        # The group name comes from the user, so escape it for HTML parse mode.
        text = f"⚠️ Расписание для группы {escape(group)} на {day} число не найдено"
    return text, url or folder_url, day, month
async def get_pschedule( async def get_pschedule(
self, group: str, day_offset: int = 0 self, group: str, day_offset: int = 0
) -> Tuple[Optional[bytes], str, int, int]: ) -> Tuple[Optional[bytes], str, int, int]:
url, day, month = self._make_url(day_offset) pdf_bytes, url, day, month = await self._load_pdf_for_date(day_offset)
fallback_url = (
url
or "https://drive.google.com/drive/folders/" + self.drive.folder_id
)
async with async_playwright() as p: if not pdf_bytes:
browser = await p.chromium.launch(headless=True) return None, fallback_url, day, month
context = await browser.new_context(viewport={"width": 400, "height": 3000})
page = await context.new_page()
try: try:
response = await page.goto(url, wait_until="networkidle", timeout=30000) doc = fitz.open(stream=pdf_bytes, filetype="pdf")
if not response or response.status != 200: regex = self.exact_group_regex(group)
logger.warning(f"Ошибка загрузки страницы: {url}")
return None, url, day, month
# 1) сначала пытаемся по более точному селектору (как в HTML-парсере) for page in doc:
candidates = page.locator("p.MsoPlainText b") line_items = []
count = await candidates.count() page_dict = page.get_text("dict")
for block in page_dict.get("blocks", []):
if block.get("type") != 0:
continue
for line in block.get("lines", []):
text = "".join(span["text"] for span in line["spans"]).strip()
if text:
line_items.append((text, fitz.Rect(line["bbox"])))
regex = self.exact_group_regex(group) found_group = False
target_handle = None rects: List[fitz.Rect] = []
for text, bbox in line_items:
for i in range(count): if not found_group:
el = candidates.nth(i)
text = (await el.inner_text()).strip()
if regex.search(text):
# нашли b с нужной группой — возьмём родительский p для удобного скрина
parent_p = await el.locator("xpath=ancestor::p[1]").element_handle()
target_handle = parent_p or await el.element_handle()
break
# 2) если не нашли в p.MsoPlainText b, попробуем просто p b или p
if not target_handle:
candidates = page.locator("p b")
count = await candidates.count()
for i in range(count):
el = candidates.nth(i)
text = (await el.inner_text()).strip()
if regex.search(text): if regex.search(text):
parent_p = await el.locator("xpath=ancestor::p[1]").element_handle() found_group = True
target_handle = parent_p or await el.element_handle() rects.append(bbox)
else:
if "-----" in text or "+----" in text:
break break
rects.append(bbox)
if not target_handle: if not rects:
# последний шанс: любые <p> continue
candidates = page.locator("p")
count = await candidates.count()
for i in range(count):
el = candidates.nth(i)
text = (await el.inner_text()).strip()
if regex.search(text):
target_handle = await el.element_handle()
break
if target_handle: clip = rects[0]
# скроллим и получаем box for rect in rects[1:]:
await target_handle.scroll_into_view_if_needed() clip |= rect
box = await target_handle.bounding_box() clip.x0 = max(0, clip.x0 - 10)
if box: clip.x1 = min(page.rect.width, clip.x1 + 150)
clip_rect = { clip.y0 = max(0, clip.y0 - 5)
"x": float(max(box["x"], 0)), clip.y1 = min(page.rect.height, clip.y1 + 10)
"y": float(max(box["y"], 0)),
"width": float(box["width"] + 150),
"height": float(box["height"] + 100),
}
img = await page.screenshot(clip=clip_rect)
return img, url, day, month
except Exception as e: pixmap = page.get_pixmap(clip=clip, matrix=fitz.Matrix(2, 2))
logger.error(f"Ошибка при получении расписания: {e}") return pixmap.tobytes("png"), fallback_url, day, month
finally:
await context.close()
await browser.close()
return None, url, day, month except Exception as e:
logger.error(f"Ошибка при получении расписания из PDF: {e}")
return None, fallback_url, day, month
+78 -56
View File
@@ -1,12 +1,12 @@
import asyncio import asyncio
from datetime import datetime, timedelta
from random import randint from random import randint
from aiogram import Bot, types from datetime import datetime, timedelta
from models.state import BotState
from config import Config
from services.schedule_service import ScheduleService
from logging import getLogger
from aiogram import Bot, types
from config import Config
from logging import getLogger
from models.state import BotState
from services.schedule_service import ScheduleService
logger = getLogger(__name__) logger = getLogger(__name__)
@@ -40,21 +40,38 @@ class WatcherService:
pass pass
logger.info("Watcher остановлен") logger.info("Watcher остановлен")
@staticmethod
def _next_delay() -> int:
    """Return the pause before the next check: the base interval plus random jitter."""
    jitter = randint(
        Config.WATCHER_RANDOM_DELAY_MIN,
        Config.WATCHER_RANDOM_DELAY_MAX,
    )
    return Config.WATCHER_INTERVAL_SEC + jitter
@staticmethod
def _get_target_date_with_weekend_handling(days_ahead: int) -> datetime:
"""
Получить целевую дату с учетом выходных.
Если целевая дата - воскресенье, переносится на понедельник.
"""
target = (datetime.now() + timedelta(days=days_ahead)).replace(
hour=0, minute=0, second=0, microsecond=0
)
# weekday() returns 6 for Sunday
if target.weekday() == 6:
target += timedelta(days=1)
return target
async def _watcher_loop(self): async def _watcher_loop(self):
"""Основной цикл слежки""" """Основной цикл слежки за появлением PDF на Google Drive."""
while self.state.watcher_work: while self.state.watcher_work:
try: try:
find = await self._check_all_groups() nothing_found = await self._check_all_groups()
if find: if nothing_found:
# ничего не нашли → ждём delay = self._next_delay()
delay = randint( logger.info(f"PDF/расписание не найдено, следующая проверка через {delay} с")
Config.WATCHER_BASE_DELAY, Config.WATCHER_BASE_DELAY + 30
)
logger.info(f"Следующая проверка через {delay}")
await asyncio.sleep(delay) await asyncio.sleep(delay)
else: else:
# нашли → останавливаемся logger.info("Расписание найдено и отправлено, останавливаем watcher")
logger.info("Расписание найдено, останавливаем watcher")
self.state.watcher_work = False self.state.watcher_work = False
break break
except asyncio.CancelledError: except asyncio.CancelledError:
@@ -63,61 +80,66 @@ class WatcherService:
logger.error(f"Ошибка в watcher_loop: {e}") logger.error(f"Ошибка в watcher_loop: {e}")
await asyncio.sleep(60) await asyncio.sleep(60)
@staticmethod
def _get_target_day() -> datetime:
"""Получение целевого дня"""
now = datetime.now()
target = now + timedelta(days=1)
if target.weekday() == 6:
target += timedelta(days=1)
return target
async def _check_all_groups(self) -> bool: async def _check_all_groups(self) -> bool:
""" """
Возвращает True, если НИ в одной группе не найдено расписание. Возвращает True, если расписание ещё недоступно ни для одной группы.
Возвращает False, если хотя бы в одной группе найдено расписание. Возвращает False, если хотя бы одной группе отправили расписание.
""" """
day = self._get_target_day() days_ahead = self.state.watcher_days_ahead
found_any = False target = self._get_target_date_with_weekend_handling(days_ahead)
logger.info(
f"Проверяем Google Drive на расписание за {target.strftime('%d.%m.%Y')} "
f"(дней вперед: {days_ahead})"
)
if not await self.schedule_service.is_published_for(days_ahead):
return True
found_any = False
for group, chat_id in Config.GROUP_CHATS.items(): for group, chat_id in Config.GROUP_CHATS.items():
logger.info( logger.info(
f"Проверяем расписание для {group} на {day.strftime('%d.%m.%Y')}" f"Проверяем расписание для {group} на {target.strftime('%d.%m.%Y')}"
) )
found = await self._check_group_schedule(group, chat_id, day.day) if await self._check_group_schedule(group, chat_id, days_ahead):
if found:
found_any = True found_any = True
return not found_any # <-- вот так правильно return not found_any
async def _check_group_schedule(self, group: str, chat_id: int, day: int) -> bool: async def _check_group_schedule(self, group: str, chat_id: int, days_ahead: int) -> bool:
target = self._get_target_date_with_weekend_handling(days_ahead)
text, url, data_day, data_month = await self.schedule_service.get_schedule( text, url, data_day, data_month = await self.schedule_service.get_schedule(
group, day group, target.day
) )
if text and "не найдено" not in text.lower(): if not self.schedule_service.is_schedule_missing(text):
msg = await self.bot.send_message( msg = await self.bot.send_message(
chat_id, chat_id,
f"Авто-расписание для {group} на {data_day:02d}.{data_month:02d}\n\n{text}", (
parse_mode="Markdown", f"🔔 Авто-расписание для {group} "
f"на {data_day:02d}.{data_month:02d}\n\n{text}"
),
parse_mode="HTML",
) )
await self.bot.pin_chat_message( try:
chat_id, msg.message_id, disable_notification=False await self.bot.pin_chat_message(
chat_id, msg.message_id, disable_notification=False
)
except Exception as e:
logger.warning(f"Не удалось закрепить сообщение в {chat_id}: {e}")
return True
png, url, data_day, data_month = await self.schedule_service.get_pschedule(
group, 0
)
if png:
await self.bot.send_photo(
chat_id,
types.BufferedInputFile(png, filename=f"{group}.png"),
caption=(
f"🔔 АВАРИЙНЫЙ РЕЖИМ\n\n"
f"Авто-расписание для {group} "
f"на {data_day:02d}.{data_month:02d}"
),
) )
return True return True
else:
png, url, data_day, data_month = await self.schedule_service.get_pschedule(
group, day
)
if png:
await self.bot.send_photo(
chat_id,
types.BufferedInputFile(png, filename=f"{group}.png"),
caption=f"АВАРИЙНЫЙ РЕЖИМ\n\nАвто-расписание для {group} на {data_day:02d}.{data_month:02d}\n\nНайдено с ошибкой",
)
return True
return False return False
# clip_hash = hashlib.md5(clip_png).hexdigest()
# Логика проверки изменений и отправки сообщений
# ... (ваша существующая логика)
+4 -1
View File
@@ -1,6 +1,9 @@
import sqlite3 import sqlite3
from pathlib import Path
DIR = "/Users/mac/myfirstprogramm/storage/message.db" from config import Config
DIR = Path(__file__).resolve().parent / "message.db"
if __name__ == "__main__": if __name__ == "__main__":
db = sqlite3.connect(DIR) db = sqlite3.connect(DIR)
+19 -1
View File
@@ -1,7 +1,13 @@
from .DB import get_db from config import Config
def save_message(chat_id: int, message_id: int): def save_message(chat_id: int, message_id: int):
if Config.DISABLE_STORAGE:
return
if True:
return
from .DB import get_db
db = get_db() db = get_db()
cur = db.cursor() cur = db.cursor()
cur.execute("INSERT INTO message VALUES (?, ?)", (int(chat_id), int(message_id))) cur.execute("INSERT INTO message VALUES (?, ?)", (int(chat_id), int(message_id)))
@@ -11,6 +17,12 @@ def save_message(chat_id: int, message_id: int):
def load_messages(): def load_messages():
if Config.DISABLE_STORAGE:
return []
if True:
return []
from .DB import get_db
db = get_db() db = get_db()
cur = db.cursor() cur = db.cursor()
cur.execute("SELECT * FROM message") cur.execute("SELECT * FROM message")
@@ -21,6 +33,12 @@ def load_messages():
def clear_messages(): def clear_messages():
if Config.DISABLE_STORAGE:
return
if True:
return
from .DB import get_db
db = get_db() db = get_db()
cur = db.cursor() cur = db.cursor()
cur.execute("DELETE FROM message") cur.execute("DELETE FROM message")
+14 -9
View File
@@ -1,6 +1,11 @@
from .DB import get_db from config import Config
_DEFAULT_GROUP = "30тс"
def save_user(user_id: int, group: str = _DEFAULT_GROUP):
from .DB import get_db
def save_user(user_id: int, group: str = "30тс"):
db = get_db() db = get_db()
cur = db.cursor() cur = db.cursor()
cur.execute("INSERT INTO users (user_id, user_group) VALUES (?, ?)", (user_id, group)) cur.execute("INSERT INTO users (user_id, user_group) VALUES (?, ?)", (user_id, group))
@@ -8,19 +13,18 @@ def save_user(user_id: int, group: str = "30тс"):
cur.close() cur.close()
db.close() db.close()
def set_group(user_id: int, group: str = "30тс"):
def set_group(user_id: int, group: str = _DEFAULT_GROUP):
from .DB import get_db
db = get_db() db = get_db()
cur = db.cursor() cur = db.cursor()
# проверяем, есть ли пользователь
cur.execute("SELECT 1 FROM users WHERE user_id = ?", (user_id,)) cur.execute("SELECT 1 FROM users WHERE user_id = ?", (user_id,))
exists = cur.fetchone() exists = cur.fetchone()
if exists: if exists:
# если есть — обновляем группу
cur.execute("UPDATE users SET user_group = ? WHERE user_id = ?", (group, user_id)) cur.execute("UPDATE users SET user_group = ? WHERE user_id = ?", (group, user_id))
else: else:
# если нет — регистрируем нового пользователя
cur.execute("INSERT INTO users (user_id, user_group) VALUES (?, ?)", (user_id, group)) cur.execute("INSERT INTO users (user_id, user_group) VALUES (?, ?)", (user_id, group))
db.commit() db.commit()
@@ -28,7 +32,9 @@ def set_group(user_id: int, group: str = "30тс"):
db.close() db.close()
def get_group(user_id: int, default: str = "30тс") -> str: def get_group(user_id: int, default: str = _DEFAULT_GROUP) -> str:
from .DB import get_db
db = get_db() db = get_db()
cur = db.cursor() cur = db.cursor()
cur.execute("SELECT user_group FROM users WHERE user_id = ?", (user_id,)) cur.execute("SELECT user_group FROM users WHERE user_id = ?", (user_id,))
@@ -36,7 +42,6 @@ def get_group(user_id: int, default: str = "30тс") -> str:
if row: if row:
group = row[0] group = row[0]
else: else:
# если пользователя нет — регистрируем с дефолтной группой
cur.execute("INSERT INTO users (user_id, user_group) VALUES (?, ?)", (user_id, default)) cur.execute("INSERT INTO users (user_id, user_group) VALUES (?, ?)", (user_id, default))
db.commit() db.commit()
group = default group = default
+33
View File
@@ -0,0 +1,33 @@
import logging
from logging import CRITICAL, NullHandler, getLogger
from config import Config
def setup_logging() -> None:
    """Configure the root logger according to the ``Config`` flags.

    When ``Config.DISABLE_LOGGING`` is set, logging is switched off
    entirely: the root logger's handler list is emptied, a single
    ``NullHandler`` is attached, the root level is raised to ``CRITICAL``
    and ``logging.disable(CRITICAL)`` is applied.

    Otherwise the root logger is (re)configured at ``INFO`` level with a
    default ``StreamHandler`` and, unless ``Config.DISABLE_STORAGE`` is
    set, a UTF-8 ``FileHandler`` writing to ``Config.LOG_FILE`` (the
    parent directory is created if it does not exist).
    """
    if not Config.DISABLE_LOGGING:
        sinks: list[logging.Handler] = [logging.StreamHandler()]

        # The log file counts as storage, so it is skipped when storage is off.
        if not Config.DISABLE_STORAGE:
            log_file = Config.LOG_FILE
            log_file.parent.mkdir(parents=True, exist_ok=True)
            sinks.append(logging.FileHandler(log_file, encoding="utf-8"))

        # force=True replaces whatever handlers were installed on the root
        # logger before this call.
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            handlers=sinks,
            force=True,
        )
        return

    # Logging disabled: leave only a no-op handler and suppress all levels.
    root_logger = getLogger()
    root_logger.handlers.clear()
    root_logger.addHandler(NullHandler())
    root_logger.setLevel(CRITICAL)
    logging.disable(CRITICAL)