110 lines
3.5 KiB
Python
110 lines
3.5 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import ssl
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from typing import List, Optional
|
|
|
|
import aiohttp
|
|
import certifi
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# URL template for the embedded (list) view of a Drive folder.
# `{folder_id}` is substituted with str.format().
DRIVE_FOLDER_EMBED = "https://drive.google.com/embeddedfolderview?id={folder_id}#list"

# URL template for downloading a single Drive file.
# `{file_id}` is substituted with str.format().
DRIVE_DOWNLOAD_URL = "https://drive.google.com/uc?export=download&id={file_id}"

# User-Agent header value sent with every request made by this module.
# The two adjacent literals are concatenated into one string.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
|
|
|
|
|
|
def _drive_connector() -> aiohttp.TCPConnector:
    """Return a new aiohttp TCP connector whose TLS context uses certifi's CA file.

    A fresh connector (and SSL context) is built on every call.
    """
    ca_bundle_path = certifi.where()
    tls_context = ssl.create_default_context(cafile=ca_bundle_path)
    connector = aiohttp.TCPConnector(ssl=tls_context)
    return connector
|
|
|
|
|
|
@dataclass(frozen=True)
class DriveScheduleFile:
    """Immutable record describing one schedule file found in the Drive folder."""

    # Identifier of the file (substituted into the download URL template).
    file_id: str
    # File name as shown in the folder listing.
    name: str
    # Date the schedule applies to, parsed from the file name.
    schedule_date: datetime
|
|
|
|
|
|
class DriveScheduleSource:
    """Find and download student-schedule PDFs stored in a Google Drive folder.

    The folder listing is scraped from Drive's embedded folder view and cached
    on the instance after the first successful fetch.
    """

    # File names look like "05.03.24 по учащимся.pdf": DD.MM.YY followed by a
    # fixed suffix. Matching is case-insensitive. Compiled once for reuse.
    _FILENAME_RE = re.compile(
        r"^(\d{2})\.(\d{2})\.(\d{2})\s+по\s+учащимся\.pdf$",
        re.IGNORECASE,
    )

    def __init__(self, folder_id: str):
        """Remember the Drive folder id; nothing is fetched until first use."""
        self.folder_id = folder_id
        # None means "not fetched yet"; an empty list is a valid cached result.
        self._files_cache: Optional[List[DriveScheduleFile]] = None

    @staticmethod
    def _parse_filename_date(name: str) -> Optional[datetime]:
        """Extract the schedule date from a file name.

        Args:
            name: File name as shown in the listing; surrounding whitespace
                is ignored.

        Returns:
            Midnight of the parsed date (two-digit years are read as 20YY), or
            ``None`` when the name does not follow the expected pattern or
            encodes a date that does not exist (e.g. ``31.02.24``).
        """
        match = DriveScheduleSource._FILENAME_RE.match(name.strip())
        if match is None:
            return None
        day, month, year = map(int, match.groups())
        try:
            return datetime(2000 + year, month, day)
        except ValueError:
            # The digits matched the pattern but are not a real calendar date.
            # Treat the file as "not a schedule" instead of failing the whole
            # folder listing.
            return None

    @staticmethod
    def _new_session() -> "aiohttp.ClientSession":
        """Create a client session with this module's User-Agent and TLS connector."""
        return aiohttp.ClientSession(
            headers={"User-Agent": USER_AGENT},
            connector=_drive_connector(),
        )

    def _parse_listing(self, html: str) -> List[DriveScheduleFile]:
        """Turn the folder-view HTML into schedule files, in document order."""
        soup = BeautifulSoup(html, "html.parser")
        files: List[DriveScheduleFile] = []
        for entry in soup.select("div.flip-entry"):
            entry_id = entry.get("id", "")
            if not entry_id.startswith("entry-"):
                continue
            title_el = entry.select_one(".flip-entry-title")
            if title_el is None:
                continue
            name = title_el.get_text(strip=True)
            schedule_date = self._parse_filename_date(name)
            if schedule_date is None:
                # Not a student-schedule PDF (or an unparseable date): skip it.
                continue
            files.append(
                DriveScheduleFile(
                    file_id=entry_id.removeprefix("entry-"),
                    name=name,
                    schedule_date=schedule_date,
                )
            )
        return files

    async def list_student_schedules(self, force_refresh: bool = False) -> List[DriveScheduleFile]:
        """Return the folder's schedule files sorted by ascending schedule date.

        Args:
            force_refresh: When true, ignore the cached listing and fetch again.

        Returns:
            The list of recognised schedule files. The same list object is
            cached and returned by later calls.

        Raises:
            aiohttp.ClientResponseError: If the listing request returns an
                HTTP error status (via ``raise_for_status``).
        """
        if self._files_cache is not None and not force_refresh:
            return self._files_cache

        url = DRIVE_FOLDER_EMBED.format(folder_id=self.folder_id)
        async with self._new_session() as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                html = await resp.text()

        files = self._parse_listing(html)
        # list.sort is stable, so files sharing a date keep their listing order.
        files.sort(key=lambda item: item.schedule_date)
        self._files_cache = files
        return files

    async def find_for_date(self, target: datetime) -> Optional[DriveScheduleFile]:
        """Return the schedule file whose date equals ``target``'s calendar date.

        Only year, month and day of ``target`` are compared. When several files
        share that date, the last one in the sorted listing is returned.
        Returns ``None`` when there is no match.
        """
        wanted = (target.year, target.month, target.day)
        for item in reversed(await self.list_student_schedules()):
            found = item.schedule_date
            if (found.year, found.month, found.day) == wanted:
                return item
        return None

    async def download_pdf(self, file_id: str) -> bytes:
        """Download a Drive file and return the raw response body.

        Args:
            file_id: Drive file identifier substituted into the download URL.

        Raises:
            aiohttp.ClientResponseError: If the download request returns an
                HTTP error status (via ``raise_for_status``).
        """
        url = DRIVE_DOWNLOAD_URL.format(file_id=file_id)
        async with self._new_session() as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                return await resp.read()
|