It's version 0.7.2. Я, используя нейронку, создал новый парсер для гугл-таблиц, тем самым восстановив работу программы.
This commit is contained in:
@@ -0,0 +1,109 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import ssl
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
import aiohttp
|
||||
import certifi
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# URL template for the embeddable listing of a Drive folder.
DRIVE_FOLDER_EMBED = "https://drive.google.com/embeddedfolderview?id={folder_id}#list"

# URL template for downloading a single Drive file by its id.
DRIVE_DOWNLOAD_URL = "https://drive.google.com/uc?export=download&id={file_id}"

# Browser-like User-Agent header sent with every request made by this module.
USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    )
)
|
||||
|
||||
|
||||
def _drive_connector() -> aiohttp.TCPConnector:
    """Create an aiohttp TCP connector whose TLS context trusts certifi's CA bundle."""
    ca_bundle_path = certifi.where()
    return aiohttp.TCPConnector(
        ssl=ssl.create_default_context(cafile=ca_bundle_path),
    )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DriveScheduleFile:
    """Immutable description of one schedule PDF listed in the Drive folder."""

    # Drive identifier of the file; substituted into the download URL.
    file_id: str
    # File title as shown in the folder listing.
    name: str
    # Date the schedule refers to, parsed from the file name.
    schedule_date: datetime
|
||||
|
||||
|
||||
class DriveScheduleSource:
    """Find and download student schedule PDFs from a public Google Drive folder.

    The folder listing is scraped from Drive's embedded folder view. The
    parsed listing is cached on the instance after the first successful
    fetch; pass ``force_refresh=True`` to :meth:`list_student_schedules`
    to re-read it.
    """

    # Expected file title: "DD.MM.YY по учащимся.pdf" (matched case-insensitively).
    _FILENAME_RE = re.compile(
        r"^(\d{2})\.(\d{2})\.(\d{2})\s+по\s+учащимся\.pdf$",
        re.IGNORECASE,
    )

    def __init__(self, folder_id: str):
        """Remember the Drive folder id; no network access happens here."""
        self.folder_id = folder_id
        # None means "not fetched yet"; an empty list is a valid cached result.
        self._files_cache: Optional[List[DriveScheduleFile]] = None

    @staticmethod
    def _parse_filename_date(name: str) -> Optional[datetime]:
        """Extract the schedule date from a file title.

        Args:
            name: File title; surrounding whitespace is ignored.

        Returns:
            The date encoded as ``DD.MM.YY`` at the start of the title
            (the two-digit year is taken as 20YY), or ``None`` when the
            title does not match the expected pattern or the digits do
            not form a real calendar date.
        """
        match = DriveScheduleSource._FILENAME_RE.match(name.strip())
        if match is None:
            return None
        day, month, year = (int(part) for part in match.groups())
        try:
            return datetime(2000 + year, month, day)
        except ValueError:
            # The pattern only checks digit counts, so "31.02.25" or
            # "10.13.25" get here. Skip such a file instead of letting the
            # error abort the whole folder listing.
            return None

    @staticmethod
    def _new_session() -> aiohttp.ClientSession:
        """Create a client session with the module's User-Agent and TLS connector."""
        return aiohttp.ClientSession(
            headers={"User-Agent": USER_AGENT},
            connector=_drive_connector(),
        )

    def _extract_files(self, html: str) -> List[DriveScheduleFile]:
        """Parse the embedded folder page and return schedule files sorted by date."""
        soup = BeautifulSoup(html, "html.parser")
        files: List[DriveScheduleFile] = []

        for entry in soup.select("div.flip-entry"):
            entry_id = entry.get("id", "")
            # Only elements whose id has the form "entry-<file id>" are used.
            if not entry_id.startswith("entry-"):
                continue
            title_el = entry.select_one(".flip-entry-title")
            if not title_el:
                continue
            name = title_el.get_text(strip=True)
            schedule_date = self._parse_filename_date(name)
            if schedule_date is None:
                # Not a student schedule (or an unparseable date): ignore it.
                continue
            files.append(
                DriveScheduleFile(
                    file_id=entry_id.removeprefix("entry-"),
                    name=name,
                    schedule_date=schedule_date,
                )
            )

        files.sort(key=lambda item: item.schedule_date)
        return files

    async def list_student_schedules(self, force_refresh: bool = False) -> List[DriveScheduleFile]:
        """Return the student schedule files in the folder, oldest first.

        Args:
            force_refresh: When true, ignore the cached listing and fetch
                the folder page again.

        Returns:
            Files whose titles match the schedule naming pattern, sorted
            by schedule date in ascending order. The same list object is
            stored as the cache and returned on later calls.

        Raises:
            aiohttp.ClientResponseError: If the folder page request
                returns an error status.
        """
        if self._files_cache is not None and not force_refresh:
            return self._files_cache

        url = DRIVE_FOLDER_EMBED.format(folder_id=self.folder_id)
        async with self._new_session() as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                html = await resp.text()

        files = self._extract_files(html)
        # The cache is only replaced after a successful fetch and parse.
        self._files_cache = files
        return files

    async def find_for_date(self, target: datetime) -> Optional[DriveScheduleFile]:
        """Return the schedule file for the calendar day of ``target``.

        Only year, month and day are compared; the time part of ``target``
        is ignored. If several files share that date, the one that comes
        last in the date-sorted listing is returned.

        Returns:
            The matching file, or ``None`` when the (possibly cached)
            listing has no file for that day.
        """
        wanted_day = target.date()
        files = await self.list_student_schedules()
        for item in reversed(files):
            if item.schedule_date.date() == wanted_day:
                return item
        return None

    async def download_pdf(self, file_id: str) -> bytes:
        """Download a Drive file and return the raw response body.

        The body is returned as received; it is not checked to be a PDF.

        Args:
            file_id: Drive file id, e.g. ``DriveScheduleFile.file_id``.

        Raises:
            aiohttp.ClientResponseError: If the download request returns
                an error status.
        """
        url = DRIVE_DOWNLOAD_URL.format(file_id=file_id)
        async with self._new_session() as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                return await resp.read()
|
||||
Reference in New Issue
Block a user