calendar-bot/scraping.py

57 lines
1.8 KiB
Python
Raw Normal View History

from typing import List
2023-08-17 22:52:36 +02:00
from bs4 import BeautifulSoup
import requests
from operator import add
from functools import reduce
import datetime
from event import Event
2023-08-17 22:52:36 +02:00
def get_soup(url: str = "http://www.pvv.ntnu.no/hendelser/",
             timeout: float = 10.0) -> BeautifulSoup:
    """Download a page and return it parsed as HTML.

    Args:
        url: Address to fetch. Defaults to the events page this module
            scrapes.
        timeout: Seconds to wait for the server before giving up.
            ``requests`` applies no timeout unless one is passed, so without
            it a stalled server would block the caller indefinitely.

    Returns:
        The response body parsed with Python's built-in ``html.parser``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, "html.parser")
def _parse_event_time(text: str, year: int) -> datetime.datetime:
    """Parse a listing timestamp in ``"%A %d. %b %H.%M"`` form into *year*.

    The year is appended to the text before parsing instead of being patched
    in afterwards with ``replace(year=...)``. Without a year ``strptime``
    assumes 1900, which is not a leap year, so "29. Feb" would be rejected
    even when *year* is a leap year.

    NOTE(review): ``%A`` and ``%b`` are matched against the current locale's
    day and month names; confirm the process locale matches the page language.
    """
    return datetime.datetime.strptime(f"{text} {year}", "%A %d. %b %H.%M %Y")


def process_soup(soup: BeautifulSoup) -> List[Event]:
    """Build ``Event`` objects from every ``<ul class="events">`` in *soup*.

    For each events list the fields are collected column-wise and then paired
    up by position:

    * time, location, organizer: the ``<strong>`` text of the ``<li>``
      elements nested inside an entry, in that order;
    * name: the text of the first ``<a>`` inside each ``<li>`` that has one;
    * description: the text of the first ``<p>`` inside each ``<li>`` that
      has one;
    * id: the integer after the first ``=`` in every link whose ``href``
      starts with ``/hendelser``.

    NOTE(review): because the columns are matched purely by position (and
    ``zip`` stops at the shortest one), an entry that lacks e.g. its ``<p>``
    would shift or drop later events. This assumes every entry on the page
    carries all fields; verify against the live markup.

    Args:
        soup: Parsed events page.

    Returns:
        One ``Event(id, time, name, organizer, location, description)`` per
        entry, in page order. Empty when the page lists no events.

    Raises:
        ValueError: If the entries do not expose exactly three nested details,
            a timestamp does not match the expected format, or an id is not
            an integer.
    """
    # All events are placed in the current year, as the page omits the year.
    year = datetime.datetime.now().year
    rows = []

    for event_list in soup.find_all("ul", "events"):
        # find_all is recursive: this holds both the event entries and the
        # detail <li> elements nested inside them.
        items = event_list.find_all("li")

        # Only an event entry contains further <li> elements; each of those
        # holds one detail in a <strong> tag.
        details = [
            [detail.find("strong").text for detail in nested]
            for nested in (item.find_all("li") for item in items)
            if nested
        ]
        if not details:
            # Nothing to transpose; unpacking zip() of nothing would raise.
            continue

        # Transpose the per-event [time, location, organizer] rows to columns.
        raw_times, locations, organizers = zip(*details)
        times = [_parse_event_time(text, year) for text in raw_times]

        names = [
            links[0].text
            for links in (item.find_all("a") for item in items)
            if links
        ]
        descriptions = [
            paragraph.text
            for paragraph in (item.find("p") for item in items)
            if paragraph is not None
        ]
        ids = [
            int(link["href"].partition("=")[2])
            for link in event_list.find_all("a", href=True)
            if link["href"].startswith("/hendelser")
        ]

        rows.extend(
            zip(ids, times, names, organizers, locations, descriptions))

    return [Event(*row) for row in rows]
def get_events_today(events: List[Event]) -> List[Event]:
    """Return the events whose start time falls on today's local date.

    Args:
        events: Events to filter; each must expose a ``time`` attribute with
            a ``date()`` method (a ``datetime.datetime``).

    Returns:
        A new list with the matching events, in their original order.
    """
    # Evaluate "today" once so every event is compared against the same date,
    # even if the call happens to straddle midnight.
    today = datetime.datetime.today().date()
    return [event for event in events if event.time.date() == today]
if __name__ == "__main__":
    # Manual smoke run: fetch the live page, parse it and show today's events.
    page = get_soup()
    all_events = process_soup(page)
    print(get_events_today(all_events))
2023-08-17 22:52:36 +02:00