diff --git a/.gitignore b/.gitignore index f7275bb..93526df 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ venv/ +__pycache__/ diff --git a/event.py b/event.py index 785dcb8..5e1394c 100644 --- a/event.py +++ b/event.py @@ -1,16 +1,16 @@ import datetime class Event: - def __init__(self, id: int, start: datetime.datetime, end: datetime.datetime, name: str, location: str, description: str): + def __init__(self, id: int, time: datetime.datetime, name: str, location: str, organizer: str, description: str): self.id = id - self.start = start - self.end = end + self.time = time self.name = name self.location = location + self.organizer = organizer self.description = description def __str__(self): return f'{self.name} ({self.id})' def __repr__(self): - return f'Event({self.id}, {self.start}, {self.name})' + return f'Event({self.id}, {self.time}, {self.name})' diff --git a/scraping.py b/scraping.py index 203f9a6..73a0f4a 100644 --- a/scraping.py +++ b/scraping.py @@ -1,21 +1,30 @@ +from typing import List from bs4 import BeautifulSoup import requests from operator import add from functools import reduce +import datetime +from event import Event -if __name__ == "__main__": +def get_soup() -> BeautifulSoup: r = requests.get("http://www.pvv.ntnu.no/hendelser/") soup = BeautifulSoup(r.text, "html.parser") + return soup + + +def process_soup(soup: BeautifulSoup) -> List[Event]: zips = [] events = soup.find_all("ul", "events") + for event in events: - times, places, organizers = zip(*(list( + times, locations, organizers = zip(*(list( map(lambda x: map(lambda y: y.find("strong").text, x), filter(lambda x: x != [], map(lambda x: x.find_all("li"), event.find_all("li"))))))) - titles = list(map(lambda x: x[0].text, + times = list(map(lambda x: datetime.datetime.strptime(x, "%A %d. %b %H.%M").replace(year=datetime.datetime.now().year), times)) + names = list(map(lambda x: x[0].text, filter(lambda x: x != [], map(lambda x: x.find_all("a"), event.find_all("li"))))) @@ -28,7 +37,20 @@ if __name__ == "__main__": if a['href'][:10] == '/hendelser': ids.append(int(a['href'][a['href'].find("=")+1:])) - zips.append(list(zip(ids, times, places, organizers, titles, descriptions))) + zips.append(list(zip(ids, times, names, organizers, locations, descriptions))) events = reduce(add, zips) + events = list(map(lambda x: Event(*x), events)) + return events + + +def get_events_today(events: List[Event]): + return list(filter(lambda e: e.time.date() == datetime.datetime.today().date(), events)) + + +if __name__ == "__main__": + print(get_events_today(process_soup(get_soup()))) + + +