Integrated Event class; scrapes the events page and gets today's events
This commit is contained in:
parent
956013c7ca
commit
b7ed45b883
|
@ -1 +1,2 @@
|
||||||
venv/
|
venv/
|
||||||
|
__pycache__/
|
||||||
|
|
8
event.py
8
event.py
|
@ -1,16 +1,16 @@
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
class Event:
    """A single event scraped from the PVV events page."""

    def __init__(self, id: int, time: datetime.datetime, name: str, location: str, organizer: str, description: str):
        """Store the scraped event fields.

        id          -- numeric identifier taken from the event URL
        time        -- when the event takes place
        name        -- event title
        location    -- where the event happens
        organizer   -- who hosts the event
        description -- free-text description
        """
        self.id = id
        self.time = time
        self.name = name
        self.location = location
        self.organizer = organizer
        self.description = description

    def __str__(self):
        """Human-readable form: the event name followed by its id."""
        return f'{self.name} ({self.id})'

    def __repr__(self):
        """Debug form showing id, time and name."""
        return f'Event({self.id}, {self.time}, {self.name})'
|
30
scraping.py
30
scraping.py
|
@ -1,21 +1,30 @@
|
||||||
|
from typing import List
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests
|
import requests
|
||||||
from operator import add
|
from operator import add
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
import datetime
|
||||||
|
from event import Event
|
||||||
|
|
||||||
|
|
||||||
def get_soup() -> BeautifulSoup:
    """Fetch the PVV events page and parse it into a BeautifulSoup tree."""
    response = requests.get("http://www.pvv.ntnu.no/hendelser/")
    return BeautifulSoup(response.text, "html.parser")
||||||
|
|
||||||
|
|
||||||
|
def process_soup(soup: BeautifulSoup) -> List[Event]:
|
||||||
zips = []
|
zips = []
|
||||||
events = soup.find_all("ul", "events")
|
events = soup.find_all("ul", "events")
|
||||||
|
|
||||||
for event in events:
|
for event in events:
|
||||||
times, places, organizers = zip(*(list(
|
times, locations, organizers = zip(*(list(
|
||||||
map(lambda x: map(lambda y: y.find("strong").text, x),
|
map(lambda x: map(lambda y: y.find("strong").text, x),
|
||||||
filter(lambda x: x != [],
|
filter(lambda x: x != [],
|
||||||
map(lambda x: x.find_all("li"),
|
map(lambda x: x.find_all("li"),
|
||||||
event.find_all("li")))))))
|
event.find_all("li")))))))
|
||||||
titles = list(map(lambda x: x[0].text,
|
times = list(map(lambda x: datetime.datetime.strptime(x, "%A %d. %b %H.%M").replace(year=datetime.datetime.now().year), times))
|
||||||
|
names = list(map(lambda x: x[0].text,
|
||||||
filter(lambda x: x != [],
|
filter(lambda x: x != [],
|
||||||
map(lambda x: x.find_all("a"),
|
map(lambda x: x.find_all("a"),
|
||||||
event.find_all("li")))))
|
event.find_all("li")))))
|
||||||
|
@ -28,7 +37,20 @@ if __name__ == "__main__":
|
||||||
if a['href'][:10] == '/hendelser':
|
if a['href'][:10] == '/hendelser':
|
||||||
ids.append(int(a['href'][a['href'].find("=")+1:]))
|
ids.append(int(a['href'][a['href'].find("=")+1:]))
|
||||||
|
|
||||||
zips.append(list(zip(ids, times, places, organizers, titles, descriptions)))
|
zips.append(list(zip(ids, times, names, organizers, locations, descriptions)))
|
||||||
|
|
||||||
events = reduce(add, zips)
|
events = reduce(add, zips)
|
||||||
|
events = list(map(lambda x: Event(*x), events))
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
def get_events_today(events: List[Event]):
    """Return only the events whose date equals today's local date."""
    today = datetime.datetime.today().date()
    return [event for event in events if event.time.date() == today]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Scrape the events page and print whatever is happening today.
    todays_events = get_events_today(process_soup(get_soup()))
    print(todays_events)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue