Integrated Event class; scrapes the events page and gets today's events
This commit is contained in:
parent
956013c7ca
commit
b7ed45b883
|
@ -1 +1,2 @@
|
||||||
venv/
|
venv/
|
||||||
|
__pycache__/
|
||||||
|
|
8
event.py
8
event.py
|
@ -1,16 +1,16 @@
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
class Event:
    """A single event scraped from the PVV events page."""

    def __init__(self, id: int, time: datetime.datetime, name: str, location: str, organizer: str, description: str):
        """Store the scraped event fields.

        id          -- numeric identifier taken from the event URL
        time        -- when the event takes place
        name        -- event title
        location    -- where the event happens
        organizer   -- who hosts the event
        description -- free-text description
        """
        self.id = id
        self.time = time
        self.name = name
        self.location = location
        self.organizer = organizer
        self.description = description

    def __str__(self):
        """Human-readable form: the event name followed by its id."""
        return f'{self.name} ({self.id})'

    def __repr__(self):
        """Debug form showing id, time and name."""
        return f'Event({self.id}, {self.time}, {self.name})'
|
30
scraping.py
30
scraping.py
|
@ -1,21 +1,30 @@
|
||||||
|
from typing import List
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests
|
import requests
|
||||||
from operator import add
|
from operator import add
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
import datetime
|
||||||
|
from event import Event
|
||||||
|
|
||||||
|
|
||||||
def get_soup() -> BeautifulSoup:
    """Fetch the PVV events page and parse it into a BeautifulSoup tree."""
    response = requests.get("http://www.pvv.ntnu.no/hendelser/")
    return BeautifulSoup(response.text, "html.parser")
||||||
|
|
||||||
|
|
||||||
|
def process_soup(soup: BeautifulSoup) -> List[Event]:
|
||||||
zips = []
|
zips = []
|
||||||
events = soup.find_all("ul", "events")
|
events = soup.find_all("ul", "events")
|
||||||
|
|
||||||
for event in events:
|
for event in events:
|
||||||
times, places, organizers = zip(*(list(
|
times, locations, organizers = zip(*(list(
|
||||||
map(lambda x: map(lambda y: y.find("strong").text, x),
|
map(lambda x: map(lambda y: y.find("strong").text, x),
|
||||||
filter(lambda x: x != [],
|
filter(lambda x: x != [],
|
||||||
map(lambda x: x.find_all("li"),
|
map(lambda x: x.find_all("li"),
|
||||||
event.find_all("li")))))))
|
event.find_all("li")))))))
|
||||||
titles = list(map(lambda x: x[0].text,
|
times = list(map(lambda x: datetime.datetime.strptime(x, "%A %d. %b %H.%M").replace(year=datetime.datetime.now().year), times))
|
||||||
|
names = list(map(lambda x: x[0].text,
|
||||||
filter(lambda x: x != [],
|
filter(lambda x: x != [],
|
||||||
map(lambda x: x.find_all("a"),
|
map(lambda x: x.find_all("a"),
|
||||||
event.find_all("li")))))
|
event.find_all("li")))))
|
||||||
|
@ -28,7 +37,20 @@ if __name__ == "__main__":
|
||||||
if a['href'][:10] == '/hendelser':
|
if a['href'][:10] == '/hendelser':
|
||||||
ids.append(int(a['href'][a['href'].find("=")+1:]))
|
ids.append(int(a['href'][a['href'].find("=")+1:]))
|
||||||
|
|
||||||
zips.append(list(zip(ids, times, places, organizers, titles, descriptions)))
|
zips.append(list(zip(ids, times, names, organizers, locations, descriptions)))
|
||||||
|
|
||||||
events = reduce(add, zips)
|
events = reduce(add, zips)
|
||||||
|
events = list(map(lambda x: Event(*x), events))
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
def get_events_today(events: List[Event]):
    """Return only the events whose date equals today's local date."""
    today = datetime.datetime.today().date()
    return [event for event in events if event.time.date() == today]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Scrape the events page and print whatever is happening today.
    todays_events = get_events_today(process_soup(get_soup()))
    print(todays_events)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue