7 Commits

18 changed files with 157 additions and 182 deletions
+4 -1
View File
@@ -3,7 +3,7 @@ debug = true
debug_sql = false
[database]
# One of (sqlite, postgres)
# One of (sqlite, postgresql)
type = 'sqlite'
[database.sqlite]
@@ -38,3 +38,6 @@ dryrun = false
warn_days_before_borrowing_deadline = [ 5, 1 ]
days_before_queue_position_expires = 14
warn_days_before_expiring_queue_position_deadline = [ 3, 1 ]
[general]
quit_allowed = true
Generated
+4 -4
View File
@@ -7,11 +7,11 @@
]
},
"locked": {
"lastModified": 1769338528,
"narHash": "sha256-t18ZoSt9kaI1yde26ok5s7aFLkap1Q9+/2icVh2zuaE=",
"lastModified": 1780178524,
"narHash": "sha256-2PcNyNqbGCWBpAMdCU1HxSQmhQiG6evdjxVnPA7w5bQ=",
"ref": "refs/heads/main",
"rev": "7218348163fd8d84df4a6f682c634793e67a3fed",
"revCount": 13,
"rev": "2406de41ce9d0a1404cbf4e55537e3f720f37f23",
"revCount": 15,
"type": "git",
"url": "https://git.pvv.ntnu.no/Projects/libdib.git"
},
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from typing import Any
# TODO: Add more languages
LANGUAGES: set[str] = {
@@ -19,12 +20,14 @@ LANGUAGES: set[str] = {
@dataclass
class BookMetadata:
"""A class representing metadata for a book."""
class BookData:
"""
A class representing metadata for a book that we might want to fetch from external sources
"""
isbn: str
title: str
# The source of the metadata provider
# ID of the data fetcher used to fetch this instance
source: str
authors: set[str]
language: str | None
@@ -32,11 +35,11 @@ class BookMetadata:
num_pages: int | None
subjects: set[str]
def to_dict(self) -> dict[str, any]:
def to_dict(self) -> dict[str, Any]:
return {
"isbn": self.isbn,
"title": self.title,
"source": self.metadata_source_id(),
"source": self.source,
"authors": set() if self.authors is None else self.authors,
"language": self.language,
"publish_date": self.publish_date,
@@ -0,0 +1,22 @@
# base fetcher.
from abc import ABC, abstractmethod
from .BookData import BookData
class BookDataFetcher(ABC):
    """
    Abstract interface for adapters that look up book data in an external source.

    Both operations are class methods, so concrete fetchers are used through
    the class itself rather than through instances.
    """

    @classmethod
    @abstractmethod
    def fetcher_id(cls) -> str:
        """
        Return the unique identifier of this fetcher.

        The identifier is used to tell which fetcher a piece of book data came from.
        """

    @classmethod
    @abstractmethod
    def try_fetch_data(cls, isbn: str) -> BookData | None:
        """
        Try to fetch book data for the given ISBN.

        Returns the fetched BookData, or None when nothing could be fetched.
        """
@@ -0,0 +1,3 @@
from .book_data_fetcher import fetch_book_data_from_multiple_sources
__all__ = ["fetch_book_data_from_multiple_sources"]
@@ -0,0 +1,72 @@
"""
This module contains the fetch_book_data_from_multiple_sources() function, which queries all fetchers and returns their results (if any) ordered by fetcher priority.
"""
from concurrent.futures import ThreadPoolExecutor
from worblehat.book_data_fetchers.BookData import BookData
from worblehat.book_data_fetchers.BookDataFetcher import BookDataFetcher
from worblehat.book_data_fetchers.fetchers.GoogleBooksFetcher import GoogleBooksFetcher
from worblehat.book_data_fetchers.fetchers.OpenLibraryFetcher import OpenLibraryFetcher
from worblehat.book_data_fetchers.fetchers.OutlandScraperFetcher import (
OutlandScraperFetcher,
)
# The order of these fetchers determines the priority of the sources.
# The first fetcher in the list has the highest priority.
# The list holds the fetcher classes themselves (their classmethods are called
# directly), not instances, hence `type[BookDataFetcher]`.
FETCHERS: list[type[BookDataFetcher]] = [
    OpenLibraryFetcher,
    GoogleBooksFetcher,
    OutlandScraperFetcher,
]

# Fetcher IDs in the same (priority) order as FETCHERS; a lower index means a
# higher priority.
FETCHER_SOURCE_IDS: list[str] = [fetcher.fetcher_id() for fetcher in FETCHERS]
def sort_data_by_priority(data: list[BookData]) -> list[BookData]:
    """
    Return a new list holding the entries of `data` ordered by source priority.

    An entry's priority is the position of its `source` in FETCHER_SOURCE_IDS,
    so the result follows the order of the FETCHERS list. The input list is
    left untouched. A source that is not listed in FETCHER_SOURCE_IDS makes
    the lookup raise ValueError.
    """

    def priority_of(entry):
        # Linear scan per entry, so O(n^2) overall; acceptable because the
        # number of fetchers is small.
        return FETCHER_SOURCE_IDS.index(entry.source)

    ordered = list(data)
    ordered.sort(key=priority_of)
    return ordered
def fetch_book_data_from_multiple_sources(
    isbn: str, strict: bool = False
) -> list[BookData]:
    """
    Returns a list of data fetched from multiple fetchers.

    Every fetcher in FETCHERS is queried through a thread pool. Fetchers that
    are not able to retrieve any data for the given ISBN are ignored, and
    there is no guarantee that there will be any book data at all.

    The results are always ordered in the same way as the fetchers are listed
    in the FETCHERS list.

    Args:
        isbn: The ISBN to look up. Dashes, underscores and surrounding
            whitespace are removed and the value is lowercased before use.
        strict: If True, a result that fails validation re-raises its
            ValueError. If False, the problem is printed and that result is
            dropped.

    Raises:
        ValueError: If the normalized ISBN is not 10 or 13 characters long or
            contains anything but digits (an ISBN-10 may end in the check
            character "X"), or, in strict mode, if a fetched result fails
            validation.
    """
    isbn = isbn.replace("-", "").replace("_", "").strip().lower()

    # An ISBN-10 may end in the check character "X" (lowercased above); every
    # other position has to be a digit.
    digits = isbn[:-1] if len(isbn) == 10 and isbn.endswith("x") else isbn
    # Reject when EITHER the length or the character check fails. The previous
    # `and` chain only rejected input that failed all checks at once.
    if len(isbn) not in (10, 13) or not digits.isnumeric():
        raise ValueError("Invalid ISBN")

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetcher.try_fetch_data, isbn) for fetcher in FETCHERS]
        fetched = [future.result() for future in futures]

    # Collect the valid results in a new list instead of removing entries from
    # the list being iterated, which would skip the element after each removal.
    valid_results: list[BookData] = []
    for result in fetched:
        if result is None:
            continue
        try:
            # validate() is expected to raise ValueError on bad data.
            result.validate()
        except ValueError as e:
            if strict:
                raise
            print(f"Invalid data: {e}")
            continue
        valid_results.append(result)

    return sort_data_by_priority(valid_results)
@@ -4,17 +4,17 @@ A BookMetadataFetcher for the Google Books API.
import requests
from worblehat.services.metadata_fetchers.BookMetadata import BookMetadata
from worblehat.services.metadata_fetchers.BookMetadataFetcher import BookMetadataFetcher
from worblehat.book_data_fetchers.BookData import BookData
from worblehat.book_data_fetchers.BookDataFetcher import BookDataFetcher
class GoogleBooksFetcher(BookMetadataFetcher):
class GoogleBooksFetcher(BookDataFetcher):
@classmethod
def metadata_source_id(_cls) -> str:
def fetcher_id(_cls) -> str:
return "google_books"
@classmethod
def fetch_metadata(cls, isbn: str) -> BookMetadata | None:
def try_fetch_data(cls, isbn: str) -> BookData | None:
try:
jsonInput = requests.get(
"https://www.googleapis.com/books/v1/volumes",
@@ -33,19 +33,13 @@ class GoogleBooksFetcher(BookMetadataFetcher):
except Exception:
return None
return BookMetadata(
return BookData(
isbn=isbn,
title=title,
source=cls.metadata_source_id(),
source=cls.fetcher_id(),
authors=authors,
language=languages,
publish_date=publishDate,
num_pages=numberOfPages,
subjects=subjects,
)
if __name__ == "__main__":
book_data = GoogleBooksFetcher.fetch_metadata("0132624788")
book_data.validate()
print(book_data)
@@ -4,21 +4,21 @@ A BookMetadataFetcher for the Open Library API.
import requests
from worblehat.services.metadata_fetchers.BookMetadata import BookMetadata
from worblehat.services.metadata_fetchers.BookMetadataFetcher import BookMetadataFetcher
from worblehat.book_data_fetchers.BookData import BookData
from worblehat.book_data_fetchers.BookDataFetcher import BookDataFetcher
LANGUAGE_MAP = {
"Norwegian": "no",
}
class OpenLibraryFetcher(BookMetadataFetcher):
class OpenLibraryFetcher(BookDataFetcher):
@classmethod
def metadata_source_id(_cls) -> str:
def fetcher_id(_cls) -> str:
return "open_library"
@classmethod
def fetch_metadata(cls, isbn: str) -> BookMetadata | None:
def try_fetch_data(cls, isbn: str) -> BookData | None:
try:
jsonInput = requests.get(f"https://openlibrary.org/isbn/{isbn}.json").json()
@@ -48,19 +48,13 @@ class OpenLibraryFetcher(BookMetadataFetcher):
except Exception:
return None
return BookMetadata(
return BookData(
isbn=isbn,
title=title,
source=cls.metadata_source_id(),
source=cls.fetcher_id(),
authors=author_names,
language=language,
publish_date=publishDate,
num_pages=numberOfPages,
subjects=subjects,
)
if __name__ == "__main__":
book_data = OpenLibraryFetcher.fetch_metadata("9788205530751")
book_data.validate()
print(book_data)
@@ -5,8 +5,8 @@ A BookMetadataFetcher that webscrapes https://outland.no/
import requests
from bs4 import BeautifulSoup
from worblehat.services.metadata_fetchers.BookMetadata import BookMetadata
from worblehat.services.metadata_fetchers.BookMetadataFetcher import BookMetadataFetcher
from worblehat.book_data_fetchers.BookData import BookData
from worblehat.book_data_fetchers.BookDataFetcher import BookDataFetcher
LANGUAGE_MAP = {
"Norsk": "no",
@@ -25,13 +25,13 @@ LANGUAGE_MAP = {
}
class OutlandScraperFetcher(BookMetadataFetcher):
class OutlandScraperFetcher(BookDataFetcher):
@classmethod
def metadata_source_id(_cls) -> str:
def fetcher_id(_cls) -> str:
return "outland_scraper"
@classmethod
def fetch_metadata(cls, isbn: str) -> BookMetadata | None:
def try_fetch_data(cls, isbn: str) -> BookData | None:
try:
# Find the link to the product page
response = requests.get(f"https://outland.no/{isbn}")
@@ -89,19 +89,13 @@ class OutlandScraperFetcher(BookMetadataFetcher):
except Exception:
return None
return BookMetadata(
return BookData(
isbn=isbn,
title=bookData.get("Title"),
source=cls.metadata_source_id(),
source=cls.fetcher_id(),
authors=bookData.get("Authors"),
language=bookData.get("Language"),
publish_date=bookData.get("PublishDate"),
num_pages=bookData.get("NumberOfPages"),
subjects=bookData.get("Subjects"),
)
if __name__ == "__main__":
book_data = OutlandScraperFetcher.fetch_metadata("9781947808225")
book_data.validate()
print(book_data)
+13 -18
View File
@@ -16,6 +16,7 @@ from worblehat.models import *
from worblehat.services import (
create_bookcase_item_from_isbn,
is_valid_isbn,
Config,
)
from .subclis import (
@@ -47,26 +48,13 @@ class WorblehatCli(NumberedCmd):
self.sql_session_dirty = False
self.prompt_header = None
@classmethod
def run_with_safe_exit_wrapper(cls, sql_session: Session) -> None:
tool = cls(sql_session)
def run_with_safe_exit_wrapper(self) -> None:
while True:
try:
tool.cmdloop()
self.cmdloop()
except KeyboardInterrupt:
if not tool.sql_session_dirty:
exit(0)
try:
print()
if prompt_yes_no(
"Are you sure you want to exit without saving?",
default=False,
):
raise KeyboardInterrupt
except KeyboardInterrupt:
if tool.sql_session is not None:
tool.sql_session.rollback()
exit(0)
print("\n\n-----------------\n")
self.do_exit("Exit")
def do_show_bookcase(self, arg: str) -> None:
bookcase_selector = InteractiveItemSelector(
@@ -75,6 +63,8 @@ class WorblehatCli(NumberedCmd):
)
bookcase_selector.cmdloop()
bookcase = bookcase_selector.result
if bookcase == None:
return
for shelf in bookcase.shelfs:
print(shelf.short_str())
@@ -138,6 +128,8 @@ class WorblehatCli(NumberedCmd):
)
bookcase_selector.cmdloop()
bookcase = bookcase_selector.result
if bookcase == None:
return
bookcase_item.shelf = select_bookcase_shelf(bookcase, self.sql_session)
@@ -152,6 +144,8 @@ class WorblehatCli(NumberedCmd):
media_type_selector.cmdloop()
bookcase_item.media_type = media_type_selector.result
if bookcase_item.media_type == None:
return
username = input("Who owns this book? [PVV]> ")
if username != "":
@@ -240,7 +234,8 @@ class WorblehatCli(NumberedCmd):
self.sql_session.commit()
else:
self.sql_session.rollback()
exit(0)
if Config["general.quit_allowed"]:
exit(0)
funcs = {
0: {
@@ -384,6 +384,8 @@ class EditBookcaseCli(NumberedCmd):
)
bookcase_selector.cmdloop()
bookcase = bookcase_selector.result
if bookcase == None:
return
assert isinstance(bookcase, Bookcase)
shelf = select_bookcase_shelf(bookcase, self.sql_session)
+2 -1
View File
@@ -65,7 +65,8 @@ def main() -> None:
if args.command == "cli":
sql_session = _connect_to_database(echo=Config["logging.debug_sql"])
WorblehatCli.run_with_safe_exit_wrapper(sql_session)
worblehat = WorblehatCli(sql_session)
worblehat.run_with_safe_exit_wrapper()
exit(0)
if args.command == "create-db":
+3 -2
View File
@@ -2,12 +2,13 @@ import isbnlib
from sqlalchemy import select
from sqlalchemy.orm import Session
from worblehat.book_data_fetchers import fetch_book_data_from_multiple_sources
from ..models import (
Author,
BookcaseItem,
Language,
)
from .metadata_fetchers import fetch_metadata_from_multiple_sources
def is_valid_pvv_isbn(isbn: str) -> bool:
@@ -41,7 +42,7 @@ def create_bookcase_item_from_isbn(
Please note that the returned BookcaseItem will likely not be fully populated with the required
data, such as the book's location in the library, and the owner of the book, etc.
"""
metadata = fetch_metadata_from_multiple_sources(isbn)
metadata = fetch_book_data_from_multiple_sources(isbn)
if len(metadata) == 0:
return None
+2 -6
View File
@@ -2,7 +2,7 @@ import tomllib
from pathlib import Path
from pprint import pformat
from typing import Any
import os
class Config:
"""
@@ -38,14 +38,10 @@ class Config:
@staticmethod
def read_password(password_field: str) -> str:
file: Path = Path(password_field)
if file.is_file() and any([file.stat().st_mode & 0o400 and file.stat().st_uid == os.getuid(), file.stat().st_mode & 0o040 and file.stat().st_gid == os.getgid(), file.stat().st_mode & 0o004]):
if Path(password_field).is_file():
with Path(password_field).open() as f:
return f.read().strip()
else:
raise RuntimeError(
f"Testing, should only use file. {password_field}",
)
return password_field
@classmethod
@@ -1,22 +0,0 @@
# base fetcher.
from abc import ABC, abstractmethod
from .BookMetadata import BookMetadata
class BookMetadataFetcher(ABC):
    """
    Abstract interface for metadata fetchers.

    Both operations are class methods, so concrete fetchers are used through
    the class itself rather than through instances.
    """

    @classmethod
    @abstractmethod
    def metadata_source_id(cls) -> str:
        """
        Return the unique identifier of the metadata source.

        The identifier is used to tell where a piece of metadata came from.
        """

    @classmethod
    @abstractmethod
    def fetch_metadata(cls, isbn: str) -> BookMetadata | None:
        """
        Try to fetch metadata for the given ISBN.

        Returns the fetched BookMetadata, or None when nothing could be fetched.
        """
@@ -1,3 +0,0 @@
from .book_metadata_fetcher import fetch_metadata_from_multiple_sources
__all__ = ["fetch_metadata_from_multiple_sources"]
@@ -1,80 +0,0 @@
"""
This module contains the fetch_metadata_from_multiple_sources() function, which fetches book metadata from multiple sources in threads and returns the non-None results ordered by source priority (highest ranked first).
"""
from concurrent.futures import ThreadPoolExecutor
from worblehat.services.metadata_fetchers.BookMetadata import BookMetadata
from worblehat.services.metadata_fetchers.BookMetadataFetcher import BookMetadataFetcher
from worblehat.services.metadata_fetchers.GoogleBooksFetcher import GoogleBooksFetcher
from worblehat.services.metadata_fetchers.OpenLibraryFetcher import OpenLibraryFetcher
from worblehat.services.metadata_fetchers.OutlandScraperFetcher import (
OutlandScraperFetcher,
)
# The order of these fetchers determines the priority of the sources.
# The first fetcher in the list has the highest priority.
# The list holds the fetcher classes themselves (their classmethods are called
# directly), not instances, hence `type[BookMetadataFetcher]`.
FETCHERS: list[type[BookMetadataFetcher]] = [
    OpenLibraryFetcher,
    GoogleBooksFetcher,
    OutlandScraperFetcher,
]

# Source IDs in the same (priority) order as FETCHERS; a lower index means a
# higher priority.
FETCHER_SOURCE_IDS: list[str] = [fetcher.metadata_source_id() for fetcher in FETCHERS]
def sort_metadata_by_priority(metadata: list[BookMetadata]) -> list[BookMetadata]:
    """
    Return a new list holding the entries of `metadata` ordered by source priority.

    An entry's priority is the position of its `source` in FETCHER_SOURCE_IDS,
    so the result follows the order of the FETCHERS list. The input list is
    left untouched. A source that is not listed in FETCHER_SOURCE_IDS makes
    the lookup raise ValueError.
    """

    def priority_of(entry):
        # Linear scan per entry, so O(n^2) overall; acceptable because the
        # number of fetchers is small.
        return FETCHER_SOURCE_IDS.index(entry.source)

    ordered = list(metadata)
    ordered.sort(key=priority_of)
    return ordered
def fetch_metadata_from_multiple_sources(
    isbn: str, strict: bool = False
) -> list[BookMetadata]:
    """
    Returns a list of metadata fetched from multiple sources.

    Every fetcher in FETCHERS is queried through a thread pool. Sources that
    do not have metadata for the given ISBN are ignored, and there is no
    guarantee that there will be any metadata at all.

    The results are always ordered in the same way as the fetchers are listed
    in the FETCHERS list.

    Args:
        isbn: The ISBN to look up. Dashes, underscores and surrounding
            whitespace are removed and the value is lowercased before use.
        strict: If True, a result that fails validation re-raises its
            ValueError. If False, the problem is printed and that result is
            dropped.

    Raises:
        ValueError: If the normalized ISBN is not 10 or 13 characters long or
            contains anything but digits (an ISBN-10 may end in the check
            character "X"), or, in strict mode, if a fetched result fails
            validation.
    """
    isbn = isbn.replace("-", "").replace("_", "").strip().lower()

    # An ISBN-10 may end in the check character "X" (lowercased above); every
    # other position has to be a digit.
    digits = isbn[:-1] if len(isbn) == 10 and isbn.endswith("x") else isbn
    # Reject when EITHER the length or the character check fails. The previous
    # `and` chain only rejected input that failed all checks at once.
    if len(isbn) not in (10, 13) or not digits.isnumeric():
        raise ValueError("Invalid ISBN")

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetcher.fetch_metadata, isbn) for fetcher in FETCHERS]
        fetched = [future.result() for future in futures]

    # Collect the valid results in a new list instead of removing entries from
    # the list being iterated, which would skip the element after each removal.
    valid_results: list[BookMetadata] = []
    for result in fetched:
        if result is None:
            continue
        try:
            # validate() is expected to raise ValueError on bad metadata.
            result.validate()
        except ValueError as e:
            if strict:
                raise
            print(f"Invalid metadata: {e}")
            continue
        valid_results.append(result)

    return sort_metadata_by_priority(valid_results)
if __name__ == "__main__":
    # Manual smoke test, only executed when this module is run as a script:
    # look up one hard-coded ISBN and pretty-print whatever was returned.
    from pprint import pprint
    isbn = "0132624788"
    metadata = fetch_metadata_from_multiple_sources(isbn)
    pprint(metadata)