This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.

243 lines
7.6 KiB

import os
from gdata.books.service import BookService
import xml.parsers.expat
import readline
import re
import random
import pgdb
import sys
from fileformat import *
from util import *
# Words that google_suggest_book_data compares each stripped input line
# against (its `ISBN in exit_commands` check).
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
# Codec name handed to unicode(value, encoding) when decoding feed text
# in build_book and set_value.
encoding = 'utf8'
# Read ISBNs from stdin, look each one up through the Google Books
# BookService, and collect suggested actions for new authors and books.
#
# dbconnection -- connection handed to book_in_db / author_in_db /
#                 parse_authors for database lookups
# tmp_file     -- when true, the action list rendered by write_actionlist()
#                 is passed to write_tmpfile() and that result is returned
#
# NOTE(review): this block has lost its indentation and, apparently, several
# statements: the `if ISBN in exit_commands:` branch has no body, nothing is
# ever appended to action_list, the `comment` / `new_book` strings are built
# but never used, and no return is visible for the tmp_file == False case.
# Recover the missing lines from version control before relying on this code.
def google_suggest_book_data(dbconnection, tmp_file=False):
service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
action_list = []
# Maps an author id to the ISBN of the book that first introduced it.
authors_added = {}
file_prefix = "suggestion"
filler = ' -------------------------------- '
print("# Enter ISBN number(s), end with eof <CTRL+D>")
for ISBN in sys.stdin:
ISBN = ISBN.strip()
# NOTE(review): body of this branch is missing; presumably it stops reading
# input - confirm.
if ISBN in exit_commands:
elif book_in_db(dbconnection, ISBN):
comment = "Book with ISBN: " + str(ISBN) + " is already in DB, skipped"
# First print a long comment line to separate books
new_book = filler + "Book: " + ISBN.strip() + filler
feed = service.search_by_keyword('isbn='+ISBN)
# Only the first feed entry is used.
if feed.entry:
authors = parse_authors(dbconnection, feed.entry[0])
# For each author, check if author is already added as a new entry
# or already in DB, otherwise make an entry for a new author
for author in authors:
if author['id'] in authors_added:
comment = "Author already added when book: " + str(authors_added[author['id']]) + " was added"
elif not author_in_db(dbconnection, author):
comment = "Author: \"" + str(author) + "\" was not already in database"
d = build_author(author)
authors_added[author['id']] = ISBN
d = {}
if len(authors) == 0:
# TODO: test this
comment = "Didn't find any authors for book. !!IMPORTANT!! Add correct author and id in new-book section"
# Placeholder author so the book entry still references an author id.
authors.append({'id':'NO_AUTHOR', 'firstname':'John', 'lastname':'Doe'})
# NOTE(review): build_book() returns False when no ISBN is available, and
# d.update(False) would raise TypeError - confirm how that was handled.
d.update(build_book(feed.entry[0], authors, ISBN))
# NOTE(review): presumably the else-branch of `if feed.entry:` - confirm.
print("No items found")
if tmp_file:
return write_tmpfile(file_prefix, write_actionlist(action_list))
def book_in_db(dbconnection, isbn):
    """Return True if the book table has at least one row with this ISBN.

    dbconnection -- open database connection providing cursor()
    isbn         -- ISBN value, bound to the query as a parameter
    """
    cursor = dbconnection.cursor()
    try:
        query = "SELECT * FROM book WHERE isbn=%(num)s"
        cursor.execute(query, {'num': isbn})
        return cursor.rowcount > 0
    finally:
        # Release the cursor even when execute() raises.
        cursor.close()
def author_in_db(dbconnection, author):
    """Return True if a person row matches the author's first OR last name.

    dbconnection -- open database connection providing cursor()
    author       -- dict with at least 'firstname' and 'lastname'

    NOTE(review): the query matches on either name alone, so an unrelated
    person sharing only a first name counts as a hit. This mirrors the
    lookup in parse_authors and looks deliberate - confirm.
    """
    cursor = dbconnection.cursor()
    try:
        query = "SELECT * FROM person WHERE firstname=%(fname)s OR lastname=%(lname)s"
        cursor.execute(query, {'fname': author['firstname'],
                               'lname': author['lastname']})
        return cursor.rowcount > 0
    finally:
        # Release the cursor even when execute() raises.
        cursor.close()
def person_id_in_db(dbconnection, id):
    """Return True if the person table has at least one row with this id.

    dbconnection -- open database connection providing cursor()
    id           -- person id, bound to the query as a parameter
    """
    cursor = dbconnection.cursor()
    try:
        query = "SELECT * FROM person WHERE id=%(i)s"
        cursor.execute(query, {'i': id})
        return cursor.rowcount > 0
    finally:
        # Release the cursor even when execute() raises.
        cursor.close()
def found_item(entry, indata):
    """Announce a found feed entry and build its new-book action.

    entry  -- feed entry providing dc_title and to_dict()
    indata -- ISBN as entered by the user, used by build_book as fallback

    Returns whatever build_book returns (the book dict, or False).
    """
    print("Found: " + entry.dc_title[0].text)
    # build_book's signature is (entry, authors, indata). No authors are
    # known here, so pass an empty list instead of putting the ISBN in the
    # authors slot.
    return build_book(entry, [], indata)
def build_book(entry, authors, indata=False):
    """Build a 'new-book' action dict from a Google Books feed entry.

    entry   -- feed entry; must provide to_dict() and dc_title
    authors -- list of author dicts; only each dict's 'id' is used
    indata  -- ISBN as entered by the user, used as a fallback when the
               entry itself carries no 13-character ISBN

    Returns the book dict, or False when no ISBN is available at all.
    """
    dic = entry.to_dict()
    book = {'action': 'new-book'}

    # Hand find_isbn the identifier pairs themselves. Converting the list to
    # a string first makes it iterate over single characters and never match.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
        else:
            # NOTE(review): the book is still built in this case, just
            # without an 'isbn' key - confirm this is intended.
            print("!!!isbn length not 13")
    else:
        print("No ISBN found.")
        return False

    # The first dc_title element is the title; any further elements are
    # joined into the subtitle (empty string when there are none).
    book['title'] = unicode(entry.dc_title[0].text, encoding)
    book['subtitle'] = unicode(
        ''.join(part.text for part in entry.dc_title[1:]), encoding)

    set_value(book, dic, 'category')
    book['persons'] = {'author': [author['id'] for author in authors]}
    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']), encoding)
    if 'date' in dic:
        # Only the leading four characters (the year) are kept.
        book['published_year'] = int(dic['date'][:4])
    set_value(book, dic, 'edition')
    book['num_pages'] = find_page_number(dic)
    set_value(book, dic, 'series')
    book['description'] = unicode(find_description(dic), encoding)

    book['references'] = {}
    # Not every entry is guaranteed to carry a preview link.
    if 'preview' in dic:
        book['references']['google-books'] = [unicode(dic['preview'], encoding)]
    return book
def set_value(book, dic, key):
    """Copy dic[key] into book[key] as unicode, or store None if absent.

    book -- dict being filled in (modified in place)
    dic  -- source dict
    key  -- key to copy
    """
    if key in dic:
        book[key] = unicode(dic[key], encoding)
    else:
        # Without this else-branch the decoded value above would always be
        # overwritten with None.
        book[key] = None
def find_description(dic):
    """Return the XML-unescaped 'description' of dic, or "" if there is none.

    A missing, None or empty description all yield the empty string, so
    unescape() is never handed a value it cannot parse.
    """
    description = dic.get('description')
    if not description:
        return ""
    return unescape(description)
def find_page_number(dic):
    """Return the page count found in dic['format'], or None.

    Scans the items of dic['format'] for the first one that contains the
    word 'pages' together with a run of digits, and returns that number as
    an int. Items that mention 'pages' without any digits are skipped
    instead of raising IndexError.
    """
    for item in dic.get('format', ()):
        if 'pages' not in item:
            continue
        digits = re.search(r'[0-9]+', item)
        if digits:
            return int(digits.group())
    return None
def find_isbn(identifiers):
    """Return the first 13-character ISBN among identifier pairs, else False.

    identifiers -- iterable of (kind, value) pairs; only pairs whose kind is
                   'ISBN' and whose value has length 13 qualify
    """
    for pair in identifiers:
        # Skip malformed entries that lack a value field.
        if len(pair) >= 2 and pair[0] == 'ISBN' and len(pair[1]) == 13:
            return pair[1]
    return False
def parse_authors(dbconnection, feed_entry):
    """Return a list of author dicts for the authors of a feed entry.

    dbconnection -- open database connection providing cursor()
    feed_entry   -- feed entry providing to_dict(); its 'authors' value is
                    read as a list of full-name strings

    For each author name the person table is searched for a row whose
    firstname starts with the first word of the name or whose lastname
    equals the last word. A matching row is used as-is; otherwise a new
    dict with 'id', 'firstname' and 'lastname' is built, the id being the
    lower-cased initials with a numeric suffix added if that id is taken.

    NOTE(review): the appends to author_list were not present in the
    visible source and are reconstructed here - confirm against history.
    """
    entry = feed_entry.to_dict()
    author_list = []
    # Ids handed out during this call, so two new authors with the same
    # initials do not end up sharing one id.
    assigned_ids = set()

    def id_taken(candidate):
        return candidate in assigned_ids or person_id_in_db(dbconnection, candidate)

    auth_q = "SELECT * FROM person WHERE firstname LIKE %(fname)s OR lastname=%(lname)s"
    cursor = dbconnection.cursor()
    try:
        for author in entry.get('authors', []):
            names = author.split()
            if not names:
                # Blank author string: nothing to look up or to build.
                continue
            last_name = names[-1]

            # First look in db for matches
            cursor.execute(auth_q, {'fname': names[0] + '%', 'lname': last_name})
            match = fetchone_dict(cursor)
            if match is not None and 'id' in match:
                author_list.append(match)
                continue

            # Otherwise make new id
            newid = ''.join([name[0] for name in names]).lower()
            if id_taken(newid):
                suffix = 1
                while id_taken(newid + str(suffix)):
                    suffix += 1
                newid = newid + str(suffix)
            assigned_ids.add(newid)

            # Everything except the last word is the first name, joined
            # with spaces. A single-word name serves as both.
            first_name = " ".join(names[:-1]) or names[0]
            author_list.append({'id': newid,
                                'firstname': first_name,
                                'lastname': last_name})
    finally:
        cursor.close()
    return author_list
def build_author(new_author):
    """Return a 'new-person' action dict for the given author.

    new_author -- dict describing the author (e.g. id, firstname, lastname)

    The result is a shallow copy with 'action' set to 'new-person'; the
    caller's dict is left untouched.
    """
    author = dict(new_author)
    author['action'] = 'new-person'
    return author
# NOTE: the cargo-cult coded function that unescapes special XML characters
# is unescape(), defined after select_from_list() below.
def select_from_list(list, attributes=False, item_name=""):
    """Return the sole element of a one-element list, otherwise None.

    list       -- candidate items
    attributes -- accepted for interface compatibility; unused here
    item_name  -- accepted for interface compatibility; unused here

    Empty lists and lists with several items both yield None.
    """
    if len(list) == 1:
        return list[0]
    return None
def unescape(s):
    """Return s with XML character and entity references resolved.

    s -- text (unicode) or UTF-8 encoded bytes; must be well-formed XML
         character data (a bare '<' or '&' makes expat raise ExpatError)

    The result has the same kind as the input: text in, text out; bytes in,
    UTF-8 bytes out.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    want_unicode = isinstance(s, text_type)
    if want_unicode:
        s = s.encode("utf-8")
    # the rest of this assumes that `s` is UTF-8

    chunks = []
    # create and initialize a parser object
    parser = xml.parsers.expat.ParserCreate("utf-8")
    parser.buffer_text = True
    # Older pyexpat lets the caller choose str or unicode callbacks; newer
    # versions dropped the attribute and always deliver text.
    if hasattr(parser, "returns_unicode"):
        parser.returns_unicode = want_unicode
    parser.CharacterDataHandler = chunks.append

    # parse the data wrapped in a dummy element
    # (needed so the "document" is well-formed)
    parser.Parse(b"<e>" + s + b"</e>", 1)

    # join the extracted strings and return
    joined = (u"" if want_unicode else "").join(chunks)
    if not want_unicode and isinstance(joined, text_type):
        # Bytes were passed in but the parser produced text: encode back.
        joined = joined.encode("utf-8")
    return joined