2011-10-08 16:08:15 +02:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
import os
|
|
|
|
from gdata.books.service import BookService
|
|
|
|
import xml.parsers.expat
|
|
|
|
import readline
|
|
|
|
import re
|
|
|
|
import random
|
|
|
|
import pgdb
|
|
|
|
import sys
|
|
|
|
from fileformat import *
|
|
|
|
from util import *
|
|
|
|
|
|
|
|
# Inputs that abort the interactive ISBN prompt instead of being looked up.
exit_commands = [
    'exit',
    'abort',
    'quit',
    'bye',
    'eat flaming death',
    'q',
]

# Codec name handed to unicode() when decoding text taken from a feed entry.
encoding = 'utf8'
|
|
|
|
|
2011-10-09 17:02:59 +02:00
|
|
|
def google_suggest_book_data(dbconnection, tmp_file=False):
    """Read ISBNs from stdin and collect suggested import actions for them.

    Every non-blank line of stdin is treated as one ISBN.  Reading stops at
    end of input or when a line equals one of ``exit_commands``.  For each
    ISBN that is not already in the ``book`` table the Google book service is
    queried; the first feed entry yields one ``new-person`` action per author
    that is neither in the database nor suggested earlier in this run,
    followed by one ``new-book`` action.  Plain strings appended to the
    action list are comments for the person reviewing the suggestions.

    Args:
        dbconnection: open DB-API connection, handed on to the lookup helpers.
        tmp_file: when true, the rendered action list is passed to
            ``write_tmpfile`` and that call's result is returned; otherwise
            the rendered list is printed.

    Returns:
        The result of ``write_tmpfile`` when ``tmp_file`` is true, else None.
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    # Author id -> ISBN of the book that first caused the author suggestion.
    authors_added = {}
    file_prefix = "suggestion"
    filler = ' -------------------------------- '

    print("# Enter ISBN number(s), end with eof <CTRL+D>")
    for ISBN in sys.stdin:
        ISBN = ISBN.strip()
        if not ISBN:
            # A blank line is not an ISBN; looking it up would at best add an
            # empty separator and at worst crash further down.
            continue
        if ISBN in exit_commands:
            print("aborted")
            break
        if book_in_db(dbconnection, ISBN):
            comment = "Book with ISBN: " + str(ISBN) + " is already in DB, skipped"
            action_list.append(comment)
            continue

        # First print a long comment line to separate books
        new_book = filler + "Book: " + ISBN + filler
        action_list.append(new_book)

        feed = service.search_by_keyword('isbn=' + ISBN)
        if not feed.entry:
            print("No items found")
            continue

        authors = parse_authors(dbconnection, feed.entry[0])
        # For each author, check if author is already added as a new entry
        # or already in DB, otherwise make an entry for a new author
        for author in authors:
            if author['id'] in authors_added:
                comment = "Author already added when book: " + str(authors_added[author['id']]) + " was added"
                action_list.append(comment)
            elif not author_in_db(dbconnection, author):
                comment = "Author: \"" + str(author) + "\" was not already in database"
                person = build_author(author)
                person.update({'comment': comment})
                action_list.append(person)
                authors_added[author['id']] = ISBN

        d = {}
        if len(authors) == 0:
            # TODO: test this
            comment = "Didn't find any authors for book. !!IMPORTANT!! Add correct author and id in new-book section"
            d.update({'comment': comment})
            authors.append({'id': 'NO_AUTHOR', 'firstname': 'John', 'lastname': 'Doe'})

        book = build_book(feed.entry[0], authors, ISBN)
        if not book:
            # build_book signals failure with False; dict.update(False) would
            # raise TypeError, so record the problem and move on instead.
            action_list.append("Could not build book entry for ISBN: " + str(ISBN) + ", skipped")
            continue
        d.update(book)
        action_list.append(d)

    if tmp_file:
        return write_tmpfile(file_prefix, write_actionlist(action_list))
    else:
        print(write_actionlist(action_list))
|
|
|
|
|
|
|
|
def book_in_db(dbconnection, isbn):
    """Report whether the ``book`` table already holds a row with this ISBN.

    Runs a parameterised SELECT on a fresh cursor and returns True when the
    cursor reports at least one row, False otherwise.
    """
    lookup = dbconnection.cursor()
    lookup.execute("SELECT * FROM book WHERE isbn=%(num)s", {'num': isbn})
    return lookup.rowcount > 0
|
|
|
|
|
|
|
|
def author_in_db(dbconnection, author):
    """Report whether any ``person`` row matches the author's names.

    ``author`` is a mapping with ``firstname`` and ``lastname`` keys.  The
    match is deliberately loose: a row counts when EITHER its first name OR
    its last name equals the author's.  Returns True when the cursor reports
    at least one such row, False otherwise.
    """
    params = {'fname': author['firstname'], 'lname': author['lastname']}
    lookup = dbconnection.cursor()
    lookup.execute(
        "SELECT * FROM person WHERE firstname=%(fname)s OR lastname=%(lname)s",
        params)
    return lookup.rowcount > 0
|
|
|
|
|
|
|
|
def person_id_in_db(dbconnection, id):
    """Report whether a ``person`` row with the given id exists.

    Returns True when the parameterised SELECT reports at least one row,
    False otherwise.
    """
    lookup = dbconnection.cursor()
    lookup.execute("SELECT * FROM person WHERE id=%(i)s", {'i': id})
    return lookup.rowcount > 0
|
2011-10-08 16:08:15 +02:00
|
|
|
|
|
|
|
def found_item(entry, indata):
    """Announce a matched feed entry and build its book record.

    Prints the entry's first title and then calls ``build_book``.  The built
    record is discarded; this function returns None.

    Args:
        entry: feed entry exposing ``dc_title`` (a sequence whose items have
            a ``text`` attribute).
        indata: the ISBN the user typed, used by ``build_book`` as a fallback.
    """
    # Parenthesised single-argument form, like every other print in the file.
    print("Found: " + entry.dc_title[0].text)
    # build_book takes (entry, authors, indata).  No authors are known here,
    # so pass an empty list; passing indata second would put the ISBN string
    # in the authors slot.
    build_book(entry, [], indata)
|
|
|
|
|
2011-10-08 19:11:34 +02:00
|
|
|
def build_book(entry, authors, indata=False):
    """Build the ``new-book`` action dictionary for one feed entry.

    Args:
        entry: feed entry providing ``to_dict()`` and ``dc_title``.
        authors: sequence of mappings; each one's ``id`` is listed as an
            author of the book.
        indata: ISBN typed by the user, used only when the entry itself does
            not carry a 13-character ISBN.  It is accepted only when it is 13
            characters long; otherwise a warning is printed and the record is
            returned without an ``isbn`` key.

    Returns:
        The book dictionary, or False when neither the entry nor ``indata``
        supplies an ISBN.
    """
    dic = entry.to_dict()
    book = {'action': 'new-book'}

    # find_isbn iterates over (type, value) pairs, so it must receive the
    # identifier list itself.  Stringifying the list first makes it iterate
    # over single characters and never match.  The key may be absent.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
        else:
            print("!!!isbn length not 13")
    else:
        print("No ISBN found.")
        return False

    book['title'] = unicode(entry.dc_title[0].text, encoding)

    set_value(book, dic, 'category')

    if len(entry.dc_title) > 0:
        # Everything after the first title element forms the subtitle; with a
        # single title element this is the empty string.
        subtitle = ''.join([part.text for part in entry.dc_title[1:]])
        book['subtitle'] = unicode(subtitle, encoding)

    book['persons'] = {}
    book['persons']['author'] = [author['id'] for author in authors]

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']), encoding)

    if 'date' in dic:
        # Only the leading four characters (the year) are kept.
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic), encoding)

    book['references'] = {}
    if 'preview' in dic:
        # Not every entry has a preview link; leave the reference out rather
        # than fail with KeyError.
        book['references']['google-books'] = [unicode(dic['preview'], encoding)]

    return book
|
2011-10-08 16:08:15 +02:00
|
|
|
|
|
|
|
def set_value(book, dic, key):
    """Copy ``dic[key]`` into ``book`` as unicode, or store None if absent.

    The value is decoded with the module-level ``encoding``.  ``book`` is
    modified in place; nothing is returned.
    """
    book[key] = unicode(dic[key], encoding) if key in dic else None
|
|
|
|
|
|
|
|
|
|
|
|
def find_description(dic):
    """Return the XML-unescaped ``description`` of ``dic``, or "" if absent."""
    if 'description' not in dic:
        return ""
    return unescape(dic['description'])
|
|
|
|
|
|
|
|
def find_page_number(dic):
    """Extract the page count from the ``format`` entries of ``dic``.

    Scans ``dic['format']`` in order and returns, as an int, the first run of
    digits found in an item that contains the word ``pages``.  Items that
    mention ``pages`` but carry no digits are skipped.

    Returns:
        The page count, or None when ``format`` is missing or no item yields
        a number.
    """
    if 'format' not in dic:
        return None
    for item in dic['format']:
        if 'pages' not in item:
            continue
        digits = re.search(r'[0-9]+', item)
        if digits:
            return int(digits.group())
    return None
|
|
|
|
|
|
|
|
def find_isbn(identifiers):
    """Return the first 13-character ISBN among ``identifiers``, else False.

    Each element is indexed as ``element[0]`` (identifier type) and
    ``element[1]`` (value); only elements whose type is exactly ``'ISBN'``
    are considered.
    """
    candidates = (
        pair[1]
        for pair in identifiers
        if pair[0] == 'ISBN' and len(pair[1]) == 13
    )
    return next(candidates, False)
|
|
|
|
|
2011-10-08 19:11:34 +02:00
|
|
|
def parse_authors(dbconnection, feed_entry):
    """Resolve the authors of a feed entry to person records.

    For every name under the ``authors`` key of ``feed_entry.to_dict()``:

    * the ``person`` table is searched for a row whose first name starts
      with the author's first name OR whose last name equals the author's
      last name; a hit (as returned by ``fetchone_dict``) that has an ``id``
      is used as-is;
    * otherwise a new record is made.  Its id is the lower-cased initials of
      the name, followed by the smallest positive number that makes it
      unused, both in the database and among the ids created earlier in this
      call.

    Args:
        dbconnection: open DB-API connection.
        feed_entry: feed entry providing ``to_dict()``.

    Returns:
        A list of mappings, one per author, each with at least ``id``; new
        records also carry ``firstname`` and ``lastname``.  The list is empty
        when the entry lists no authors.
    """
    entry_data = feed_entry.to_dict()
    cursor = dbconnection.cursor()
    auth_q = "SELECT * FROM person WHERE firstname LIKE %(fname)s OR lastname=%(lname)s"
    author_list = []
    # Ids invented during this call.  They are not in the database yet, so
    # the database check alone cannot tell that they are taken.
    new_ids = set()

    for author in entry_data.get('authors', []):
        names = author.split()
        if not names:
            # Empty or whitespace-only author string: nothing to look up.
            continue
        last_name = names[-1]

        # First look in db for matches
        cursor.execute(auth_q, {'fname': names[0] + '%', 'lname': last_name})
        match = fetchone_dict(cursor)
        if match is not None and 'id' in match:
            author_list.append(match)
            continue

        # Otherwise make new id
        base_id = ''.join([name[0] for name in names]).lower()
        newid = base_id
        suffix = 0
        while newid in new_ids or person_id_in_db(dbconnection, newid):
            suffix += 1
            newid = base_id + str(suffix)
        new_ids.add(newid)

        # Everything but the last word is the first name, joined with spaces.
        # A single-word name serves as both first and last name.
        first_name = " ".join(names[:-1]) or names[0]
        author_list.append({'id': newid, 'firstname': first_name, 'lastname': last_name})

    return author_list
|
|
|
|
|
|
|
|
def build_author(new_author):
    """Return a ``new-person`` action built from an author record.

    The result is a shallow copy of ``new_author`` with ``action`` set to
    ``'new-person'``.  The record passed in is left untouched, so keys the
    caller adds to the action afterwards do not leak into its author list.
    """
    author = dict(new_author)
    author['action'] = 'new-person'
    return author
|
|
|
|
|
|
|
|
|
2011-10-08 16:08:15 +02:00
|
|
|
# NOTE: unescape(), defined further down, is a cargo-cult coded function to unescape special XML characters
|
|
|
|
|
|
|
|
def select_from_list(list, attributes=False, item_name=""):
    """Return the sole element of a one-item sequence, otherwise None.

    ``attributes`` and ``item_name`` are accepted but not used.
    """
    return list[0] if len(list) == 1 else None
|
|
|
|
|
|
|
|
def unescape(s):
    """Resolve XML escapes in ``s`` by running it through an expat parser.

    ``s`` is wrapped in a dummy ``<e>`` element so that it forms a
    well-formed document, and the character data reported by the parser is
    concatenated.  A unicode argument is encoded to UTF-8 for parsing and
    yields a unicode result; any other argument is parsed as given and
    yields a plain string.
    """
    want_unicode = isinstance(s, unicode)
    if want_unicode:
        s = s.encode("utf-8")

    # the rest of this assumes that `s` is UTF-8
    pieces = []

    # create and initialize a parser object
    parser = xml.parsers.expat.ParserCreate("utf-8")
    parser.buffer_text = True
    parser.returns_unicode = want_unicode
    parser.CharacterDataHandler = pieces.append

    # parse the data wrapped in a dummy element
    # (needed so the "document" is well-formed)
    parser.Parse("<e>", 0)
    parser.Parse(s, 0)
    parser.Parse("</e>", 1)

    # join the extracted strings and return
    glue = u"" if want_unicode else ""
    return glue.join(pieces)
|