Opprettet google_interface
google_interface.py inneholder funksjonalitet som svarer til kommandoen "worblehat suggest-book-data"
This commit is contained in:
parent
b808517be3
commit
d9f41d3e7e
|
@ -0,0 +1,222 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
from gdata.books.service import BookService
|
||||
import xml.parsers.expat
|
||||
import readline
|
||||
import re
|
||||
import random
|
||||
import pgdb
|
||||
import sys
|
||||
from fileformat import *
|
||||
from util import *
|
||||
|
||||
# Words that abort ISBN entry when typed on a line of their own.
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']

# Notes collected by comment() while a book is being built; build_book()
# moves them into the book's 'comment' field and empties this again.
comments = ""

# Temporary: the database connection is opened as soon as the module loads.
# Every setting can be overridden through the environment; the defaults are
# the values that used to be hard-coded here.
# NOTE(review): a real password is committed in this file. It should be
# rotated, and the default dropped once WORBLEHAT_DB_PASSWORD is set
# wherever this runs.
connection = pgdb.connect(
    database=os.environ.get('WORBLEHAT_DB_NAME', 'oysteini_pbb2'),
    user=os.environ.get('WORBLEHAT_DB_USER', 'oysteini_pbb'),
    password=os.environ.get('WORBLEHAT_DB_PASSWORD', 'lio5Aide'),
    host=os.environ.get('WORBLEHAT_DB_HOST', 'postgres.pvv.ntnu.no'))
|
||||
|
||||
def suggest_book(dbconnection, tmp_file=False):
    """Read ISBNs from standard input and print suggested book actions.

    Every non-blank input line is taken as one ISBN.  An ISBN that
    book_in_db() reports as present only produces a '#' note in the
    result.  Any other ISBN is searched for with
    BookService.search_by_keyword() and the first entry found is turned
    into an action by build_book().  Reading stops at end of file or at
    one of the words in exit_commands; the actions gathered so far are
    printed in both cases.

    Args:
        dbconnection: Open database connection, handed on to book_in_db()
            and build_book().
        tmp_file: Reserved for writing the result to a temporary file.
            Not implemented; any true value raises NotImplementedError.

    Raises:
        NotImplementedError: If tmp_file is true.
    """
    if tmp_file:
        # Refuse before any input is read so that nothing typed is lost.
        # TODO: write to tmp file
        raise NotImplementedError("writing to a tmp file is not implemented")

    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    print("Enter ISBN number(s), end with eof")
    for line in sys.stdin:
        # Lines read from stdin keep their newline, which would defeat both
        # the exit-command test and the ISBN search.
        indata = line.strip()
        if not indata:
            continue
        if indata in exit_commands:
            print("aborted")
            break
        if book_in_db(dbconnection, indata):
            action_list.append(
                "# Book with isbn: " + str(indata) + " is already in DB, skipped")
            continue
        feed = service.search_by_keyword('isbn=' + indata)
        if not feed.entry:
            print("No items found")
            continue
        # NOTE(review): actions for authors missing from the database are
        # not added to action_list yet.  The call used to pass action_list
        # as a fourth argument for that purpose, but build_book() is
        # declared as (dbconnection, entry, indata).
        book = build_book(dbconnection, feed.entry[0], indata)
        # build_book() returns False when it cannot settle on an ISBN.
        if book:
            action_list.append(book)

    print(write_actionlist(action_list))
|
||||
|
||||
def book_in_db(dbconnection, isbn):
    """Tell whether a book with the given ISBN is already stored.

    Args:
        dbconnection: Open database connection.
        isbn: ISBN to look for, compared as given.

    Returns:
        True if the query finds at least one row, otherwise False.
    """
    cursor = dbconnection.cursor()
    try:
        # NOTE(review): assumes books are kept in a table `book` with an
        # `isbn` column -- confirm against the schema.  The previous query
        # was a copy of the person lookup and could not run.
        query = "SELECT isbn FROM book WHERE isbn=%(isbn)s"
        cursor.execute(query, {'isbn': isbn})
        return cursor.fetchone() is not None
    finally:
        cursor.close()
|
||||
|
||||
def found_item(entry, indata, dbconnection=None):
    """Announce a search hit and build a book action from it.

    Args:
        entry: Search result entry; its first dc_title is printed.
        indata: The ISBN that was searched for, passed on to build_book().
        dbconnection: Database connection for build_book().  Defaults to
            the module-level connection.

    Returns:
        Whatever build_book() returns for the entry.
    """
    if dbconnection is None:
        dbconnection = connection
    print("Found: " + entry.dc_title[0].text)
    # build_book() expects the connection first; it used to receive the
    # entry in that position.
    return build_book(dbconnection, entry, indata)
|
||||
|
||||
def build_book(dbconnection, entry, indata=False):
    """Build a 'new-book' action dictionary from a search result entry.

    Args:
        dbconnection: Open database connection, passed to build_authors().
        entry: Search result entry; its to_dict() and dc_title are read.
        indata: The ISBN the user typed.  Used only when the entry carries
            no 13-character ISBN and indata itself is 13 characters long.

    Returns:
        The book dictionary, or False when no 13-character ISBN is known.
    """
    global comments

    dic = entry.to_dict()
    # Debug output.
    print("Dict in build_book:" + str(dic))
    print("")

    book = {}
    book['action'] = 'new-book'

    # find_isbn() walks over (kind, value) pairs, so the identifiers must be
    # passed as they are rather than converted to a single string.
    isbn = find_isbn(dic.get('identifiers', []))
    if not isbn and indata and len(indata) == 13:
        isbn = indata
    if not isbn:
        # Printed rather than stored with comment(): no book is returned
        # that could carry the note, and a stored note would end up on the
        # next book.
        print("No ISBN found.")
        return False
    book['isbn'] = unicode(isbn)

    book['title'] = unicode(entry.dc_title[0].text)

    set_value(book, dic, 'category')

    # All dc_title elements after the first are joined into the subtitle;
    # it is the empty string when there are none.
    book['subtitle'] = unicode(''.join([part.text for part in entry.dc_title[1:]]))

    authors = build_authors(dbconnection, dic)
    book['persons'] = {}
    book['persons']['author'] = [author['id'] for author in authors]

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']))

    if 'date' in dic:
        try:
            book['published_year'] = int(dic['date'][:4])
        except ValueError:
            # Leave the year out rather than abort on an odd date.
            comment("Could not read a year from date: %s" % dic['date'])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic))

    book['references'] = {}
    if 'preview' in dic:
        book['references']['google-books'] = [unicode(dic['preview'])]

    # Hand the notes gathered by comment() to this book and start afresh.
    book['comment'] = comments
    comments = ''

    return book
|
||||
|
||||
def comment(comm):
    """Add one line of text to the module-level comments buffer.

    comm is formatted with '%s' and a newline is put after it.
    """
    global comments
    line = u'%s\n' % comm
    comments = comments + line
|
||||
|
||||
def set_value(book, dic, key):
    """Copy dic[key] into book[key] as unicode text.

    book[key] is set to None when dic has no such key.
    """
    book[key] = unicode(dic[key]) if key in dic else None
|
||||
|
||||
|
||||
def find_description(dic):
    """Return dic['description'] passed through unescape().

    Returns the empty string when dic has no 'description' key.
    """
    if 'description' not in dic:
        return ""
    return unescape(dic['description'])
|
||||
|
||||
def find_page_number(dic):
    """Return the page count named in dic['format'], if there is one.

    The first number in the first 'format' item that contains the word
    'pages' and at least one digit is used.

    Args:
        dic: Dictionary that may hold a 'format' list of strings.

    Returns:
        The page count as an int, or None when dic has no 'format' key or
        no item yields a number.
    """
    for item in dic.get('format', []):
        if 'pages' not in item:
            continue
        # An item may mention pages without giving a number; keep looking
        # instead of failing on it.
        number = re.search(r'[0-9]+', item)
        if number:
            return int(number.group())
    return None
|
||||
|
||||
def find_isbn(identifiers):
    """Return the first 13-character ISBN among the identifier pairs.

    Each element is indexed as (kind, value); a pair counts when its kind
    is 'ISBN' and its value is 13 long.  Returns False if none does.
    """
    matches = (pair[1] for pair in identifiers
               if pair[0] == 'ISBN' and len(pair[1]) == 13)
    return next(matches, False)
|
||||
|
||||
def build_authors(dbconnection, dictionary):
    """Return one get_or_create_author() result per name in dictionary['authors'].

    When the dictionary has no 'authors' key, the note "No authors found."
    is recorded with comment() and an empty list is returned.
    """
    if 'authors' not in dictionary:
        comment("No authors found.")
        return []
    return [get_or_create_author(dbconnection, name)
            for name in dictionary['authors']]
|
||||
|
||||
def _choose_candidate(candidates):
    """Ask the user which of several matching persons is meant."""
    print("Found several candidates:")
    for number, candidate in enumerate(candidates):
        print("%d: %s" % (number, candidate['id']))
    prompt = "Which candidate do you want? [0-%d] " % (len(candidates) - 1)
    # Ask again until the answer is a number that names a candidate.
    while True:
        answer = raw_input(prompt)
        try:
            number = int(answer)
        except ValueError:
            continue
        if 0 <= number < len(candidates):
            return candidates[number]


def _unused_person_id(dbconnection, wanted):
    """Return wanted, or wanted with a number added, not yet used as a person id."""
    cursor = dbconnection.cursor()
    query = "SELECT id FROM person WHERE id=%(id)s"
    candidate = wanted
    suffix = 1
    while True:
        cursor.execute(query, {'id': candidate})
        if cursor.fetchone() is None:
            return candidate
        # NOTE(review): the numbering scheme for clashing ids is a guess;
        # the original stopped at "Check if id already exists".
        suffix += 1
        candidate = "%s%d" % (wanted, suffix)


def get_or_create_author(dbconnection, author_name):
    """Find the person matching an author name, or describe a new one.

    The last word of the name is taken as the last name and the rest as the
    first name.  With exactly one match in the person table that row is
    returned; with several the user is asked to pick one.  With none, a
    'new-person' action is built whose id is the lower-cased initials of
    the name, made unique against the person table.

    Args:
        dbconnection: Open database connection.
        author_name: Full name of the author.

    Returns:
        A row from the person table (as produced by fetchall_dict) or a new
        'new-person' dictionary; both carry an 'id' entry.

    Raises:
        ValueError: If author_name contains no words.
    """
    names = author_name.split()
    if not names:
        raise ValueError("author name is empty")
    first = ' '.join(names[:-1])
    last = names[-1]

    cursor = dbconnection.cursor()
    # NOTE(review): OR matches everybody who shares either name, so a lone
    # match may be a different person -- confirm that OR is intended.
    query = "SELECT id FROM person WHERE lastname=%(last)s OR firstname=%(first)s"
    cursor.execute(query, {'last': last, 'first': first})
    candidates = fetchall_dict(cursor)

    if len(candidates) == 1:
        return candidates[0]
    if len(candidates) > 1:
        return _choose_candidate(candidates)

    print("No candidate found for " + author_name + ", making new person")
    initials = ''.join([name[0] for name in names]).lower()
    author = {}
    author['action'] = 'new-person'
    author['id'] = _unused_person_id(dbconnection, initials)
    author['first_name'] = first
    author['last_name'] = last
    return author
|
||||
|
||||
|
||||
# Cargo-cult coded helper for unescaping special XML characters: see unescape() further down.
|
||||
|
||||
def select_from_list(list, attributes=False, item_name=""):
    """Return the only element of list, or None.

    None is returned both for an empty list and for a list with more than
    one element.  attributes and item_name are accepted but not used.
    """
    if len(list) == 1:
        return list[0]
    return None
|
||||
|
||||
def unescape(s):
    """Resolve XML character and entity references in a string.

    The text is parsed as the content of a dummy XML element and only the
    character data is kept, so well-formed tags inside it are dropped too.

    Args:
        s: Text to unescape, either a unicode string or a UTF-8 encoded
            byte string.

    Returns:
        The unescaped text, of the same string type as s.  Text that is
        not well-formed XML content (a bare '&', for instance) is returned
        unchanged.
    """
    want_unicode = isinstance(s, type(u""))
    # the parser below is fed UTF-8 encoded bytes
    data = s.encode("utf-8") if want_unicode else s

    chunks = []

    # create and initialize a parser object; its handlers are handed
    # unicode text, which is turned back into bytes at the end if needed
    parser = xml.parsers.expat.ParserCreate("utf-8")
    parser.buffer_text = True
    parser.CharacterDataHandler = chunks.append

    # parse the data wrapped in a dummy element
    # (needed so the "document" is well-formed)
    try:
        parser.Parse(b"<e>", 0)
        parser.Parse(data, 0)
        parser.Parse(b"</e>", 1)
    except xml.parsers.expat.ExpatError:
        # Best effort: keep the text as it is instead of aborting.
        return s

    # join the extracted strings and return
    result = u"".join(chunks)
    if want_unicode:
        return result
    return result.encode("utf-8")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Start the interactive session only when this file is run as a script,
    # so that importing the module does not block on standard input.
    suggest_book(connection)
|
Reference in New Issue