#!/usr/bin/python
"""Suggest book data for Worblehat by looking up ISBNs in Google Books.

This module contains the functionality behind the command
``worblehat suggest-book-data``: ISBNs are read from standard input, looked
up through the Google Books API, and turned into a list of ``new-book`` /
``new-person`` actions that is printed via ``write_actionlist``.

The code targets Python 2 (``unicode``, ``raw_input``, ``gdata``).

Provenance: reconstructed from a whitespace-collapsed git patch,
commit d9f41d3e7e4a82d0127e9408b1e761e3a360a5b0 by andrfla,
Sat, 8 Oct 2011 14:08:15 +0000, subject "Created google_interface"
(new file cli/google_interface.py).
"""

import os
from gdata.books.service import BookService
import xml.parsers.expat
import readline
import re
import random
import pgdb
import sys
from fileformat import *
from util import *

# Input lines that abort the ISBN prompt loop.
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']

# Free-text remarks collected while building the current book; build_book()
# attaches them to the book and resets the buffer.
comments = ""

# Temporary connection setup.
# SECURITY(review): database credentials are hard-coded in the source file.
# They are kept unchanged here so behaviour is preserved, but they should be
# moved to configuration outside version control and the password rotated.
connection = pgdb.connect(database='oysteini_pbb2',
                          user='oysteini_pbb',
                          password='lio5Aide',
                          host='postgres.pvv.ntnu.no')


def suggest_book(dbconnection, tmp_file=False):
    """Read ISBNs from stdin and print suggested actions for new books.

    dbconnection -- open DB-API connection used for the duplicate check and
                    for author lookups.
    tmp_file     -- if true the result should be written to a temporary file
                    instead of being printed (not implemented yet).
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    print("Enter ISBN number(s), end with eof")
    # readline() is used instead of iterating over sys.stdin: Python 2 file
    # iteration uses a read-ahead buffer, which delays interactive input and
    # would swallow the answers meant for raw_input() in
    # get_or_create_author().
    for line in iter(sys.stdin.readline, ''):
        # Lines keep their trailing newline; strip it so that exit commands
        # and the 13-character ISBN check can actually match.
        indata = line.strip()
        if not indata:
            continue
        if indata in exit_commands:
            print("aborted")
            break
        if book_in_db(dbconnection, indata):
            action_list.append("# Book with isbn: " + str(indata) + " is already in DB, skipped")
            continue
        feed = service.search_by_keyword('isbn=' + indata)
        if not feed.entry:
            print("No items found")
            continue
        # action_list is passed to build_book to fill in missing authors
        book = build_book(dbconnection, feed.entry[0], indata, action_list)
        if book:
            # build_book returns False when no ISBN could be determined;
            # such results must not end up in the action list.
            action_list.append(book)

    if tmp_file:
        # TODO: write to tmp file
        pass
    else:
        print(write_actionlist(action_list))


def book_in_db(dbconnection, isbn):
    """Return True if a book with the given ISBN is already in the database.

    NOTE(review): the table name ``book`` and column name ``isbn`` are
    assumed (by analogy with the ``person`` table queried below); the
    original query was an unfinished copy of the person lookup. Confirm
    against the actual schema.
    """
    cursor = dbconnection.cursor()
    query = "SELECT isbn FROM book WHERE isbn=%(isbn)s"
    cursor.execute(query, {'isbn': isbn})
    return len(fetchall_dict(cursor)) > 0


def found_item(entry, indata, dbconnection=None):
    """Announce a found feed entry and build the book action for it.

    dbconnection defaults to the module-level ``connection`` so that the
    original two-argument call form keeps working.
    """
    if dbconnection is None:
        dbconnection = connection
    print("Found: " + entry.dc_title[0].text)
    return build_book(dbconnection, entry, indata)


def build_book(dbconnection, entry, indata=False, action_list=None):
    """Build a ``new-book`` action dict from a Google Books feed entry.

    dbconnection -- DB-API connection used to resolve authors.
    entry        -- feed entry; must provide ``to_dict()`` and ``dc_title``.
    indata       -- the ISBN the user typed, used as a fallback when the
                    entry carries no 13-character ISBN.
    action_list  -- optional list of pending actions; ``new-person`` actions
                    for unknown authors are appended to it.

    Returns the book dict, or False when no ISBN is available at all.
    """
    global comments

    dic = entry.to_dict()
    print("Dict in build_book:" + str(dic))
    print("")

    book = {}
    book['action'] = 'new-book'

    # to_dict() only includes keys it has data for, and 'identifiers' is a
    # list of (type, value) pairs -- it must not be converted to a string
    # before being searched.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        # NOTE(review): when the typed ISBN is not 13 characters long the
        # book is emitted without an 'isbn' key, as in the original code.
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
    else:
        # Printed rather than queued with comment(): a queued remark would
        # leak into the comment field of the next book that is built.
        print("No ISBN found.")
        return False

    book['title'] = unicode(entry.dc_title[0].text)

    set_value(book, dic, 'category')

    # Everything after the first title element is treated as the subtitle
    # (empty string when there is none).
    book['subtitle'] = unicode(''.join([t.text for t in entry.dc_title[1:]]))

    authors = build_authors(dbconnection, dic, action_list)
    book['persons'] = {}
    book['persons']['author'] = [author['id'] for author in authors]

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']))

    if 'date' in dic:
        # The first four characters of the date are taken as the year.
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic))

    book['references'] = {}
    if 'preview' in dic:
        book['references']['google-books'] = [unicode(dic['preview'])]

    # Hand the collected remarks to this book and start fresh for the next.
    book['comment'] = comments
    comments = ''

    return book


def comment(comm):
    """Append one line to the remarks attached to the book being built."""
    global comments
    comments += u'%s\n' % comm


def set_value(book, dic, key):
    """Copy dic[key] into book as unicode, or store None if it is missing."""
    if key in dic:
        book[key] = unicode(dic[key])
    else:
        book[key] = None


def find_description(dic):
    """Return the unescaped description from dic, or "" if there is none."""
    if 'description' not in dic:
        return ""
    try:
        return unescape(dic['description'])
    except xml.parsers.expat.ExpatError:
        # Text that is not well-formed XML content (for example a bare '&')
        # cannot be unescaped; keep it as it is instead of crashing.
        return dic['description']


def find_page_number(dic):
    """Return the page count found in dic['format'], or None.

    The first number in the first format item that mentions 'pages' and
    contains digits is used.
    """
    for item in dic.get('format', []):
        if 'pages' in item:
            match = re.search(r'[0-9]+', item)
            if match:
                return int(match.group(0))
    return None


def find_isbn(identifiers):
    """Return the first 13-character ISBN among (type, value) pairs.

    Returns False when there is none.
    """
    for pair in identifiers:
        if pair[0] == 'ISBN' and len(pair[1]) == 13:
            return pair[1]
    return False


def build_authors(dbconnection, dictionary, action_list=None):
    """Resolve every author name in dictionary['authors'] to a person dict.

    Returns a list of dicts that each have an 'id' key; an empty list (plus
    a remark on the book) when the dictionary has no authors.
    """
    if 'authors' not in dictionary:
        comment("No authors found.")
        return []
    return [get_or_create_author(dbconnection, name, action_list)
            for name in dictionary['authors']
            if name.split()]  # blank names cannot be split into first/last


def _person_id_taken(dbconnection, person_id, action_list=None):
    """Return True if person_id is used in the DB or by a pending action."""
    cursor = dbconnection.cursor()
    cursor.execute("SELECT id FROM person WHERE id=%(id)s", {'id': person_id})
    if len(fetchall_dict(cursor)) > 0:
        return True
    for pending in action_list or []:
        if (isinstance(pending, dict)
                and pending.get('action') == 'new-person'
                and pending.get('id') == person_id):
            return True
    return False


def get_or_create_author(dbconnection, author_name, action_list=None):
    """Return the person for author_name, creating a new-person action if needed.

    The last whitespace-separated word is taken as the last name and the
    rest as the first name. If exactly one person matches, it is returned;
    if several match, the user is asked to choose. Otherwise a
    ``new-person`` action dict is created, appended to action_list (when
    given) and returned.

    NOTE(review): the lookup matches on last name OR first name, as in the
    original query; with a single first-name-only match that person is
    picked without asking. Confirm this is intended.
    """
    names = author_name.split()
    first = ' '.join(names[:-1])
    last = names[-1]

    cursor = dbconnection.cursor()
    query = "SELECT id FROM person WHERE lastname=%(last)s OR firstname=%(first)s"
    cursor.execute(query, {'last': last, 'first': first})
    candidates = fetchall_dict(cursor)

    if len(candidates) == 1:
        return candidates[0]

    if len(candidates) > 1:
        # TODO: TEST
        print("Found several candidates:")
        for i, candidate in enumerate(candidates):
            print("%d: %s" % (i, candidate['id']))
        prompt = "Which candidate do you want? [0-%d] " % (len(candidates) - 1)
        # Keep asking until the answer is a valid index.
        while True:
            try:
                candno = int(raw_input(prompt))
            except ValueError:
                continue
            if 0 <= candno < len(candidates):
                return candidates[candno]

    # An author who appeared earlier in this run may already be queued for
    # creation; reuse that action rather than creating the person twice.
    for pending in action_list or []:
        if (isinstance(pending, dict)
                and pending.get('action') == 'new-person'
                and pending.get('first_name') == first
                and pending.get('last_name') == last):
            return pending

    print("No candidate found for " + author_name + ", making new person")
    author = {}
    author['action'] = 'new-person'
    author['first_name'] = first
    author['last_name'] = last

    # The id is built from the initials, e.g. "Donald E. Knuth" -> "dek".
    base_id = ''.join([i[0] for i in names]).lower()
    newid = base_id
    # Check if id already exists; append a counter until it is unique.
    # NOTE(review): the numbering scheme is an assumption -- the original
    # code stopped in the middle of this check.
    suffix = 1
    while _person_id_taken(dbconnection, newid, action_list):
        suffix += 1
        newid = '%s%d' % (base_id, suffix)
    author['id'] = newid

    if action_list is not None:
        action_list.append(author)
    return author


def select_from_list(list, attributes=False, item_name=""):
    """Return the only element of a one-element list, otherwise None.

    attributes and item_name are accepted but not used yet.
    """
    if len(list) == 1:
        return list[0]
    return None


# Cargo-cult coded function to unescape special XML characters
def unescape(s):
    """Return s with XML character and entity references resolved.

    Accepts byte strings (assumed to be UTF-8) and unicode strings, and
    returns the same kind of string. Raises xml.parsers.expat.ExpatError
    if s is not well-formed XML content.
    """
    want_unicode = False
    if isinstance(s, unicode):
        s = s.encode("utf-8")
        want_unicode = True

    # the rest of this assumes that `s` is UTF-8
    chunks = []

    # create and initialize a parser object
    p = xml.parsers.expat.ParserCreate("utf-8")
    p.buffer_text = True
    p.returns_unicode = want_unicode
    p.CharacterDataHandler = chunks.append

    # parse the data wrapped in a dummy element
    # (needed so the "document" is well-formed)
    p.Parse("<e>", 0)
    p.Parse(s, 0)
    p.Parse("</e>", 1)

    # join the extracted strings and return
    es = ""
    if want_unicode:
        es = u""
    return es.join(chunks)


if __name__ == '__main__':
    # Run the prompt loop only when executed as a script, so that importing
    # this module does not block waiting for input on stdin.
    suggest_book(connection)