worblehat-old/cli/google_interface.py

#!/usr/bin/python

import os
from gdata.books.service import BookService
import xml.parsers.expat
import readline
import re
import random
import pgdb
import sys
from fileformat import *
from util import *

# Words that, typed instead of an ISBN, end the interactive input loop.
exit_commands = [
    'exit',
    'abort',
    'quit',
    'bye',
    'eat flaming death',
    'q',
]

# Codec name handed to unicode() when decoding text taken from feed entries.
encoding = 'utf8'

def google_suggest_book_data(dbconnection, tmp_file=False):
    """Read ISBNs from stdin and build a list of suggested database actions.

    Every non-blank input line is treated as one ISBN.  Books that are
    already in the database only produce a comment.  For the others the
    Google Books service is queried, and the first hit is turned into
    'new-person' actions (for authors not seen before) followed by one
    'new-book' action.  Reading stops at end of input or when one of the
    words in exit_commands is entered; the actions collected so far are
    still written out in both cases.

    Args:
        dbconnection: open database connection, passed on to the lookup
            helpers (book_in_db, parse_authors, author_in_db).
        tmp_file: when true, the rendered action list is written through
            write_tmpfile instead of being printed.

    Returns:
        The result of write_tmpfile when tmp_file is true, otherwise None.
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    # Maps the id of each author queued during this run to the ISBN of the
    # book that queued it, so an author is only suggested once.
    authors_added = {}
    file_prefix = "suggestion"
    filler = ' -------------------------------- '
    print("# Enter ISBN number(s), end with eof <CTRL+D>")
    for line in sys.stdin:
        isbn = line.strip()
        if not isbn:
            # A blank line is not an ISBN; querying with it would search for
            # 'isbn=' and leave build_book without any ISBN to fall back on.
            continue
        if isbn in exit_commands:
            print("aborted")
            break
        if book_in_db(dbconnection, isbn):
            comment = "Book with ISBN: " + str(isbn) + " is already in DB, skipped"
            action_list.append(comment)
            continue

        # First a long comment line to separate books
        action_list.append(filler + "Book: " + isbn + filler)
        feed = service.search_by_keyword('isbn=' + isbn)
        if not feed.entry:
            print("No items found")
            continue

        entry = feed.entry[0]
        authors = parse_authors(dbconnection, entry)
        # For each author, check if author is already added as a new entry
        # or already in DB, otherwise make an entry for a new author
        for author in authors:
            if author['id'] in authors_added:
                comment = "Author already added when book: " + str(authors_added[author['id']]) + " was added"
                action_list.append(comment)
            elif not author_in_db(dbconnection, author):
                comment = "Author: \"" + str(author) + "\" was not already in database"
                person_action = build_author(author)
                person_action['comment'] = comment
                action_list.append(person_action)
                authors_added[author['id']] = isbn

        book_action = {}
        if not authors:
            # TODO: test this
            comment = "Didn't find any authors for book. !!IMPORTANT!! Add correct author and id in new-book section"
            book_action['comment'] = comment
            authors.append({'id':'NO_AUTHOR', 'firstname':'John', 'lastname':'Doe'})

        book = build_book(entry, authors, isbn)
        if not book:
            # build_book reports the problem itself and returns False;
            # there is nothing to queue for this ISBN.
            continue
        book_action.update(book)
        action_list.append(book_action)

    if tmp_file:
        return write_tmpfile(file_prefix, write_actionlist(action_list))
    else:
        print(write_actionlist(action_list))

def book_in_db(dbconnection, isbn):
    """Tell whether the book table has at least one row with this ISBN.

    Args:
        dbconnection: database connection providing cursor().
        isbn: ISBN value bound to the query parameter.

    Returns:
        True when the cursor reports one or more matching rows, else False.
    """
    cur = dbconnection.cursor()
    cur.execute("SELECT * FROM book WHERE isbn=%(num)s", {'num': isbn})
    return cur.rowcount > 0

def author_in_db(dbconnection, author):
    """Tell whether any person row shares this author's first or last name.

    The match is deliberately loose: a row qualifies when either its
    firstname or its lastname equals the corresponding author field.

    Args:
        dbconnection: database connection providing cursor().
        author: mapping with 'firstname' and 'lastname' entries.

    Returns:
        True when the cursor reports one or more matching rows, else False.
    """
    params = {'fname': author['firstname'], 'lname': author['lastname']}
    cur = dbconnection.cursor()
    cur.execute(
        "SELECT * FROM person WHERE firstname=%(fname)s OR lastname=%(lname)s",
        params)
    return cur.rowcount > 0

def person_id_in_db(dbconnection, id):
    """Tell whether the person table has at least one row with this id.

    Args:
        dbconnection: database connection providing cursor().
        id: person id bound to the query parameter (the name shadows the
            builtin, but it is part of the existing signature).

    Returns:
        True when the cursor reports one or more matching rows, else False.
    """
    cur = dbconnection.cursor()
    cur.execute("SELECT * FROM person WHERE id=%(i)s", {'i': id})
    return cur.rowcount > 0

def found_item(entry, indata):
    """Announce a found feed entry and build its 'new-book' action.

    Args:
        entry: feed entry; its first dc_title element is printed.
        indata: ISBN as entered by the user, handed to build_book as the
            fallback ISBN.

    Returns:
        Whatever build_book returns for the entry (the book dictionary, or
        False when no ISBN could be determined).
    """
    print("Found: " + entry.dc_title[0].text)
    # No author information is available here, so an empty author list is
    # passed; indata must go in the third position, because the second
    # positional parameter of build_book is the author list.
    return build_book(entry, [], indata)

def build_book(entry, authors, indata=False):
    """Build the 'new-book' action dictionary for one feed entry.

    Args:
        entry: feed entry providing to_dict() and a dc_title list with at
            least one element.
        authors: list of author dictionaries; only their 'id' is used.
        indata: ISBN as entered by the user.  Used when the entry itself
            carries no 13-character ISBN; it is only stored when it is 13
            characters long.

    Returns:
        The book dictionary, or False when neither the entry nor indata
        supplies an ISBN.
    """
    dic = entry.to_dict()
    book = {}

    book['action'] = 'new-book'

    # find_isbn expects the (type, value) pairs themselves.  Converting the
    # list to a string first makes it iterate single characters, so no
    # identifier could ever match.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
        else:
            print("!!!isbn length not 13")
    else:
        print("No ISBN found.")
        return False

    book['title'] = unicode(entry.dc_title[0].text, encoding)

    set_value(book, dic, 'category')

    # Everything after the first dc_title element is treated as subtitle.
    # The title lookup above already required at least one element, so the
    # subtitle is always set (possibly to an empty string).
    subtitle_parts = [part.text for part in entry.dc_title[1:]]
    book['subtitle'] = unicode(''.join(subtitle_parts), encoding)

    book['persons'] = {'author': [author['id'] for author in authors]}

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']), encoding)

    if 'date' in dic:
        # Only the leading four characters (the year) are kept.
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic), encoding)

    book['references'] = {'google-books': [unicode(dic['preview'], encoding)]}

    return book

def set_value(book, dic, key):
    """Copy one optional field from dic into book.

    The value is decoded with the module-level encoding when dic has the
    key; otherwise book[key] is set to None so the key is always present.
    """
    book[key] = unicode(dic[key], encoding) if key in dic else None


def find_description(dic):
    """Return the unescaped 'description' of dic, or "" when it has none."""
    if 'description' not in dic:
        return ""
    return unescape(dic['description'])

def find_page_number(dic):
    """Extract the page count from the 'format' entries of dic.

    Args:
        dic: metadata dictionary; 'format' (when present) is iterated and
            each item is searched for the word 'pages'.

    Returns:
        The first run of digits, as an int, from the first 'format' item
        that contains 'pages' and at least one digit; None when there is no
        such item or no 'format' key.
    """
    for item in dic.get('format', ()):
        if 'pages' not in item:
            continue
        digits = re.search(r'[0-9]+', item)
        # An item can mention pages without giving a number; skip it instead
        # of failing on an empty match list.
        if digits:
            return int(digits.group())
    return None

def find_isbn(identifiers):
    """Return the first 13-character ISBN among the identifiers.

    Args:
        identifiers: iterable of indexable pairs whose first element is the
            identifier type and whose second is the identifier value.

    Returns:
        The value of the first pair typed 'ISBN' with a 13-character value,
        or False when there is none.
    """
    isbn13_values = (
        pair[1]
        for pair in identifiers
        if pair[0] == 'ISBN' and len(pair[1]) == 13
    )
    # The generator is lazy, so scanning stops at the first hit.
    return next(isbn13_values, False)

def parse_authors(dbconnection, feed_entry):
    """Resolve the authors of a feed entry to person dictionaries.

    Each name in the entry's 'authors' list is first looked up in the person
    table: a row matches when its firstname starts with the first word of
    the name or its lastname equals the last word.  A matching row is used
    as it is.  Otherwise a new person is proposed whose id is built from the
    lower-cased initials of the name, with a numeric suffix appended when
    that id is already taken.

    Args:
        dbconnection: database connection providing cursor(); also passed
            to person_id_in_db.
        feed_entry: feed entry providing to_dict().

    Returns:
        A list with one dictionary per usable author name: either the row
        returned by fetchone_dict or a new dictionary with 'id',
        'firstname' and 'lastname'.  Empty when the entry has no authors.
    """
    metadata = feed_entry.to_dict()
    cursor = dbconnection.cursor()
    auth_q = "SELECT * FROM person WHERE firstname LIKE %(fname)s OR lastname=%(lname)s"
    author_list = []
    for author in metadata.get('authors', []):
        names = author.split()
        if not names:
            # An empty or whitespace-only name has no words to search for.
            continue
        last_name = names[-1]

        # First look in db for matches
        cursor.execute(auth_q, {'fname': names[0] + '%', 'lname': last_name})
        match = fetchone_dict(cursor)
        if match is not None and 'id' in match:
            author_list.append(match)
            continue

        # Otherwise make new id
        newid = ''.join([word[0] for word in names]).lower()
        if person_id_in_db(dbconnection, newid):
            suffix = 1
            while person_id_in_db(dbconnection, newid + str(suffix)):
                suffix += 1
            newid = newid + str(suffix)

        # Everything but the last word is the first name, joined with
        # spaces.  A one-word name keeps that word as first name too.
        first_name = " ".join(names[:-1]) or names[0]
        author_list.append({'id': newid, 'firstname': first_name, 'lastname': last_name})
    return author_list

def build_author(new_author):
    """Mark an author dictionary as a 'new-person' action.

    The dictionary is modified in place and the same object is returned.
    """
    new_author['action'] = 'new-person'
    return new_author


# NOTE: the cargo-cult coded function to unescape special XML characters is
# unescape(), defined after select_from_list().

def select_from_list(list, attributes=False, item_name=""):
    """Return the only element of a one-element list, otherwise None.

    Args:
        list: sequence of candidates (the name shadows the builtin, but it
            is part of the existing signature).
        attributes: unused.
        item_name: unused.

    Returns:
        list[0] when the list has exactly one element; None when it is
        empty or holds several candidates.
    """
    if len(list) == 1:
        return list[0]
    return None

def unescape(s):
	"""Return the character data obtained by parsing `s` as XML content.

	`s` is fed to an expat parser as the body of a dummy <e> element and
	the text chunks reported by the parser are joined into one string.

	Args:
		s: text to process; a unicode object is encoded to UTF-8 first.

	Returns:
		The joined character data.  The result is built on a unicode
		separator when `s` was unicode and on a plain string otherwise.

	NOTE(review): the parser presumably raises an expat error when `s` is
	not well-formed as element content - confirm against callers.
	"""
	want_unicode = False
	if isinstance(s, unicode):
		s = s.encode("utf-8")
		want_unicode = True

	# the rest of this assumes that `s` is UTF-8
	# (`list` shadows the builtin for the rest of this function)
	list = []

	# create and initialize a parser object
	p = xml.parsers.expat.ParserCreate("utf-8")
	p.buffer_text = True
	# NOTE(review): returns_unicode looks like a Python 2 pyexpat attribute -
	# confirm before running this on another interpreter version.
	p.returns_unicode = want_unicode
	# every chunk of character data the parser reports is collected in `list`
	p.CharacterDataHandler = list.append

	# parse the data wrapped in a dummy element
	# (needed so the "document" is well-formed)
	p.Parse("<e>", 0)
	p.Parse(s, 0)
	p.Parse("</e>", 1)

	# join the extracted strings and return
	es = ""
	if want_unicode:
		es = u""
	return es.join(list)