#!/usr/bin/python
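
# Reads ISBNs from stdin, looks each one up with the Google Books gdata API,
# and builds a list of 'new-book' / 'new-person' suggestion actions that is
# printed or written to a temporary "suggestion" file (via the helpers
# star-imported from fileformat and util below).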

import os
from gdata.books.service import BookService
import xml.parsers.expat
import readline
import re
import random
import pgdb
import sys

from fileformat import *
from util import *

exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']

encoding = 'utf8'

# temporary
connection = pgdb.connect(database='oysteini_pbb2',
                          user='oysteini_pbb',
                          password='lio5Aide',
                          host='postgres.pvv.ntnu.no')


def suggest_book(dbconnection, tmp_file=False):
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    authors_added = {}
    file_prefix = "suggestion"
    filler = ' -------------------------------------------------- '
    print("# Enter ISBN number(s), end with eof <CTRL+D>")
    for ISBN in sys.stdin:
        ISBN = ISBN.strip()
        if ISBN in exit_commands:
            print("aborted")
            break
        elif book_in_db(dbconnection, ISBN):
            action_list.append("Book with ISBN: " + str(ISBN) + " is already in DB, skipped")
        else:
            # First print a long comment line to separate books
            new_book = filler + "Book: " + ISBN + filler
            action_list.append(new_book)
            feed = service.search_by_keyword('isbn=' + ISBN)
            if feed.entry:
                authors = parse_authors(dbconnection, feed.entry[0])
                # For each author, check if the author is already added as a new entry
                # or already in the DB, otherwise make an entry for a new author
                for author in authors:
                    if author['id'] in authors_added:
                        comment = "Author already added when book: " + str(authors_added[author['id']]) + " was added"
                        action_list.append(comment)
                    elif not author_in_db(dbconnection, author):
                        comment = "Author: \"" + str(author) + "\" was not already in database"
                        d = build_author(author)
                        d.update({'comment': comment})
                        action_list.append(d)
                        authors_added[author['id']] = ISBN
                d = {}
                if len(authors) == 0:
                    # TODO: test this
                    comment = "Didn't find any authors for book. !!IMPORTANT!! Add correct author and id in new-book section"
                    d.update({'comment': comment})
                    authors.append({'id': 'NO_AUTHOR', 'firstname': 'John', 'lastname': 'Doe'})
                d.update(build_book(feed.entry[0], authors, ISBN))
                action_list.append(d)
            else:
                print("No items found")

    if tmp_file:
        write_tmpfile(file_prefix, write_actionlist(action_list))
    else:
        print(write_actionlist(action_list))
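
# The action list is a mix of plain comment strings and dicts carrying an
# 'action' key ('new-person' / 'new-book'); write_actionlist (presumably from
# fileformat) serializes it, and write_tmpfile stores it under the
# "suggestion" prefix.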


def book_in_db(dbconnection, isbn):
    cursor = dbconnection.cursor()
    query = "SELECT * FROM book WHERE isbn=%(num)s"
    cursor.execute(query, {'num': isbn})
    if cursor.rowcount > 0:
        return True
    return False


def author_in_db(dbconnection, author):
    cursor = dbconnection.cursor()
    query = "SELECT * FROM person WHERE firstname=%(fname)s OR lastname=%(lname)s"
    cursor.execute(query, {'fname': author['firstname'], 'lname': author['lastname']})
    if cursor.rowcount > 0:
        return True
    return False


def person_id_in_db(dbconnection, id):
    cursor = dbconnection.cursor()
    query = "SELECT * FROM person WHERE id=%(i)s"
    cursor.execute(query, {'i': id})
    if cursor.rowcount > 0:
        return True
    return False


def found_item(entry, indata):
    print("Found: " + entry.dc_title[0].text)
    # NB: build_book() expects (entry, authors, indata); this call passes the
    # raw input where the author list goes.
    build_book(entry, indata)


def build_book(entry, authors, indata=False):
    dic = entry.to_dict()
    book = {}

    book['action'] = 'new-book'

    # find_isbn() expects the raw list of (type, value) identifier pairs
    isbn = find_isbn(dic['identifiers'])
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
        else:
            print("!!!isbn length not 13")
    else:
        #comment("No ISBN found.")
        print("No ISBN found.")
        return False

    book['title'] = unicode(entry.dc_title[0].text, encoding)

    set_value(book, dic, 'category')

    if len(entry.dc_title) > 0:
        book['subtitle'] = unicode(''.join(map(lambda x: x.text, entry.dc_title[1:])), encoding)

    book['persons'] = {}
    book['persons']['author'] = [author['id'] for author in authors]

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']), encoding)

    if 'date' in dic:
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic), encoding)

    book['references'] = {}
    book['references']['google-books'] = [unicode(dic['preview'], encoding)]

    return book
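
# The dict returned above ends up looking roughly like (values illustrative):
#   {'action': 'new-book', 'isbn': u'9781234567897', 'title': u'Some Title',
#    'subtitle': u'', 'category': None, 'persons': {'author': ['jd']},
#    'publisher': u'Some Publisher', 'published_year': 2011, 'edition': None,
#    'num_pages': 123, 'series': None, 'description': u'...',
#    'references': {'google-books': [u'http://books.google.com/...']}}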


def set_value(book, dic, key):
    if key in dic:
        book[key] = unicode(dic[key], encoding)
    else:
        book[key] = None


def find_description(dic):
    if 'description' in dic:
        return unescape(dic['description'])
    else:
        return ""


def find_page_number(dic):
    if 'format' in dic:
        for item in dic['format']:
            if 'pages' in item:
                return int(re.findall(r'[0-9]+', item)[0])
        return None
    else:
        return None


def find_isbn(identifiers):
    for pair in identifiers:
        if pair[0] == 'ISBN' and len(pair[1]) == 13:
            return pair[1]
    return False
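
# Example (illustrative): find_isbn([('ISBN', '0306406152'),
# ('ISBN', '9780306406157')]) returns '9780306406157', since only the
# 13-character identifier is accepted.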


def parse_authors(dbconnection, feed_entry):
    dic = feed_entry.to_dict()
    cursor = dbconnection.cursor()
    author_list = []
    if 'authors' in dic:
        for author in dic['authors']:
            # First look in db for matches
            auth_q = "SELECT * FROM person WHERE firstname LIKE %(fname)s OR lastname=%(lname)s"
            names = author.split()
            first_name_wildcard = names[0] + '%'
            last_name = names[-1]
            cursor.execute(auth_q, {'fname': first_name_wildcard, 'lname': last_name})
            match = fetchone_dict(cursor)
            if match is not None and 'id' in match:
                author_list.append(match)
                continue
            # Otherwise make a new id from the initials
            newid = ''.join([i[0] for i in names]).lower()
            if person_id_in_db(dbconnection, newid):
                i = "1"
                while person_id_in_db(dbconnection, newid + i):
                    i = str(int(i) + 1)
                newid = newid + i
            # Everything except the last name counts as the first name
            first_name = " ".join(names[:-1])
            new_author = {'id': newid, 'firstname': first_name, 'lastname': last_name}
            author_list.append(new_author)
    return author_list
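
# Example (illustrative): an author string "Guido van Rossum" with no match in
# the person table becomes {'id': 'gvr', 'firstname': 'Guido van',
# 'lastname': 'Rossum'}; if 'gvr' is already taken, the id becomes 'gvr1',
# 'gvr2', and so on.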


def build_author(new_author):
    author = new_author
    author['action'] = 'new-person'
    return author


def select_from_list(list, attributes=False, item_name=""):
    if len(list) == 0:
        return None
    elif len(list) == 1:
        return list[0]
    else:
        # NOTE: with more than one element this currently also returns None;
        # no interactive selection is done.
        return None


# Cargo-cult coded function to unescape special XML characters
def unescape(s):
    want_unicode = False
    if isinstance(s, unicode):
        s = s.encode("utf-8")
        want_unicode = True

    # the rest of this assumes that `s` is UTF-8
    list = []

    # create and initialize a parser object
    p = xml.parsers.expat.ParserCreate("utf-8")
    p.buffer_text = True
    p.returns_unicode = want_unicode
    p.CharacterDataHandler = list.append

    # parse the data wrapped in a dummy element
    # (needed so the "document" is well-formed)
    p.Parse("<e>", 0)
    p.Parse(s, 0)
    p.Parse("</e>", 1)

    # join the extracted strings and return
    es = ""
    if want_unicode:
        es = u""
    return es.join(list)
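
# Example (illustrative): unescape("Tom &amp; Jerry &lt;3") returns
# "Tom & Jerry <3"; expat expands the entity references and the character
# data handler collects the text.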


# remove me
suggest_book(connection, tmp_file=True)