worblehat-old/cli/google_interface.py

#!/usr/bin/python

import os
from gdata.books.service import BookService
import xml.parsers.expat
import readline
import re
import random
import pgdb
import sys
from fileformat import *
from util import *

# Inputs that suggest_book() treats as a request to stop reading ISBNs.
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']

# Module-wide buffer of remarks collected through comment(); build_book()
# copies it into the book's 'comment' field and then clears it.
comments = ""

# temporary
# NOTE(review): the database credentials are hard-coded here and the
# connection is opened as soon as the module is loaded -- presumably a
# stopgap (see the "temporary" marker above); confirm and move the values
# to configuration.
connection = pgdb.connect(database='oysteini_pbb2',
        user='oysteini_pbb',
        password='lio5Aide',
        host='postgres.pvv.ntnu.no');

def suggest_book(dbconnection, tmp_file=False):
    """Read ISBNs from stdin and print an action list of suggested book data.

    Each non-empty input line is taken as one ISBN.  ISBNs already present
    in the database produce a comment line in the action list; the others
    are looked up through the Google Books service and, when a hit is
    found, turned into a book action with build_book().

    Args:
        dbconnection: open database connection, passed on to book_in_db()
            and build_book().
        tmp_file: reserved for writing the action list to a temporary
            file.  Not implemented yet; the list is printed either way.
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    print("Enter ISBN number(s), end with eof")
    for line in sys.stdin:
        # Lines read from stdin keep their trailing newline; strip it so
        # the exit-command check and the ISBN lookups see the bare text.
        indata = line.strip()
        if not indata:
            continue
        if indata in exit_commands:
            print("aborted")
            break
        if book_in_db(dbconnection, indata):
            action_list.append("# Book with isbn: " + str(indata) + " is already in DB, skipped")
            continue

        feed = service.search_by_keyword('isbn=' + indata)
        if not feed.entry:
            print("No items found")
            continue

        # build_book() takes (dbconnection, entry, indata) and returns False
        # when it cannot determine an ISBN; only real book actions are kept.
        book = build_book(dbconnection, feed.entry[0], indata)
        if book:
            action_list.append(book)
        else:
            print("No ISBN found for " + indata + ", skipped")

    # TODO: write to tmp file when tmp_file is set.  Until that exists the
    # action list is printed in both cases.
    print(write_actionlist(action_list))

def book_in_db(dbconnection, isbn):
    """Return True when a book with the given ISBN is already stored.

    Args:
        dbconnection: open DB-API connection.
        isbn: ISBN text; surrounding whitespace is ignored.

    Returns:
        True if the lookup query yields at least one row, otherwise False.
    """
    cursor = dbconnection.cursor()
    # NOTE(review): the previous query filtered on lastname/firstname with
    # undefined variables (copied from the person lookup).  The table and
    # column names used here are assumed -- confirm against the schema.
    query = "SELECT isbn FROM book WHERE isbn=%(isbn)s"
    cursor.execute(query, {'isbn': isbn.strip()})
    return cursor.fetchone() is not None

def found_item(entry, indata, dbconnection=None):
    """Announce a found feed entry and build its book action.

    Args:
        entry: feed entry exposing a dc_title list.
        indata: the ISBN text the entry was looked up with.
        dbconnection: database connection for build_book(); defaults to the
            module-level ``connection`` so existing two-argument calls keep
            working.

    Returns:
        Whatever build_book() returns for the entry.
    """
    if dbconnection is None:
        dbconnection = connection
    print("Found: " + entry.dc_title[0].text)
    # build_book() expects the connection first; it was previously called
    # with (entry, indata), which shifted every argument by one position.
    return build_book(dbconnection, entry, indata)

def build_book(dbconnection, entry, indata=False):
    """Build a 'new-book' action dictionary from a Google Books feed entry.

    Args:
        dbconnection: database connection handed on to build_authors().
        entry: feed entry; must provide to_dict() and a dc_title list.
        indata: the ISBN text typed by the user.  Used as the ISBN when the
            entry's identifiers contain no 13-character ISBN and the typed
            text (whitespace stripped) is itself 13 characters long.

    Returns:
        The book dictionary, or False when no 13-character ISBN could be
        determined.  Remarks gathered through comment() while building are
        stored under 'comment' and the shared buffer is cleared afterwards.
    """
    global comments

    dic = entry.to_dict()
    print("Dict in build_book:" + str(dic))
    print("")

    book = {}
    book['action'] = 'new-book'

    # Hand the identifier pairs to find_isbn() as they are.  Wrapping them
    # in unicode() made it iterate over single characters and never match.
    # Assumes 'identifiers' is a sequence of (kind, value) pairs, which is
    # what find_isbn() indexes into -- TODO confirm against to_dict().
    isbn = find_isbn(dic.get('identifiers', []))
    typed_isbn = indata.strip() if indata else ''
    if isbn:
        book['isbn'] = unicode(isbn)
    elif len(typed_isbn) == 13:
        book['isbn'] = unicode(typed_isbn)
    else:
        # Report directly instead of via comment(): a remark left in the
        # shared buffer would be attached to the next book that is built.
        print("No ISBN found.")
        return False

    book['title'] = unicode(entry.dc_title[0].text)

    set_value(book, dic, 'category')

    # Title elements after the first are joined into the subtitle; this is
    # an empty string when the entry has only one title.
    book['subtitle'] = unicode(''.join([title.text for title in entry.dc_title[1:]]))

    authors = build_authors(dbconnection, dic)
    book['persons'] = {}
    book['persons']['author'] = [author['id'] for author in authors]

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']))

    if 'date' in dic:
        # Only the first four characters (the year) are used.
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic))

    book['references'] = {}
    # Guard the lookup so an entry without a preview link does not raise.
    if 'preview' in dic:
        book['references']['google-books'] = [unicode(dic['preview'])]

    book['comment'] = comments
    comments = ''

    return book

def comment(comm):
    """Append *comm*, followed by a newline, to the module-level ``comments`` buffer."""
    global comments
    line = u'%s\n' % comm
    comments = comments + line

def set_value(book, dic, key):
    """Store ``dic[key]`` as unicode text in ``book[key]``, or None when *key* is absent."""
    book[key] = unicode(dic[key]) if key in dic else None


def find_description(dic):
    """Return the unescaped 'description' entry of *dic*, or "" when there is none."""
    if 'description' not in dic:
        return ""
    return unescape(dic['description'])

def find_page_number(dic):
    """Return the page count mentioned in ``dic['format']``, or None.

    The first item containing the text 'pages' and at least one digit
    decides the result; the first run of digits in it is the page count.
    Items that mention 'pages' without any number are skipped (they used to
    raise IndexError).

    Args:
        dic: dictionary that may hold a 'format' sequence of strings.

    Returns:
        The page count as an int, or None when none can be found.
    """
    for item in dic.get('format', ()):
        if 'pages' not in item:
            continue
        digits = re.search(r'[0-9]+', item)
        if digits:
            return int(digits.group())
    return None

def find_isbn(identifiers):
    """Return the value of the first ('ISBN', value) pair whose value has 13 characters.

    Returns False when *identifiers* holds no such pair.
    """
    matches = (entry[1] for entry in identifiers
               if entry[0] == 'ISBN' and len(entry[1]) == 13)
    return next(matches, False)

def build_authors(dbconnection, dictionary):
    """Resolve every name under ``dictionary['authors']`` to an author record.

    Indentation is normalised to spaces; the previous mix of tabs and
    spaces in the else branch is rejected by Python 3 and by ``python -tt``.

    Args:
        dbconnection: database connection passed to get_or_create_author().
        dictionary: book data that may contain an 'authors' sequence.

    Returns:
        A list with one get_or_create_author() result per name.  When the
        'authors' key is missing, a remark is recorded through comment() and
        an empty list is returned.
    """
    if 'authors' not in dictionary:
        comment("No authors found.")
        return []
    return [get_or_create_author(dbconnection, name)
            for name in dictionary['authors']]

def get_or_create_author(dbconnection, author_name):
    """Find the person matching *author_name*, or describe a new one.

    The last whitespace-separated word is taken as the last name and the
    rest as the first name.  Persons whose last name OR first name matches
    are candidates: a single candidate is returned directly, and with
    several the user is asked to pick one.  With no candidate a
    'new-person' action dictionary is returned instead.

    Args:
        dbconnection: open DB-API connection.
        author_name: full name of the author.

    Returns:
        A dictionary with at least an 'id' key: either a row as produced by
        fetchall_dict(), or a new-person action.

    Raises:
        ValueError: if *author_name* contains no name at all.
    """
    names = author_name.split()
    if not names:
        raise ValueError("empty author name")
    first = ' '.join(names[:-1])
    last = names[-1]

    cursor = dbconnection.cursor()
    query = "SELECT id FROM person WHERE lastname=%(last)s OR firstname=%(first)s"
    cursor.execute(query, {'last': last, 'first': first})
    # presumably a list of dicts keyed by column name; verify in util
    candidates = fetchall_dict(cursor)

    if len(candidates) == 1:
        return candidates[0]
    if len(candidates) > 1:
        # TODO: TEST
        print("Found several candidates:")
        # Only 'id' is selected, so that is what can be shown.  The earlier
        # loop used range() on the list itself and an undefined 'feed'.
        for number, candidate in enumerate(candidates):
            print("%d:  %s" % (number, candidate['id']))
        prompt = "Which candidate do you want? [0-%d] " % (len(candidates) - 1)
        while True:
            try:
                candno = int(raw_input(prompt))
            except ValueError:
                continue
            if 0 <= candno < len(candidates):
                return candidates[candno]

    print("No candidate found for " + author_name + ", making new person")
    base_id = ''.join([name[0] for name in names]).lower()
    newid = base_id
    # Check if id already exists; append a counter until it is unused.
    # NOTE(review): this completes a statement that was left unfinished.
    # The 'id' column is taken from the query above; the suffix scheme is
    # an assumption.  Ids handed out earlier in the same run but not yet
    # stored are not detected.
    idq = "SELECT id FROM person WHERE id=%(id)s"
    suffix = 1
    while True:
        cursor.execute(idq, {'id': newid})
        if cursor.fetchone() is None:
            break
        suffix += 1
        newid = "%s%d" % (base_id, suffix)

    author = {}
    author['action'] = 'new-person'
    author['id'] = newid
    author['first_name'] = first
    author['last_name'] = last
    return author


# NOTE: unescape(), defined after select_from_list() below, is a cargo-cult coded function to unescape special XML characters

def select_from_list(list, attributes=False, item_name=""):
    """Return the sole element of *list*, or None when it holds zero or several items.

    *attributes* and *item_name* are accepted but not used.
    """
    if len(list) != 1:
        return None
    return list[0]

def unescape(s):
    """Parse *s* as the content of a dummy XML element and return its character data.

    A unicode argument is encoded to UTF-8 for parsing and the result is
    returned as unicode; any other argument is parsed as given and the
    result is joined as a plain string.
    """
    want_unicode = isinstance(s, unicode)
    if want_unicode:
        s = s.encode("utf-8")

    # from here on `s` is assumed to be UTF-8
    pieces = []

    # set up an expat parser that collects character data into `pieces`
    parser = xml.parsers.expat.ParserCreate("utf-8")
    parser.buffer_text = True
    parser.returns_unicode = want_unicode
    parser.CharacterDataHandler = pieces.append

    # feed the text wrapped in a dummy element so the "document" is
    # well-formed
    parser.Parse("<e>", 0)
    parser.Parse(s, 0)
    parser.Parse("</e>", 1)

    # glue the collected pieces together with a separator of matching type
    joiner = u"" if want_unicode else ""
    return joiner.join(pieces)


# Starts the ISBN prompt using the module-level connection.  There is no
# ``if __name__ == "__main__"`` guard, so this also runs on import.
suggest_book(connection)
Opprettet google_interface google_interface.py inneholder funksjonalitet som svarer til kommandoen "worblehat suggest-book-data" 2011-10-08 16:08:15 +02:00			`#!/usr/bin/python`

			`import os`
			`from gdata.books.service import BookService`
			`import xml.parsers.expat`
			`import readline`
			`import re`
			`import random`
			`import pgdb`
			`import sys`
			`from fileformat import *`
			`from util import *`

			`exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']`

			`comments = ""`

			`# midlertidig`
			`connection = pgdb.connect(database='oysteini_pbb2',`
			`user='oysteini_pbb',`
			`password='lio5Aide',`
			`host='postgres.pvv.ntnu.no');`

			`def suggest_book(dbconnection, tmp_file=False):`
			`service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')`
			`action_list = []`
			`print("Enter ISBN number(s), end with eof")`
			`for indata in sys.stdin:`
			`if indata in exit_commands:`
			`print("aborted")`
			`break`
			`if book_in_db(dbconnection, indata):`
			`action_list.append("# Book with isbn: " + str(indata) + " is already in DB, skipped")`
			`else:`
			`feed = service.search_by_keyword('isbn='+indata)`
			`if feed.entry:`
			`# action_list is passed to build_book to fill in missing authors`
			`action_list.append(build_book(dbconnection, feed.entry[0], indata, action_list))`
			`else:`
			`print("No items found")`

			`if tmp_file:`
			`# TODO: write to tmp file`
			`else:`
			`print(write_actionlist(action_list))`

			`def book_in_db(dbconnection, isbn):`
			`cursor = dbconnection.cursor()`
			`query = "SELECT book FROM books WHERE lastname=%(last)s OR firstname=%(first)s"`
			`cursor.execute(query, {'last':last, 'first':first} )`
			`candidates = fetchall_dict(cursor)`
			`pass`

			`def found_item(entry, indata):`
			`print "Found: "+entry.dc_title[0].text`
			`build_book(entry, indata)`

			`def build_book(dbconnection, entry, indata=False):`
			`dic = entry.to_dict()`
			`print("Dict in build_book:" + str(dic))`
			`print("")`

			`book = {}`

			`book['action'] = 'new-book'`

			`isbn = find_isbn(unicode(dic['identifiers']))`
			`if isbn:`
			`book['isbn'] = unicode(isbn)`
			`elif indata:`
			`if len(indata) == 13:`
			`book['isbn'] = unicode(indata)`
			`else:`
			`comment("No ISBN found.")`
			`return False`

			`book['title'] = unicode(entry.dc_title[0].text)`

			`set_value(book, dic, 'category')`

			`if len(entry.dc_title) > 0:`
			`book['subtitle'] = unicode(''.join(map(lambda x: x.text, entry.dc_title[1:])))`

			`authors = build_authors(dbconnection, dic)`
			`book['persons'] = {}`
			`book['persons']['author'] = [author['id'] for author in authors]`

			`if 'publishers' in dic:`
			`book['publisher'] = unicode(','.join(dic['publishers']))`

			`if 'date' in dic:`
			`book['published_year'] = int(dic['date'][:4])`

			`set_value(book, dic, 'edition')`

			`book['num_pages'] = find_page_number(dic)`

			`set_value(book, dic, 'series')`

			`book['description'] = unicode(find_description(dic))`

			`book['references'] = {}`
			`book['references']['google-books'] = [unicode(dic['preview'])]`

			`global comments`
			`book['comment'] = comments`
			`comments = ''`

			`return book`

			`def comment(comm):`
			`global comments`
			`comments += u'%s\n' % comm`

			`def set_value(book, dic, key):`
			`if key in dic:`
			`book[key] = unicode(dic[key])`
			`else:`
			`book[key] = None`


			`def find_description(dic):`
			`if 'description' in dic:`
			`return unescape(dic['description'])`
			`else:`
			`return ""`

			`def find_page_number(dic):`
			`if 'format' in dic:`
			`for item in dic['format']:`
			`if 'pages' in item:`
			`return int(re.findall(r'[0-9]+',item)[0])`
			`return None`
			`else:`
			`return None`

			`def find_isbn(identifiers):`
			`for pair in identifiers:`
			`if pair[0] =='ISBN' and len(pair[1])==13:`
			`return pair[1]`
			`return False`

			`def build_authors(dbconnection, dictionary):`
			`if 'authors' in dictionary:`
			`author_list = []`
			`for author in dictionary['authors']:`
			`author_list.append(get_or_create_author(dbconnection, author))`
			`return author_list`
			`else:`
			`comment("No authors found.")`
			`return []`

			`def get_or_create_author(dbconnection, author_name):`
			`author = {}`
			`author['action'] = 'new-person'`
			`names = author_name.split()`
			`first = ' '.join(names[:-1])`
			`last = names[-1]`
			`author['first_name'] = first`
			`author['last_name'] = last`

			`cursor = dbconnection.cursor()`
			`query = "SELECT id FROM person WHERE lastname=%(last)s OR firstname=%(first)s"`
			`cursor.execute(query, {'last':last, 'first':first} )`
			`candidates = fetchall_dict(cursor)`

			`if len(candidates) == 1:`
			`return candidates[0]`
			`if len(candidates) > 1:`
			`# TODO: TEST`
			`print("Found several candidates:")`
			`for i in range(candidates):`
			`print "%d: %s" % (i,`
			`feed.entry[i].dc_title[0].text)`
			`candno = (int)(raw_input("Which candidate do you want? [0-%d] " % (len(candidates) - 1)))`
			`return candidates[candno]`
			`else:`
			`print("No candidate found for " + author_name + ", making new person")`
			`newid = ''.join([i[0] for i in names]).lower()`
			`# Check if id already exists`
			`idq =`


			`#Cargo-cult coded function to unescape special XML characters`

			`def select_from_list(list, attributes=False, item_name=""):`
			`if len(list) == 0:`
			`return None`
			`elif len(list) == 1:`
			`return list[0]`
			`else:`
			`return None`

			`def unescape(s):`
			`want_unicode = False`
			`if isinstance(s, unicode):`
			`s = s.encode("utf-8")`
			`want_unicode = True`

			# the rest of this assumes that `s` is UTF-8
			`list = []`

			`# create and initialize a parser object`
			`p = xml.parsers.expat.ParserCreate("utf-8")`
			`p.buffer_text = True`
			`p.returns_unicode = want_unicode`
			`p.CharacterDataHandler = list.append`

			`# parse the data wrapped in a dummy element`
			`# (needed so the "document" is well-formed)`
			`p.Parse("<e>", 0)`
			`p.Parse(s, 0)`
			`p.Parse("</e>", 1)`

			`# join the extracted strings and return`
			`es = ""`
			`if want_unicode:`
			`es = u""`
			`return es.join(list)`


			`suggest_book(connection)`