worblehat-old/python/google_interface.py

#!/usr/bin/python

import os
os.environ['DJANGO_SETTINGS_MODULE']='web.settings'
from web.library.models import *
from gdata.books.service import BookService
import xml.parsers.expat
import readline
import re
import random
from web.library.fileformat import *

# Inputs at the ISBN prompt that end the session instead of being looked up.
exit_commands = [
    'exit',
    'abort',
    'quit',
    'bye',
    'eat flaming death',
    'q',
]

# Text buffer filled by comment(); build_book() copies it into the book it is
# building and then empties it again.
comments = ""

# Actions (new books and new persons) collected during the session; handed to
# write_actionlist() when the prompt loop ends.
action_list = []

def get_book_loop():
    """Prompt for ISBN numbers and queue a 'new-book' action for each hit.

    ISBNs are read with raw_input() until one of ``exit_commands`` is typed.
    Each ISBN is searched through the Google Books ``BookService``. When the
    feed holds several entries the user picks one by index. The chosen entry
    is converted with build_book() and appended to the module-level
    ``action_list``. After the loop ends, the result of
    write_actionlist(action_list) is printed.
    """
    global action_list
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')

    while True:
        indata = raw_input('Enter ISBN number> ')
        if indata in exit_commands:
            break

        entries = service.search_by_keyword('isbn=' + indata).entry
        if not entries:
            print("No items found")
            continue

        if len(entries) == 1:
            chosen = entries[0]
            print("Found one book: " + chosen.dc_title[0].text)
        else:
            print("Found multiple books: ")
            for i, entry in enumerate(entries):
                print("%d:  %s" % (i, entry.dc_title[0].text))
            last_index = len(entries) - 1
            while True:
                answer = raw_input("Which book do you want? [0-%d] " % last_index)
                # Ask again instead of crashing: int() raises ValueError on
                # non-numeric input, and an index outside the listed range
                # would raise IndexError (or, if negative, silently pick an
                # entry counted from the end).
                try:
                    bookno = int(answer)
                except ValueError:
                    bookno = -1
                if 0 <= bookno <= last_index:
                    break
                print("Please enter a number between 0 and %d" % last_index)
            chosen = entries[bookno]

        book = build_book(chosen, indata)
        # build_book() returns False when it gives up on an entry; do not
        # queue that value as if it were an action.
        if book:
            action_list.append(book)

    print("")
    print(write_actionlist(action_list))

def found_item(entry, indata):
    """Announce a found entry and build its book dict.

    Prints the first title element of ``entry`` and returns whatever
    build_book(entry, indata) produces (a book dict, or False). Earlier the
    built book was thrown away, which left callers with nothing to queue.
    """
    print("Found: " + entry.dc_title[0].text)
    return build_book(entry, indata)

def build_book(entry, indata=False):
    """Convert a Google Books feed entry into a 'new-book' action dict.

    Arguments:
        entry  -- feed entry; its to_dict() result and its dc_title list
                  are read here.
        indata -- the ISBN string typed by the user, used as a fallback
                  when the entry itself yields no 13-character ISBN.

    Returns the book dict, or False when the entry yields no ISBN and no
    ``indata`` was given.

    Side effects: build_authors() may append new persons to ``action_list``;
    the module-level ``comments`` buffer is copied into book['comment'] and
    then cleared.
    """
    global comments

    dic = entry.to_dict()
    book = {'action': 'new-book'}

    # find_isbn() inspects pair[0] / pair[1] of each identifier, so it needs
    # the identifier list itself. Passing unicode(list) made it iterate over
    # single characters and never match. A missing 'identifiers' key is
    # treated as "no identifiers" instead of raising KeyError.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        # NOTE(review): when indata is not 13 characters long the book is
        # built without an 'isbn' key -- confirm that this is intended.
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
    else:
        # NOTE(review): this note stays in ``comments`` after the early
        # return and ends up in the comment of the next book that is built.
        comment("No ISBN found.")
        return False

    title_parts = entry.dc_title
    book['title'] = unicode(title_parts[0].text)

    set_value(book, dic, 'category')

    # Everything after the first title element is joined into the subtitle;
    # with a single element this is an empty string. (The first element was
    # already read above, so the list cannot be empty here.)
    book['subtitle'] = unicode(''.join([part.text for part in title_parts[1:]]))

    authors = build_authors(dic)
    book['persons'] = {'author': [author['id'] for author in authors]}

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']))

    if 'date' in dic:
        # Only the first four characters of the date are used as the year.
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')
    book['num_pages'] = find_page_number(dic)
    set_value(book, dic, 'series')
    book['description'] = unicode(find_description(dic))

    # Leave the reference list empty when the dict has no 'preview' key
    # rather than failing with KeyError.
    references = []
    if 'preview' in dic:
        references.append(unicode(dic['preview']))
    book['references'] = {'google-books': references}

    # Attach the notes collected while building this book, then start the
    # buffer afresh for the next one.
    book['comment'] = comments
    comments = ''

    return book

def comment(comm):
    """Add ``comm`` as one more line to the module-level ``comments`` buffer."""
    global comments
    line = u'%s\n' % comm
    comments = comments + line

def set_value(book, dic, key):
    """Copy ``dic[key]`` into ``book[key]`` as unicode, or store None.

    ``book[key]`` is always assigned: the unicode() of the source value when
    ``key`` is present in ``dic``, otherwise None.
    """
    value = None
    if key in dic:
        value = unicode(dic[key])
    book[key] = value


def find_description(dic):
    """Return the unescape()d 'description' of ``dic``, or "" if it has none."""
    if 'description' not in dic:
        return ""
    return unescape(dic['description'])

def find_page_number(dic):
    """Return the page count found in ``dic['format']``, or None.

    ``dic['format']`` is expected to be a list of strings. The first item
    that contains the word 'pages' and at least one run of digits supplies
    the result (its first digit run, as an int). Items without digits, empty
    items and None items are skipped; a missing 'format' key gives None.
    """
    for item in dic.get('format') or []:
        if not item or 'pages' not in item:
            continue
        digits = re.search(r'[0-9]+', item)
        # An item such as "pages unknown" mentions pages but carries no
        # number; keep looking instead of failing on the empty match.
        if digits:
            return int(digits.group(0))
    return None

def find_isbn(identifiers):
    """Return the first 13-character 'ISBN' value in ``identifiers``.

    ``identifiers`` is iterated as a sequence of pairs whose first element
    is the identifier type and whose second element is the value. Returns
    False when no pair qualifies.
    """
    # Generator keeps the scan lazy: pairs after the first hit are not
    # inspected.
    isbn13_values = (
        pair[1]
        for pair in identifiers
        if pair[0] == 'ISBN' and len(pair[1]) == 13
    )
    for value in isbn13_values:
        return value
    return False

def build_authors(dictionary):
    """Resolve every name in ``dictionary['authors']`` to an author record.

    Each name is passed to get_or_create_author() and the results are
    returned in the same order. When the dict has no 'authors' key, a note is
    recorded through comment() and an empty list is returned.
    """
    if 'authors' not in dictionary:
        comment("No authors found.")
        return []
    return [get_or_create_author(name) for name in dictionary['authors']]

def get_or_create_author(author_name):
    """Return a dict with an 'id' for the author, queuing a new person if needed.

    The name is split on whitespace: the last word becomes the last name and
    everything before it the first name. Person rows whose first name
    contains the first word and whose last name matches are looked up. If
    select_from_list() picks one of them, a dict with that person's id and
    names is returned. Otherwise a 'new-person' action dict is created,
    appended to ``action_list`` and returned; its id is the lower-cased
    initials, extended with a random suffix while a Person with that id
    already exists.

    Both outcomes return a dict so callers can read ``['id']`` uniformly.
    The existing-author branch used to return the Person object itself,
    which the caller in this file indexed with ``['id']``.
    """
    global action_list

    names = author_name.split()
    first = ' '.join(names[:-1])
    last = names[-1]

    candidates = Person.objects.filter(first_name__contains=names[0], last_name=last)
    selected = select_from_list(candidates, attributes=['first_name', 'last_name'], item_name='author')
    if selected:
        # No 'action' key: an existing person needs nothing queued.
        return {
            'id': selected.id,
            'first_name': selected.first_name,
            'last_name': selected.last_name,
        }

    comment("No author found, creating author.")

    newid = ''.join([name[0] for name in names]).lower()
    while len(Person.objects.filter(id=newid)) != 0:
        # The id is taken; flag it for manual clean-up and try a longer one.
        comment("Another author found with same initials, please fix id.")
        newid = newid + "%f" % random.random()

    author = {
        'action': 'new-person',
        'first_name': first,
        'last_name': last,
        'id': newid,
    }
    action_list.append(author)
    return author


#NOTE: unescape(), defined further down, is a cargo-cult coded function to unescape special XML characters

def select_from_list(list, attributes=False, item_name=""):
    """Return the only element of ``list``, or None if it is not exactly one.

    ``attributes`` and ``item_name`` are accepted but not used here.
    """
    if len(list) != 1:
        return None
    return list[0]

def unescape(s):
	want_unicode = False
	if isinstance(s, unicode):
		s = s.encode("utf-8")
		want_unicode = True

	# the rest of this assumes that `s` is UTF-8
	list = []

	# create and initialize a parser object
	p = xml.parsers.expat.ParserCreate("utf-8")
	p.buffer_text = True
	p.returns_unicode = want_unicode
	p.CharacterDataHandler = list.append

	# parse the data wrapped in a dummy element
	# (needed so the "document" is well-formed)
	p.Parse("<e>", 0)
	p.Parse(s, 0)
	p.Parse("</e>", 1)

	# join the extracted strings and return
	es = ""
	if want_unicode:
		es = u""
	return es.join(list)

# Script entry point: the interactive ISBN prompt starts as soon as this file
# is executed. There is no __main__ guard, so importing the module starts it
# as well.
get_book_loop()