worblehat-old/python/google_interface.py

#!/usr/bin/python

from web.library.models import *
from gdata.books.service import BookService
#from dibbler_snippet import ConfirmMenu
#from xml.dom import minidom
#from xml.sax.saxutils import unescape
import xml.parsers.expat
import readline
import re

# Inputs at the ISBN prompt that end the interactive session in get_book_loop().
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']

def get_book_loop():
	"""Interactively prompt for ISBNs and import the matching books.

	Each entered value is looked up through the Google Books service with an
	'isbn=' keyword search.  When exactly one entry comes back it is handed
	to build_book(); zero or several entries are only reported.  The loop
	ends when the user types one of exit_commands or when input ends (EOF).
	"""
	service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
	while True:
		try:
			# Strip so stray whitespace neither defeats the exit commands
			# nor ends up in the search query / ISBN length check.
			isbn = raw_input('Enter ISBN number> ').strip()
		except EOFError:
			# Ctrl-D or an exhausted pipe: leave quietly instead of
			# crashing with a traceback.
			print("")
			break
		if isbn in exit_commands:
			break
		if not isbn:
			# Nothing typed; do not send an empty query to the service.
			continue
		feed = service.search_by_keyword('isbn='+isbn)
		hits = len(feed.entry)
		if hits == 0:
			print("No items found")
		elif hits == 1:
			print("Found one item: "+feed.entry[0].dc_title[0].text)
			build_book(feed.entry[0], isbn)
		else:
			print("Found several items, OWNOES!")

def build_book(entry, input=False):
	"""Create and save a Book (plus its author objects) from a feed entry.

	Parameters:
		entry -- a search result entry; its to_dict() output and dc_title
		         elements are read here.
		input -- optional ISBN string as typed by the user.  It is used as
		         the book's ISBN only when the entry itself carries no
		         13-character ISBN and the typed value is exactly 13
		         characters long.

	Returns False (after printing "No ISBN found") when no usable ISBN is
	available; nothing is saved in that case.  Otherwise the book and the
	objects returned by build_authors() are saved and None is returned.
	"""
	dic = entry.to_dict()
	b = Book(title=entry.dc_title[0].text)
	# Title elements after the first form the subtitle (an empty string
	# when there is only one title element).
	b.subtitle = ''.join([part.text for part in entry.dc_title[1:]])
	isbn = False
	if 'identifiers' in dic:
		isbn = find_isbn(dic['identifiers'])
	if isbn:
		b.isbn = isbn
	elif input and len(input) == 13:
		# Fall back to what the user typed, but only if it has ISBN-13 length.
		b.isbn = input
	else:
		# Covers both "no input given" and "input of the wrong length";
		# previously the latter slipped through and saved a book without ISBN.
		print("No ISBN found")
		return False
	b.description = find_description(dic)
	print(entry.description)
	if 'date' in dic:
		# Only the first four characters of the date are used, as the year.
		b.published_year = int(dic['date'][:4])
	if 'publishers' in dic:
		b.publisher = ','.join(dic['publishers'])
	b.num_pages = find_page_number(dic)
	new_objects = build_authors(b, dic)
	b.full_print()
	# The book is saved before the objects build_authors() produced for it.
	b.save()
	for obj in new_objects:
		obj.save()


def find_description(dic):
	"""Return the unescaped 'description' value of dic, or "" if absent."""
	if 'description' not in dic:
		return ""
	raw_text = dic['description']
	return unescape(raw_text)

def find_page_number(dic):
	"""Return the page count found in dic['format'], or None.

	Each item of dic['format'] that contains the text 'pages' is searched
	for its first run of digits; the first such number found is returned as
	an int.  Items that mention 'pages' without any digits are skipped
	rather than raising.  None is returned when 'format' is missing or no
	item yields a number.
	"""
	if 'format' not in dic:
		return None
	for item in dic['format']:
		if 'pages' not in item:
			continue
		digits = re.search(r'[0-9]+', item)
		if digits:
			return int(digits.group(0))
	return None

def find_isbn(identifiers):
	"""Return the first 13-character 'ISBN' value in identifiers, else False.

	identifiers is iterated as a sequence of pairs whose first element is
	the identifier kind and whose second element is its value.
	"""
	thirteen_digit = (
		pair[1]
		for pair in identifiers
		if pair[0] == 'ISBN' and len(pair[1]) == 13
	)
	# The generator is lazy, so pairs after the first hit are never inspected.
	return next(thirteen_digit, False)

def build_authors(book, dictionary):
	"""Build the person and book-person objects for a book's authors.

	For every name in dictionary['authors'] a person is obtained through
	get_or_create_author(), and a BookPerson linking it to book with the
	"Author" relation is constructed.  Returns the persons followed by the
	BookPerson objects in one list; nothing is saved here.  When the
	dictionary has no 'authors' key a message is printed and an empty list
	is returned.
	"""
	if 'authors' not in dictionary:
		print("No authors found")
		return []
	# Resolve every author first (this may prompt the user), then fetch the
	# relation, then build the links -- same order as the prompts appear.
	people = [get_or_create_author(name) for name in dictionary['authors']]
	author_relation = Relation.objects.get_or_create(name="Author")[0]
	links = [
		BookPerson(book=book, person=person, relation=author_relation)
		for person in people
	]
	return people + links

def get_or_create_author(author_name):
	"""Return an existing Person for author_name, or a new unsaved one.

	The name is split on whitespace: the last word is the last name and
	everything before it the first name.  Existing persons with that last
	name (and, when the name has more than one word, a first name containing
	the first word) are offered to the user through select_from_list().  If
	none is chosen a new Person is constructed, with an id made of the
	lower-cased initials; while that id is already taken the user is asked
	for another one.

	Raises ValueError when author_name contains no words at all.
	"""
	print("Processing author:  %s" % (author_name,))
	names = author_name.split()
	if not names:
		# Fail with a clear message instead of an IndexError further down.
		raise ValueError("Author name is empty")
	first = ' '.join(names[:-1])
	last = names[-1]
	lookup = {'last_name': last}
	if len(names) > 1:
		# Only constrain the first name when there is one; for a single-word
		# name names[0] is the last name and would never match first_name.
		lookup['first_name__contains'] = names[0]
	candidates = Person.objects.filter(**lookup)
	selected = select_from_list(candidates, attributes=['first_name','last_name'], item_name='author')
	if selected:
		return selected
	print("No author found, creating author")
	newid = ''.join([word[0] for word in names]).lower()
	print(newid)
	while True:
		existing = Person.objects.filter(id=newid)
		if len(existing) == 0:
			return Person(first_name=first, last_name=last, id=newid)
		newid = raw_input("Another authour found with same intials, please suggest an id> ")


# The cargo-cult coded helper for unescaping special XML characters is
# unescape(), defined after select_from_list() below.

def select_from_list(list, attributes=False, item_name=""):
	"""Let the user confirm a single candidate from list.

	Parameters:
		list       -- sequence of candidates (len() and [0] are used).
		attributes -- optional sequence of attribute names; when given, the
		              candidate is shown as those attribute values joined
		              by spaces instead of as the candidate itself.
		item_name  -- word used for the candidate in the messages.

	Returns the candidate when there is exactly one and the user answers
	'yes', 'y' or just presses enter; returns None when the user declines,
	when the list is empty, or when it holds several candidates (choosing
	among several is not implemented, only reported).
	"""
	# Function-scope import keeps this block self-contained.
	from operator import attrgetter

	count = len(list)
	if count == 0:
		print("No candidate %sfound" %(item_name+' '))
		return None
	if count > 1:
		print("several candidates found")
		return None

	candidate = list[0]
	if attributes:
		# attrgetter replaces the former eval("list[0]." + attribute): it
		# reads the named attribute without executing arbitrary code.
		shown = ' '.join([attrgetter(attribute)(candidate) for attribute in attributes])
	else:
		shown = candidate
	answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " %(item_name, shown)))
	if answer in ['yes', 'y', '']:
		return candidate
	return None

def unescape(s):
	want_unicode = False
	if isinstance(s, unicode):
		s = s.encode("utf-8")
		want_unicode = True

	# the rest of this assumes that `s` is UTF-8
	list = []

	# create and initialize a parser object
	p = xml.parsers.expat.ParserCreate("utf-8")
	p.buffer_text = True
	p.returns_unicode = want_unicode
	p.CharacterDataHandler = list.append

	# parse the data wrapped in a dummy element
	# (needed so the "document" is well-formed)
	p.Parse("<e>", 0)
	p.Parse(s, 0)
	p.Parse("</e>", 1)

	# join the extracted strings and return
	es = ""
	if want_unicode:
		es = u""
	return es.join(list)

# Start the interactive ISBN prompt; being at module level, this runs
# whenever the file is executed or imported.
get_book_loop()