From 21755a5bde1043fe80d4c42173bbc2dbc4e73535 Mon Sep 17 00:00:00 2001
From: almelid <almelid@pvv.ntnu.no>
Date: Sat, 25 Sep 2010 16:03:23 +0000
Subject: [PATCH] =?UTF-8?q?F=C3=A5tt=20build=5Fbook=20til=20=C3=A5=20funge?=
 =?UTF-8?q?re.=20Denne=20sp=C3=B8r=20n=C3=A5=20brukeren=20om=20ISBN,=20byg?=
 =?UTF-8?q?ger=20en=20bok,=20og=20dytter=20denne=20inn=20i=20databasen.=20?=
 =?UTF-8?q?Ingen=20fornuftig=20sjekking=20av=20dataene=20foreg=C3=A5r=20s?=
 =?UTF-8?q?=C3=A5=20langt.=20Forfattere=20og=20relasjoner=20bygges=20etter?=
 =?UTF-8?q?=20behov,=20og=20stort=20sett=20uten=20=C3=A5=20sp=C3=B8rre=20b?=
 =?UTF-8?q?rukeren.=20Ogs=C3=A5=20dette=20m=C3=A5=20endres=20p=C3=A5.=20En?=
 =?UTF-8?q?=20god=20del=20generalisering=20er=20n=C3=B8dvendig=20for=20?=
 =?UTF-8?q?=C3=A5=20gj=C3=B8re=20denne=20koden=20forst=C3=A5elig.=20=20=20?=
 =?UTF-8?q?=20=5F=5F=5F=5F=5F=20=20=20/=20=20=20=20=20\=20=20=20vvvvvvv=20?=
 =?UTF-8?q?=20/|=5F=5F/|=20=20=20=20=20=20=20I=20=20=20/O,O=20=20=20|=20?=
 =?UTF-8?q?=20=20=20=20=20=20I=20/=5F=5F=5F=5F=5F=20=20=20|=20=20=20=20=20?=
 =?UTF-8?q?=20/|/|=20=20=20=20=20=20J|/^=20^=20^=20\=20=20|=20=20=20=20/00?=
 =?UTF-8?q?=20=20|=20=20=20=20=5F//|=20=20=20=20=20=20=20|^=20^=20^=20^=20?=
 =?UTF-8?q?|W|=20=20=20|/^^\=20|=20=20=20/oo=20|=20=20=20=20=20=20=20=20\m?=
 =?UTF-8?q?=5F=5F=5Fm=5F=5F|=5F|=20=20=20=20\m=5Fm=5F|=20=20=20\mm=5F|=20?=
 =?UTF-8?q?=20The=20totoros=20approve=20these=20changes=20...?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 python/google_interface.py   | 107 +++++++++++++++++++++++++++++++----
 python/web/library/models.py |   2 +-
 2 files changed, 97 insertions(+), 12 deletions(-)

diff --git a/python/google_interface.py b/python/google_interface.py
index 0a7de76..d2d2652 100644
--- a/python/google_interface.py
+++ b/python/google_interface.py
@@ -2,7 +2,11 @@
 
 from web.library.models import *
 from gdata.books.service import BookService
-from xml.dom import minidom
+#from dibbler_snippet import ConfirmMenu
+#from xml.dom import minidom
+#from xml.sax.saxutils import unescape
+import xml.parsers.expat
+import readline
 import re
 
 exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
@@ -24,7 +28,7 @@ def get_book_loop():
 
 def build_book(entry, input=False):
 	dic = entry.to_dict()
-	print dic
+#	print dic
 #	print entry
 	b = Book(title=entry.dc_title[0].text)
 	if len(entry.dc_title) > 0:
@@ -38,21 +42,23 @@ def build_book(entry, input=False):
 	else:
 		print "No ISBN found"
 		return False
-#	if 'description' in dic:
-#		b.description = dic['description']
-	b.description = find_description(entry)
+	b.description = find_description(dic)
+	print entry.description
 	if 'date' in dic:
 		b.published_year = int(dic['date'][:4])
 	if 'publishers' in dic:
 		b.publisher = ','.join(dic['publishers'])
 	b.num_pages = find_page_number(dic)
+	new_objects = build_authors(b, dic)
 	b.full_print()
+	b.save()
+	for object in new_objects:
+		object.save()
 
-def find_description(entry):
-	if entry.description:
-		d = minidom.parseString(str(entry.description))
-		print d.childNodes[0].childNodes[0].data
-		return ""
+
+def find_description(dic):  # returns the unescaped 'description' entry of dic, or "" when the key is absent
+	if 'description' in dic:
+		return unescape(dic['description'])
 	else:
 		return ""
 
@@ -70,6 +76,85 @@ def find_isbn(identifiers):
 		if pair[0] =='ISBN' and len(pair[1])==13:
 			return pair[1]
 	return False
-			
+
+def build_authors(book, dictionary):
+	"""Return Person objects for the book's authors followed by the BookPerson
+	links tying each one to book via the "Author" relation; [] if none listed."""
+	# Guard clause: nothing to build when the entry lists no authors.
+	if 'authors' not in dictionary:
+		print "No authors found"
+		return []
+	people = [get_or_create_author(name) for name in dictionary['authors']]
+	# [0] picks the Relation out of the (object, created) pair.
+	role = Relation.objects.get_or_create(name="Author")[0]
+	links = [BookPerson(book=book,person=person,relation=role) for person in people]
+	# People come before links in the returned list.
+	return people+links
+
+def get_or_create_author(author_name):
+	"""Return a Person for author_name: an existing one the user confirms,
+	or a new unsaved Person whose id defaults to the lowercased initials."""
+	print "Processing author: ", author_name
+	names = author_name.split()
+	# Everything before the final word is treated as the first name.
+	first, last = ' '.join(names[:-1]), names[-1]
+	candidates = Person.objects.filter(first_name__contains=names[0],last_name=last)
+	selected = select_from_list(candidates, attributes=['first_name','last_name'], item_name='author')
+	if selected:
+		return selected
+	print "No author found, creating author"
+	newid = ''.join([i[0] for i in names]).lower()
+	print newid
+	# Re-prompt until the id is non-empty and unused (blank input used to be accepted).
+	while not newid or len(Person.objects.filter(id=newid)) != 0:
+		newid = raw_input("Another author found with same initials, please suggest an id> ").strip()
+	return Person(first_name=first, last_name=last, id=newid)
+
+
+#Cargo-cult coded function to unescape special XML characters: see unescape() below
+
+def select_from_list(list, attributes=False, item_name=""):
+	"""Return the only item of list if the user confirms it, else None; zero or
+	several items also give None. A bare Enter counts as yes, per "[y]"."""
+	if len(list) == 0:
+		print "No candidate %sfound" %(item_name+' ')
+		return None
+	if len(list) != 1:
+		print "several candidates found"
+		return None
+	shown = list[0]
+	if attributes:  # show these attributes of the candidate (read via getattr, not eval)
+		shown = ' '.join([getattr(list[0], attribute) for attribute in attributes])
+	answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " %(item_name,shown)))
+	if answer.strip().lower() in ['', 'yes', 'y']:
+		return list[0]
+	return None
+
+def unescape(s):
+	"""Return the character data of s with XML references expanded.
+
+	s is parsed by expat as the content of a dummy <e> element and the
+	character data is collected; unicode in gives unicode out, str gives str.
+	"""
+	as_unicode = isinstance(s, unicode)
+	if as_unicode:
+		# expat is fed UTF-8 bytes below, so encode unicode input first
+		s = s.encode("utf-8")
+
+	pieces = []
+	parser = xml.parsers.expat.ParserCreate("utf-8")
+	# buffer_text lets expat buffer text instead of calling the handler per fragment
+	parser.buffer_text = True
+	parser.returns_unicode = as_unicode
+	parser.CharacterDataHandler = pieces.append
+
+	# the wrapper element keeps the parsed "document" well-formed
+	for chunk, is_final in (("<e>", 0), (s, 0), ("</e>", 1)):
+		parser.Parse(chunk, is_final)
+
+	# join with a separator of the matching string type
+	if as_unicode:
+		return u"".join(pieces)
+	return "".join(pieces)
 
 get_book_loop()
diff --git a/python/web/library/models.py b/python/web/library/models.py
index e6cd4e7..f608fee 100644
--- a/python/web/library/models.py
+++ b/python/web/library/models.py
@@ -25,7 +25,7 @@ class Book(models.Model):
 	id = models.CharField(max_length=255)
 	title = models.CharField(max_length=511)
 	subtitle = models.CharField(max_length=511, null=True, blank=True)
-	category = models.ForeignKey(Category)
+	category = models.ForeignKey(Category, null=True, blank=True)
 	publisher = models.CharField(max_length=255, null=True, blank=True)
 	published_year = models.IntegerField(null=True, blank=True)
 	edition = models.IntegerField(null=True, blank=True)