Fått build_book til å fungere. Denne spør nå brukeren om ISBN, bygger en bok, og dytter denne inn i databasen. Ingen fornuftig sjekking av dataene foregår så langt. Forfattere og relasjoner bygges etter behov, og stort sett uten å spørre brukeren. Også dette må endres på. En god del generalisering er nødvendig for å gjøre denne koden forståelig.

      _____
     /     \
     vvvvvvv  /|__/|
        I   /O,O   |
        I /_____   |      /|/|
       J|/^ ^ ^ \  |    /00  |    _//|
        |^ ^ ^ ^ |W|   |/^^\ |   /oo |
         \m___m__|_|    \m_m_|   \mm_|

The totoros approve these changes ...
2010-09-25 16:03:23 +00:00
parent 6b6f02017a
commit 21755a5bde
2 changed files with 97 additions and 12 deletions
@@ -2,7 +2,11 @@

 from web.library.models import *
 from gdata.books.service import BookService
-from xml.dom import minidom
+#from dibbler_snippet import ConfirmMenu
+#from xml.dom import minidom
+#from xml.sax.saxutils import unescape
+import xml.parsers.expat
+import readline
 import re

 exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
@@ -24,7 +28,7 @@ def get_book_loop():

 def build_book(entry, input=False):
 	dic = entry.to_dict()
-	print dic
+#	print dic
 #	print entry
 	b = Book(title=entry.dc_title[0].text)
 	if len(entry.dc_title) > 0:
@@ -38,21 +42,23 @@ def build_book(entry, input=False):
 	else:
 		print "No ISBN found"
 		return False
-#	if 'description' in dic:
-#		b.description = dic['description']
-	b.description = find_description(entry)
+	b.description = find_description(dic)
+	print entry.description
 	if 'date' in dic:
 		b.published_year = int(dic['date'][:4])
 	if 'publishers' in dic:
 		b.publisher = ','.join(dic['publishers'])
 	b.num_pages = find_page_number(dic)
+	new_objects = build_authors(b, dic)
 	b.full_print()
+	b.save()
+	for object in new_objects:
+		object.save()

-def find_description(entry):
-	if entry.description:
-		d = minidom.parseString(str(entry.description))
-		print d.childNodes[0].childNodes[0].data
-		return ""
+
+def find_description(dic):
+	if 'description' in dic:
+		return unescape(dic['description'])
 	else:
 		return ""

@@ -70,6 +76,85 @@ def find_isbn(identifiers):
 		if pair[0] =='ISBN' and len(pair[1])==13:
 			return pair[1]
 	return False
-			
+
+def build_authors(book, dictionary):
+	if 'authors' in dictionary:
+		author_list = []
+		for author in dictionary['authors']:
+			author_list.append(get_or_create_author(author))
+		relation_list = []
+		auth_rel = Relation.objects.get_or_create(name="Author")[0]
+		for author in author_list:
+			relation_list.append(BookPerson(book=book,person=author,relation=auth_rel))
+		return author_list+relation_list
+	else:
+		print "No authors found"
+		return []
+
+def get_or_create_author(author_name):
+	print "Processing author: ", author_name
+	names = author_name.split()
+	first = ' '.join(names[:-1])
+	last = names[-1]
+	candidates = Person.objects.filter(first_name__contains=names[0],last_name=last)
+	selected = select_from_list(candidates, attributes=['first_name','last_name'], item_name='author')
+	if selected:
+		return selected
+	else:
+		print "No author found, creating author"
+		newid = ''.join([i[0] for i in names]).lower()
+		print newid
+		while True:	
+			existing = Person.objects.filter(id=newid)
+			if len(existing)==0:
+				return Person(first_name=first, last_name=last, id=newid)	
+			newid = raw_input("Another authour found with same intials, please suggest an id> ")
+
+
+#Cargo-cult coded function to unescape special XML characters
+
+def select_from_list(list, attributes=False, item_name=""):
+	if len(list) == 0:
+		print "No candidate %sfound" %(item_name+' ')
+		return None
+	elif len(list) == 1:
+		if attributes:
+			answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " %(item_name,' '.join([eval("list[0]."+attribute) for attribute in attributes]))))
+		else:
+			answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " %(item_name,list[0])))
+		if answer in ['yes', 'y']:
+			return list[0]
+		else:
+			return None
+	else:
+		print "several candidates found"
+		return None
+
+def unescape(s):
+	want_unicode = False
+	if isinstance(s, unicode):
+		s = s.encode("utf-8")
+		want_unicode = True
+
+	# the rest of this assumes that `s` is UTF-8
+	list = []
+
+	# create and initialize a parser object
+	p = xml.parsers.expat.ParserCreate("utf-8")
+	p.buffer_text = True
+	p.returns_unicode = want_unicode
+	p.CharacterDataHandler = list.append
+
+	# parse the data wrapped in a dummy element
+	# (needed so the "document" is well-formed)
+	p.Parse("<e>", 0)
+	p.Parse(s, 0)
+	p.Parse("</e>", 1)
+
+	# join the extracted strings and return
+	es = ""
+	if want_unicode:
+		es = u""
+	return es.join(list)

 get_book_loop()
@@ -25,7 +25,7 @@ class Book(models.Model):
 	id = models.CharField(max_length=255)
 	title = models.CharField(max_length=511)
 	subtitle = models.CharField(max_length=511, null=True, blank=True)
-	category = models.ForeignKey(Category)
+	category = models.ForeignKey(Category, null=True, blank=True)
 	publisher = models.CharField(max_length=255, null=True, blank=True)
 	published_year = models.IntegerField(null=True, blank=True)
 	edition = models.IntegerField(null=True, blank=True)