diff --git a/python/google_interface.py b/python/google_interface.py
index 0a7de76..d2d2652 100644
--- a/python/google_interface.py
+++ b/python/google_interface.py
@@ -2,7 +2,11 @@
 
 from web.library.models import *
 from gdata.books.service import BookService
-from xml.dom import minidom
+#from dibbler_snippet import ConfirmMenu
+#from xml.dom import minidom
+#from xml.sax.saxutils import unescape
+import xml.parsers.expat
+import readline
 import re
 
 exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
@@ -24,7 +28,7 @@ def get_book_loop():
 
 def build_book(entry, input=False):
     dic = entry.to_dict()
-    print dic
+#    print dic
 #    print entry
     b = Book(title=entry.dc_title[0].text)
     if len(entry.dc_title) > 0:
@@ -38,21 +42,24 @@ def build_book(entry, input=False):
     else:
         print "No ISBN found"
         return False
-#    if 'description' in dic:
-#        b.description = dic['description']
-    b.description = find_description(entry)
+    b.description = find_description(dic)
+    print entry.description
     if 'date' in dic:
         b.published_year = int(dic['date'][:4])
     if 'publishers' in dic:
         b.publisher = ','.join(dic['publishers'])
     b.num_pages = find_page_number(dic)
+    new_objects = build_authors(b, dic)
     b.full_print()
+    b.save()
+    for object in new_objects:
+        object.save()
 
-def find_description(entry):
-    if entry.description:
-        d = minidom.parseString(str(entry.description))
-        print d.childNodes[0].childNodes[0].data
-        return ""
+
+def find_description(dic):
+    """Return the unescaped 'description' entry of `dic`, or "" if absent."""
+    if 'description' in dic:
+        return unescape(dic['description'])
     else:
         return ""
 
@@ -70,6 +77,110 @@ def find_isbn(identifiers):
         if pair[0] =='ISBN' and len(pair[1])==13:
             return pair[1]
     return False
-    
+
+def build_authors(book, dictionary):
+    """Build the Person and BookPerson objects for a book's authors.
+
+    Reads the 'authors' list from `dictionary`. Returns the Person objects
+    followed by one unsaved BookPerson link per author, or [] if the key
+    is missing. Saving the returned objects is left to the caller.
+    """
+    if 'authors' not in dictionary:
+        print "No authors found"
+        return []
+    author_list = [get_or_create_author(author)
+                   for author in dictionary['authors']]
+    auth_rel = Relation.objects.get_or_create(name="Author")[0]
+    relation_list = [BookPerson(book=book, person=author, relation=auth_rel)
+                     for author in author_list]
+    return author_list + relation_list
+
+def get_or_create_author(author_name):
+    """Return a Person for `author_name`, reusing an existing one if confirmed.
+
+    The last whitespace-separated word is the last name, the rest the first
+    name. A newly created Person is returned unsaved.
+    """
+    print "Processing author: ", author_name
+    names = author_name.split()
+    first = ' '.join(names[:-1])
+    last = names[-1]
+    candidates = Person.objects.filter(last_name=last)
+    if first:
+        # Single-word names are stored with an empty first name, so only
+        # narrow by first name when there is one.
+        candidates = candidates.filter(first_name__contains=names[0])
+    selected = select_from_list(candidates, attributes=['first_name','last_name'], item_name='author')
+    if selected:
+        return selected
+    print "No author found, creating author"
+    # Default id: the lower-cased initials of the name.
+    newid = ''.join([i[0] for i in names]).lower()
+    print newid
+    while True:
+        existing = Person.objects.filter(id=newid)
+        if len(existing)==0:
+            return Person(first_name=first, last_name=last, id=newid)
+        newid = raw_input("Another authour found with same intials, please suggest an id> ")
+
+
+def select_from_list(list, attributes=False, item_name=""):
+    """Ask the user to confirm the only candidate in `list`, if there is one.
+
+    Returns that candidate when the user accepts it (an empty answer counts
+    as yes). Returns None for no candidates, several candidates or a refusal.
+    `attributes` names the string attributes shown instead of the object.
+    """
+    if len(list) == 0:
+        print "No candidate %sfound" %(item_name+' ')
+        return None
+    elif len(list) == 1:
+        if attributes:
+            # getattr, not eval: attribute names are data, not code.
+            shown = ' '.join([getattr(list[0], attribute) for attribute in attributes])
+        else:
+            shown = list[0]
+        answer = raw_input("Found one %s: %s. Use this? [y]/n> " %(item_name, shown))
+        # "[y]" advertises yes as the default, so a bare Enter accepts too.
+        if answer.strip().lower() in ['', 'y', 'yes']:
+            return list[0]
+        else:
+            return None
+    else:
+        print "several candidates found"
+        return None
+
+
+#Cargo-cult coded function to unescape special XML characters
+def unescape(s):
+    """Decode XML character references and predefined entities in `s`.
+
+    Accepts str (treated as UTF-8) or unicode and returns the same type.
+    """
+    want_unicode = False
+    if isinstance(s, unicode):
+        s = s.encode("utf-8")
+        want_unicode = True
+
+    # the rest of this assumes that `s` is UTF-8
+    chunks = []
+
+    # create and initialize a parser object
+    p = xml.parsers.expat.ParserCreate("utf-8")
+    p.buffer_text = True
+    p.returns_unicode = want_unicode
+    p.CharacterDataHandler = chunks.append
+
+    # parse the data wrapped in a dummy element
+    # (needed so the "document" is well-formed)
+    p.Parse("<e>", 0)
+    p.Parse(s, 0)
+    p.Parse("</e>", 1)
+
+    # join the extracted strings and return
+    es = ""
+    if want_unicode:
+        es = u""
+    return es.join(chunks)
 
 get_book_loop()
diff --git a/python/web/library/models.py b/python/web/library/models.py
index e6cd4e7..f608fee 100644
--- a/python/web/library/models.py
+++ b/python/web/library/models.py
@@ -25,7 +25,7 @@ class Book(models.Model):
     id = models.CharField(max_length=255)
     title = models.CharField(max_length=511)
     subtitle = models.CharField(max_length=511, null=True, blank=True)
-    category = models.ForeignKey(Category)
+    category = models.ForeignKey(Category, null=True, blank=True)
     publisher = models.CharField(max_length=255, null=True, blank=True)
     published_year = models.IntegerField(null=True, blank=True)
     edition = models.IntegerField(null=True, blank=True)