From 21755a5bde1043fe80d4c42173bbc2dbc4e73535 Mon Sep 17 00:00:00 2001 From: almelid Date: Sat, 25 Sep 2010 16:03:23 +0000 Subject: [PATCH] =?UTF-8?q?F=C3=A5tt=20build=5Fbook=20til=20=C3=A5=20funge?= =?UTF-8?q?re.=20Denne=20sp=C3=B8r=20n=C3=A5=20brukeren=20om=20ISBN,=20byg?= =?UTF-8?q?ger=20en=20bok,=20og=20dytter=20denne=20inn=20i=20databasen.=20?= =?UTF-8?q?Ingen=20fornuftig=20sjekking=20av=20dataene=20foreg=C3=A5r=20s?= =?UTF-8?q?=C3=A5=20langt.=20Forfattere=20og=20relasjoner=20bygges=20etter?= =?UTF-8?q?=20behov,=20og=20stort=20sett=20uten=20=C3=A5=20sp=C3=B8rre=20b?= =?UTF-8?q?rukeren.=20Ogs=C3=A5=20dette=20m=C3=A5=20endres=20p=C3=A5.=20En?= =?UTF-8?q?=20god=20del=20generalisering=20er=20n=C3=B8dvendig=20for=20?= =?UTF-8?q?=C3=A5=20gj=C3=B8re=20denne=20koden=20forst=C3=A5elig.=20=20=20?= =?UTF-8?q?=20=5F=5F=5F=5F=5F=20=20=20/=20=20=20=20=20\=20=20=20vvvvvvv=20?= =?UTF-8?q?=20/|=5F=5F/|=20=20=20=20=20=20=20I=20=20=20/O,O=20=20=20|=20?= =?UTF-8?q?=20=20=20=20=20=20I=20/=5F=5F=5F=5F=5F=20=20=20|=20=20=20=20=20?= =?UTF-8?q?=20/|/|=20=20=20=20=20=20J|/^=20^=20^=20\=20=20|=20=20=20=20/00?= =?UTF-8?q?=20=20|=20=20=20=20=5F//|=20=20=20=20=20=20=20|^=20^=20^=20^=20?= =?UTF-8?q?|W|=20=20=20|/^^\=20|=20=20=20/oo=20|=20=20=20=20=20=20=20=20\m?= =?UTF-8?q?=5F=5F=5Fm=5F=5F|=5F|=20=20=20=20\m=5Fm=5F|=20=20=20\mm=5F|=20?= =?UTF-8?q?=20The=20totoros=20approve=20these=20changes=20...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/google_interface.py | 107 +++++++++++++++++++++++++++++++---- python/web/library/models.py | 2 +- 2 files changed, 97 insertions(+), 12 deletions(-) diff --git a/python/google_interface.py b/python/google_interface.py index 0a7de76..d2d2652 100644 --- a/python/google_interface.py +++ b/python/google_interface.py @@ -2,7 +2,11 @@ from web.library.models import * from gdata.books.service import BookService -from xml.dom import minidom +#from dibbler_snippet import ConfirmMenu +#from xml.dom import minidom +#from xml.sax.saxutils import unescape +import xml.parsers.expat +import readline import re exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q'] @@ -24,7 +28,7 @@ def get_book_loop(): def build_book(entry, input=False): dic = entry.to_dict() - print dic +# print dic # print entry b = Book(title=entry.dc_title[0].text) if len(entry.dc_title) > 0: @@ -38,21 +42,23 @@ def build_book(entry, input=False): else: print "No ISBN found" return False -# if 'description' in dic: -# b.description = dic['description'] - b.description = find_description(entry) + b.description = find_description(dic) + print entry.description if 'date' in dic: b.published_year = int(dic['date'][:4]) if 'publishers' in dic: b.publisher = ','.join(dic['publishers']) b.num_pages = find_page_number(dic) + new_objects = build_authors(b, dic) b.full_print() + b.save() + for object in new_objects: + object.save() -def find_description(entry): - if entry.description: - d = minidom.parseString(str(entry.description)) - print d.childNodes[0].childNodes[0].data - return "" + +def find_description(dic): + if 'description' in dic: + return unescape(dic['description']) else: return "" @@ -70,6 +76,85 @@ def find_isbn(identifiers): if pair[0] =='ISBN' and len(pair[1])==13: return pair[1] return False - + +def build_authors(book, dictionary): + if 'authors' in dictionary: + author_list = [] + for author in dictionary['authors']: + author_list.append(get_or_create_author(author)) + relation_list = [] + auth_rel = Relation.objects.get_or_create(name="Author")[0] + for author in author_list: + relation_list.append(BookPerson(book=book,person=author,relation=auth_rel)) + return author_list+relation_list + else: + print "No authors found" + return [] + +def get_or_create_author(author_name): + print "Processing author: ", author_name + names = author_name.split() + first = ' '.join(names[:-1]) + last = names[-1] + candidates = Person.objects.filter(first_name__contains=names[0],last_name=last) + selected = select_from_list(candidates, attributes=['first_name','last_name'], item_name='author') + if selected: + return selected + else: + print "No author found, creating author" + newid = ''.join([i[0] for i in names]).lower() + print newid + while True: + existing = Person.objects.filter(id=newid) + if len(existing)==0: + return Person(first_name=first, last_name=last, id=newid) + newid = raw_input("Another authour found with same intials, please suggest an id> ") + + +#Cargo-cult coded function to unescape special XML characters + +def select_from_list(list, attributes=False, item_name=""): + if len(list) == 0: + print "No candidate %sfound" %(item_name+' ') + return None + elif len(list) == 1: + if attributes: + answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " %(item_name,' '.join([eval("list[0]."+attribute) for attribute in attributes])))) + else: + answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " %(item_name,list[0]))) + if answer in ['yes', 'y']: + return list[0] + else: + return None + else: + print "several candidates found" + return None + +def unescape(s): + want_unicode = False + if isinstance(s, unicode): + s = s.encode("utf-8") + want_unicode = True + + # the rest of this assumes that `s` is UTF-8 + list = [] + + # create and initialize a parser object + p = xml.parsers.expat.ParserCreate("utf-8") + p.buffer_text = True + p.returns_unicode = want_unicode + p.CharacterDataHandler = list.append + + # parse the data wrapped in a dummy element + # (needed so the "document" is well-formed) + p.Parse("", 0) + p.Parse(s, 0) + p.Parse("", 1) + + # join the extracted strings and return + es = "" + if want_unicode: + es = u"" + return es.join(list) get_book_loop() diff --git a/python/web/library/models.py b/python/web/library/models.py index e6cd4e7..f608fee 100644 --- a/python/web/library/models.py +++ b/python/web/library/models.py @@ -25,7 +25,7 @@ class Book(models.Model): id = models.CharField(max_length=255) title = models.CharField(max_length=511) subtitle = models.CharField(max_length=511, null=True, blank=True) - category = models.ForeignKey(Category) + category = models.ForeignKey(Category, null=True, blank=True) publisher = models.CharField(max_length=255, null=True, blank=True) published_year = models.IntegerField(null=True, blank=True) edition = models.IntegerField(null=True, blank=True)