2010-09-23 19:04:28 +02:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
2011-03-05 18:20:04 +01:00
|
|
|
import os
|
|
|
|
# Tell Django which settings module to load; this is set ahead of the
# web.library model import further down.
os.environ['DJANGO_SETTINGS_MODULE']='web.settings'
|
2010-09-23 15:57:37 +02:00
|
|
|
from web.library.models import *
|
|
|
|
from gdata.books.service import BookService
|
2010-09-25 18:03:23 +02:00
|
|
|
import xml.parsers.expat
|
|
|
|
import readline
|
2010-09-24 17:28:09 +02:00
|
|
|
import re
|
2011-03-05 22:51:29 +01:00
|
|
|
import random
|
|
|
|
from web.library.fileformat import *
|
2010-09-23 15:57:37 +02:00
|
|
|
|
|
|
|
# Words that end the interactive ISBN prompt when typed verbatim.
exit_commands = [
    'exit',
    'abort',
    'quit',
    'bye',
    'eat flaming death',
    'q',
]

# Notes gathered by comment() for the book that is currently being built.
comments = ""

# Pending actions (new persons and new books) in the order they were created.
action_list = []
|
|
|
|
|
2010-09-23 15:57:37 +02:00
|
|
|
def _prompt_for_book_number(count):
    """Ask until the user types an integer in ``[0, count - 1]``; return it."""
    prompt = "Which book do you want? [0-%d] " % (count - 1)
    while True:
        try:
            number = int(raw_input(prompt))
        except ValueError:
            # Not a number at all: ask again instead of crashing the session.
            continue
        # Reject negatives too; they would silently index from the end.
        if 0 <= number < count:
            return number


def get_book_loop():
    """Interactively look up books by ISBN and print the resulting action list.

    Repeatedly prompts for an ISBN, searches the book service for it and, when
    there are several hits, asks which one is meant.  Each book that
    build_book() manages to build is appended to the module-level
    ``action_list``.  Typing one of ``exit_commands`` ends the loop, after
    which the whole action list is printed via write_actionlist().
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')

    while True:
        # Strip stray whitespace so exit commands and ISBNs still match.
        indata = raw_input('Enter ISBN number> ').strip()
        if indata in exit_commands:
            break

        entries = service.search_by_keyword('isbn=' + indata).entry
        if not entries:
            print("No items found")
            continue

        if len(entries) == 1:
            chosen = entries[0]
            print("Found one book: " + chosen.dc_title[0].text)
        else:
            print("Found multiple books: ")
            for number, entry in enumerate(entries):
                print("%d: %s" % (number, entry.dc_title[0].text))
            chosen = entries[_prompt_for_book_number(len(entries))]

        book = build_book(chosen, indata)
        # build_book() returns False when it cannot build a record; keep such
        # placeholders out of the action list.
        if book:
            action_list.append(book)

    print("")
    print(write_actionlist(action_list))
|
|
|
|
|
2011-03-05 18:20:04 +01:00
|
|
|
def found_item(entry, indata):
    """Announce the title of *entry* and build its book record.

    The record returned by build_book() is discarded.
    """
    title = entry.dc_title[0].text
    print("Found: " + title)
    build_book(entry, indata)
|
|
|
|
|
|
|
|
def _author_id(author):
    """Return the id of an author given as a dict or as an object with ``.id``."""
    if isinstance(author, dict):
        return author['id']
    return author.id


def build_book(entry, indata=False):
    """Build a 'new-book' action dict from a book search result.

    Parameters:
        entry:  search result providing ``to_dict()`` and ``dc_title``.
        indata: the ISBN typed by the user, used as a fallback when the
                entry's identifiers hold no 13-character ISBN.

    Returns:
        The book dict, or False when no ISBN was found and *indata* is empty.

    Side effects:
        May append 'new-person' actions to ``action_list`` (through
        build_authors) and always leaves the module-level ``comments``
        buffer empty.
    """
    global comments

    dic = entry.to_dict()

    book = {}
    book['action'] = 'new-book'

    # Hand find_isbn() the identifier pairs themselves.  Converting the list
    # to a string first made it iterate over single characters, so an ISBN
    # could never be found.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
    else:
        # Nothing to identify the book by.  Drop any pending notes rather
        # than letting them leak into the next book's comment.
        comments = ''
        return False

    titles = entry.dc_title
    book['title'] = unicode(titles[0].text)

    set_value(book, dic, 'category')

    # Every title element after the first is treated as subtitle text.
    book['subtitle'] = unicode(''.join([part.text for part in titles[1:]]))

    authors = build_authors(dic)
    book['persons'] = {}
    # Authors may arrive as action dicts or as existing Person objects.
    book['persons']['author'] = [_author_id(author) for author in authors]

    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']))

    if 'date' in dic:
        # Only the leading four characters (the year) are kept.
        book['published_year'] = int(dic['date'][:4])

    set_value(book, dic, 'edition')

    book['num_pages'] = find_page_number(dic)

    set_value(book, dic, 'series')

    book['description'] = unicode(find_description(dic))

    book['references'] = {}
    # Guarded like 'publishers' and 'date' above: a missing preview link
    # gives an empty reference list instead of a KeyError.
    if 'preview' in dic:
        book['references']['google-books'] = [unicode(dic['preview'])]
    else:
        book['references']['google-books'] = []

    # Attach the notes gathered while building, then reset the buffer.
    book['comment'] = comments
    comments = ''

    return book
|
|
|
|
|
2011-03-05 22:51:29 +01:00
|
|
|
def comment(comm):
    """Append *comm* plus a newline to the module-level ``comments`` buffer."""
    global comments
    line = u'%s\n' % comm
    comments = comments + line
|
2011-03-05 18:20:04 +01:00
|
|
|
|
|
|
|
def set_value(book, dic, key):
    """Store ``dic[key]`` in ``book[key]`` as unicode text, or None if absent."""
    book[key] = unicode(dic[key]) if key in dic else None
|
2010-09-23 19:04:28 +02:00
|
|
|
|
2010-09-25 18:03:23 +02:00
|
|
|
|
|
|
|
def find_description(dic):
    """Return the unescaped ``dic['description']``, or "" when there is none."""
    if 'description' not in dic:
        return ""
    return unescape(dic['description'])
|
|
|
|
|
2010-09-24 17:28:09 +02:00
|
|
|
def find_page_number(dic):
    """Return the page count mentioned in ``dic['format']``, or None.

    Looks through the ``format`` items for one containing the word "pages"
    and returns the first run of digits in it as an int.  An item that
    mentions "pages" without any digits is skipped instead of raising
    IndexError.  Returns None when there is no ``format`` key or no page
    count can be found.
    """
    if 'format' not in dic:
        return None
    for item in dic['format']:
        if 'pages' not in item:
            continue
        digits = re.search(r'[0-9]+', item)
        if digits:
            return int(digits.group())
    return None
|
|
|
|
|
2010-09-23 19:04:28 +02:00
|
|
|
def find_isbn(identifiers):
    """Return the first 13-character ISBN among *identifiers*, else False.

    *identifiers* is iterated as (kind, value) pairs; only pairs whose kind
    is exactly 'ISBN' are considered.
    """
    matches = (
        pair[1]
        for pair in identifiers
        if pair[0] == 'ISBN' and len(pair[1]) == 13
    )
    return next(matches, False)
|
2010-09-25 18:03:23 +02:00
|
|
|
|
2011-03-05 18:20:04 +01:00
|
|
|
def build_authors(dictionary):
    """Resolve every name in ``dictionary['authors']`` to an author.

    Each name goes through get_or_create_author(), in order.  When there is
    no 'authors' key a note is recorded with comment() and an empty list is
    returned.
    """
    if 'authors' not in dictionary:
        comment("No authors found.")
        return []
    return [get_or_create_author(name) for name in dictionary['authors']]
|
|
|
|
|
|
|
|
def get_or_create_author(author_name):
    """Return a dict describing the author, queueing a new person if needed.

    The last word of *author_name* is taken as the last name and everything
    before it as the first name.  If exactly one stored Person matches (first
    name containing the first word, same last name), its data is returned.
    Otherwise a 'new-person' action is appended to ``action_list`` and
    returned; its id is the lower-cased initials, with a random suffix added
    for as long as that id is already taken by a stored Person.

    The result is always a dict with at least 'id', 'first_name' and
    'last_name'.  Returning the Person object itself for an existing author
    broke the caller, which reads ``author['id']``.
    """
    names = author_name.split()
    first = ' '.join(names[:-1])
    last = names[-1]

    candidates = Person.objects.filter(first_name__contains=names[0], last_name=last)
    selected = select_from_list(candidates, attributes=['first_name', 'last_name'], item_name='author')
    if selected:
        # Existing person: no action is queued, only the data is handed back.
        return {
            'id': selected.id,
            'first_name': selected.first_name,
            'last_name': selected.last_name,
        }

    comment("No author found, creating author.")
    author = {
        'action': 'new-person',
        'first_name': first,
        'last_name': last,
    }

    newid = ''.join([name[0] for name in names]).lower()
    # Keep extending the id until no stored Person uses it.
    while len(Person.objects.filter(id=newid)) != 0:
        comment("Another author found with same initials, please fix id.")
        newid = newid + "%f" % random.random()

    author['id'] = newid
    action_list.append(author)
    return author
|
2010-09-25 18:03:23 +02:00
|
|
|
|
|
|
|
|
|
|
|
# NOTE: the cargo-cult coded function that unescapes special XML characters is unescape(), defined after select_from_list().
|
|
|
|
|
|
|
|
def select_from_list(list, attributes=False, item_name=""):
    """Return the only element of *list*, or None if it has zero or several.

    ``attributes`` and ``item_name`` are accepted but not used.
    """
    return list[0] if len(list) == 1 else None
|
|
|
|
|
|
|
|
def unescape(s):
    """Return the character data of *s* with XML entities decoded.

    *s* is parsed as the content of a dummy ``<e>`` element and only the
    character data is collected, so entity references such as ``&amp;`` are
    decoded and element tags are dropped.  Text input gives a text result;
    byte-string input is assumed to be UTF-8.

    If *s* is not well-formed XML (for example it contains a bare ``&`` or
    ``<``), it is returned unchanged instead of raising ExpatError.
    """
    # type(u"") is the text type (``unicode`` on Python 2).
    want_unicode = isinstance(s, type(u""))
    # The parser below is created for UTF-8 input, so feed it encoded bytes.
    data = s.encode("utf-8") if want_unicode else s

    chunks = []

    # create and initialize a parser object
    p = xml.parsers.expat.ParserCreate("utf-8")
    p.buffer_text = True
    # Only set the flag on parsers that have it; the handler's output type
    # then follows the input type.
    if hasattr(p, "returns_unicode"):
        p.returns_unicode = want_unicode
    p.CharacterDataHandler = chunks.append

    try:
        # parse the data wrapped in a dummy element
        # (needed so the "document" is well-formed)
        p.Parse("<e>", 0)
        p.Parse(data, 0)
        p.Parse("</e>", 1)
    except xml.parsers.expat.ExpatError:
        # Not XML after all: best effort is to hand the text back untouched.
        return s

    # join the extracted strings and return
    joiner = u"" if want_unicode else ""
    return joiner.join(chunks)
|
2010-09-23 19:04:28 +02:00
|
|
|
|
|
|
|
# Script entry point: start the interactive ISBN prompt when the file runs.
get_book_loop()
|