Projects/worblehat-old
Projects
/
worblehat-old
Archived
12
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
worblehat-old/python/google_interface.py

191 lines
5.4 KiB
Python

#!/usr/bin/python
# Interactive command-line helper: asks for ISBN numbers, looks them up
# through the gdata BookService and builds book dictionaries from the hits.
import os
# The settings module is named before the model import on the next line
# (presumably Django needs it at import time -- confirm).
os.environ['DJANGO_SETTINGS_MODULE']='web.settings'
from web.library.models import *
from gdata.books.service import BookService
#from dibbler_snippet import ConfirmMenu
#from xml.dom import minidom
#from xml.sax.saxutils import unescape
import xml.parsers.expat
# NOTE(review): readline is not referenced by name in the code visible here;
# presumably imported so raw_input() gets line editing -- confirm before removing.
import readline
import re
# Inputs at the ISBN prompt that end the lookup loop (compared verbatim).
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
def get_book_loop():
    """Prompt for ISBN numbers and build a book from each search hit.

    Runs until the user enters one of ``exit_commands``. Each ISBN is sent
    to ``BookService.search_by_keyword`` as ``'isbn=<input>'``. A single
    hit is passed straight to ``build_book``; with several hits the user is
    asked to pick one by index. The value returned by ``build_book`` is
    not used here.

    A blank line is ignored, and an invalid or out-of-range selection skips
    the current ISBN instead of aborting the whole loop.
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    while True:
        # Strip so that "exit " or a padded ISBN behaves like the bare text.
        indata = raw_input('Enter ISBN number> ').strip()
        if indata in exit_commands:
            break
        if not indata:
            continue

        entries = service.search_by_keyword('isbn=' + indata).entry
        if not entries:
            print("No items found")
            continue

        if len(entries) == 1:
            print("Found one book: " + entries[0].dc_title[0].text)
            build_book(entries[0], indata)
            continue

        print("Found multiple books: ")
        for index, entry in enumerate(entries):
            print("%d: %s" % (index, entry.dc_title[0].text))
        answer = raw_input("Which book do you want? [0-%d] " % (len(entries) - 1))
        try:
            bookno = int(answer)
        except ValueError:
            bookno = -1
        # Reject negatives explicitly: they would otherwise index from the end.
        if not 0 <= bookno < len(entries):
            print("Invalid selection, skipping this ISBN")
            continue
        build_book(entries[bookno], indata)
def found_item(entry, indata):
    """Announce a matching entry and pass it on to ``build_book``.

    Prints the text of the entry's first title element. The result of
    ``build_book`` is discarded, so this function returns None.
    """
    title = entry.dc_title[0].text
    print("Found: " + title)
    build_book(entry, indata)
def build_book(entry, indata=False):
    """Build a 'new-book' dictionary from a book search result.

    Args:
        entry: search result exposing ``to_dict()`` and ``dc_title`` (a
            sequence of objects with a ``text`` attribute).
        indata: the ISBN string the user typed; used as a fallback when the
            entry's identifiers contain no 13-character ISBN. Falsy for none.

    Returns:
        The book dictionary, or False (after printing "No ISBN found") when
        neither the entry nor ``indata`` supplies a 13-character ISBN.

    Note:
        ``build_authors`` is called along the way and may prompt the user.
    """
    dic = entry.to_dict()
    titles = entry.dc_title

    book = {}
    book['action'] = 'new-book'
    book['title'] = titles[0].text
    # Title elements after the first form the subtitle ('' when there are none).
    book['subtitle'] = ''.join([part.text for part in titles[1:]])

    # The metadata dict only carries keys for data that was found, so
    # 'identifiers' may be absent.
    isbn = find_isbn(dic.get('identifiers', []))
    if not isbn and indata and len(indata) == 13:
        isbn = indata
    if not isbn:
        print("No ISBN found")
        return False
    book['isbn'] = isbn

    book['description'] = find_description(dic)
    if 'date' in dic:
        # Only the leading four characters (the year) are kept.
        book['published_year'] = int(dic['date'][:4])
    if 'publishers' in dic:
        book['publisher'] = ','.join(dic['publishers'])
    book['num_pages'] = find_page_number(dic)

    book['persons'] = {}
    book['persons']['author'] = build_authors(dic)

    book['references'] = {}
    # A preview link is optional; leave the reference out rather than fail.
    if 'preview' in dic:
        book['references']['google'] = dic['preview']

    keys_to_add = ['edition', 'series', 'category', 'thumbnail', 'picture']
    for key in keys_to_add:
        set_value(book, dic, key)
    return book
def set_value(book, dic, key):
    """Store ``dic[key]`` under ``book[key]``, or '' when ``dic`` lacks the key.

    ``book`` is modified in place; nothing is returned.
    """
    book[key] = dic.get(key, '')
def find_description(dic):
    """Return the XML-unescaped description from ``dic``, or "" if absent.

    ``unescape`` runs the text through an XML parser, which rejects text
    that is not well-formed (for example a bare ``&`` or ``<``). In that
    case the description is returned unchanged instead of letting the
    parse error abort the caller.
    """
    if 'description' not in dic:
        return ""
    raw = dic['description']
    try:
        return unescape(raw)
    except xml.parsers.expat.ExpatError:
        # Not parseable as XML content, so there is nothing to unescape.
        return raw
def find_page_number(dic):
    """Return the page count found in ``dic['format']``, or None.

    Scans the format entries in order and returns the first run of digits
    from the first entry that mentions "pages" and contains a number.
    Entries that mention "pages" without any digits, and empty/None
    entries, are skipped rather than raising.
    """
    for item in dic.get('format') or ():
        if not item or 'pages' not in item:
            continue
        match = re.search(r'[0-9]+', item)
        if match:
            return int(match.group())
    return None
def find_isbn(identifiers):
    """Return the first 13-character 'ISBN' value in ``identifiers``.

    ``identifiers`` is an iterable of indexable pairs whose first item is
    the identifier type and whose second is its value. False is returned
    when no pair qualifies.
    """
    thirteen_char_isbns = (
        ident[1]
        for ident in identifiers
        if ident[0] == 'ISBN' and len(ident[1]) == 13
    )
    return next(thirteen_char_isbns, False)
def build_authors(dictionary):
    """Resolve every name under ``dictionary['authors']`` to an author.

    Each name goes through ``get_or_create_author``; the results are
    returned in the same order. Without an 'authors' key a notice is
    printed and an empty list is returned.
    """
    if 'authors' not in dictionary:
        print("No authors found")
        return []
    return [get_or_create_author(name) for name in dictionary['authors']]
def get_or_create_author(author_name):
    """Find an existing Person for ``author_name`` or construct a new one.

    The name is split on whitespace: the last word is the last name and
    everything before it the first name. Existing persons are looked up by
    last name and, when there is more than one word, by the first word
    being contained in their first name. A single-word name is looked up
    by last name only, since filtering the first name on that same word
    would never match a person stored with an empty first name.

    If the user does not confirm a candidate, a new ``Person`` is
    constructed (it is not saved here) with an id made of the lower-cased
    initials. While that id is taken the user is asked for another one.

    Raises:
        ValueError: if ``author_name`` contains no words.
    """
    # The two spaces after the colon match the output of the original
    # two-item print statement.
    print("Processing author:  %s" % author_name)
    names = author_name.split()
    if not names:
        raise ValueError("Cannot look up or create an author from an empty name")
    first = ' '.join(names[:-1])
    last = names[-1]

    lookup = {'last_name': last}
    if len(names) > 1:
        lookup['first_name__contains'] = names[0]
    candidates = Person.objects.filter(**lookup)
    selected = select_from_list(candidates, attributes=['first_name', 'last_name'], item_name='author')
    if selected:
        return selected

    print("No author found, creating author")
    newid = ''.join([part[0] for part in names]).lower()
    print(newid)
    while True:
        existing = Person.objects.filter(id=newid)
        if len(existing) == 0:
            return Person(first_name=first, last_name=last, id=newid)
        newid = raw_input("Another authour found with same intials, please suggest an id> ")
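# NOTE: the cargo-cult coded function to unescape special XML characters is
# unescape(), defined further down; the function directly below only picks a
# candidate from a list.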
def select_from_list(list, attributes=False, item_name=""):
    """Let the user confirm the single candidate in ``list``.

    Args:
        list: candidates; must support ``len()`` and indexing.
        attributes: optional sequence of plain attribute names whose string
            values are joined with spaces to describe the candidate in the
            prompt. When falsy the candidate itself is formatted.
        item_name: word used for the candidate in the messages.

    Returns:
        The candidate if there is exactly one and the user answers 'yes',
        'y' or just presses enter; otherwise None. With zero or several
        candidates a notice is printed and None is returned without
        prompting.
    """
    # The parameter name shadows the builtin; use a local alias from here on.
    candidates = list
    count = len(candidates)
    if count == 0:
        print("No candidate %sfound" % (item_name + ' '))
        return None
    if count > 1:
        print("several candidates found")
        return None

    # Fetch the candidate once so the object shown is the object returned.
    candidate = candidates[0]
    if attributes:
        # getattr replaces the former eval() on a built-up expression string.
        description = ' '.join([getattr(candidate, attribute) for attribute in attributes])
    else:
        description = candidate
    answer = raw_input(str("Found one %s: %s. Use this? [y]/n> " % (item_name, description)))
    if answer in ['yes', 'y', '']:
        return candidate
    return None
def unescape(s):
want_unicode = False
if isinstance(s, unicode):
s = s.encode("utf-8")
want_unicode = True
# the rest of this assumes that `s` is UTF-8
list = []
# create and initialize a parser object
p = xml.parsers.expat.ParserCreate("utf-8")
p.buffer_text = True
p.returns_unicode = want_unicode
p.CharacterDataHandler = list.append
# parse the data wrapped in a dummy element
# (needed so the "document" is well-formed)
p.Parse("<e>", 0)
p.Parse(s, 0)
p.Parse("</e>", 1)
# join the extracted strings and return
es = ""
if want_unicode:
es = u""
return es.join(list)
# Start the interactive ISBN prompt.
# NOTE(review): this call is unconditional, so it also runs whenever this
# module is imported; consider an `if __name__ == '__main__':` guard.
get_book_loop()