#!/usr/bin/python
"""Suggest new book and author entries from Google Books ISBN lookups.

ISBNs are read from standard input, looked up through the gdata
``BookService`` and turned into an action list that is rendered with
``write_actionlist``.
"""

import os
import random
import re
import readline
import sys
import xml.parsers.expat

import pgdb
from gdata.books.service import BookService

from fileformat import *
from util import *

# Input lines that end the ISBN prompt loop.
exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
# Encoding used when decoding text taken from a feed entry.
encoding = 'utf8'


def google_suggest_book_data(dbconnection, tmp_file=False):
    """Read ISBNs from stdin and collect suggested actions for unknown books.

    For every ISBN that is not already in the ``book`` table the first
    search hit is turned into a ``new-book`` action, preceded by a
    ``new-person`` action for each author that is neither in the database
    nor already suggested earlier in this run.

    Args:
        dbconnection: DB-API connection used for the book/person lookups.
        tmp_file: when true, the rendered action list is handed to
            ``write_tmpfile`` and that call's result is returned;
            otherwise the rendered list is printed and None is returned.
    """
    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
    action_list = []
    # Maps the id of every author suggested in this run to the ISBN of the
    # book that introduced it.
    authors_added = {}
    file_prefix = "suggestion"
    filler = ' -------------------------------- '

    print("# Enter ISBN number(s), end with eof ")
    for line in sys.stdin:
        isbn = line.strip()
        if not isbn:
            # A blank line carries no ISBN; looking it up is pointless.
            continue
        if isbn in exit_commands:
            print("aborted")
            break
        if book_in_db(dbconnection, isbn):
            action_list.append(
                "Book with ISBN: " + str(isbn) + " is already in DB, skipped")
            continue

        # A long comment line separates the books in the output.
        action_list.append(filler + "Book: " + isbn + filler)
        feed = service.search_by_keyword('isbn=' + isbn)
        if not feed.entry:
            print("No items found")
            continue

        entry = feed.entry[0]
        authors = parse_authors(dbconnection, entry)
        # An author is either already suggested in this run, already in the
        # database, or needs a new-person action of its own.
        for author in authors:
            if author['id'] in authors_added:
                action_list.append(
                    "Author already added when book: "
                    + str(authors_added[author['id']]) + " was added")
            elif not author_in_db(dbconnection, author):
                # Build the comment before build_author() adds the
                # 'action' key to the very same dict.
                comment = ("Author: \"" + str(author)
                           + "\" was not already in database")
                person_action = build_author(author)
                person_action.update({'comment': comment})
                action_list.append(person_action)
                authors_added[author['id']] = isbn

        book_action = {}
        if not authors:
            # TODO: test this
            book_action['comment'] = (
                "Didn't find any authors for book. !!IMPORTANT!! "
                "Add correct author and id in new-book section")
            authors.append(
                {'id': 'NO_AUTHOR', 'firstname': 'John', 'lastname': 'Doe'})

        book = build_book(entry, authors, isbn)
        if not book:
            # build_book() has already reported the missing ISBN; calling
            # dict.update(False) here would raise TypeError.
            continue
        book_action.update(book)
        action_list.append(book_action)

    rendered = write_actionlist(action_list)
    if tmp_file:
        return write_tmpfile(file_prefix, rendered)
    print(rendered)


def _has_rows(dbconnection, query, params):
    """Return True when *query* run with *params* reports at least one row."""
    cursor = dbconnection.cursor()
    try:
        cursor.execute(query, params)
        return cursor.rowcount > 0
    finally:
        # Release the cursor even when execute() raises.
        cursor.close()


def book_in_db(dbconnection, isbn):
    """Return True if the ``book`` table has a row with this ISBN."""
    return _has_rows(dbconnection,
                     "SELECT * FROM book WHERE isbn=%(num)s",
                     {'num': isbn})


def author_in_db(dbconnection, author):
    """Return True if a ``person`` row shares the author's first or last name.

    NOTE(review): the OR means one matching name part is enough; confirm
    that this loose match is intended.
    """
    return _has_rows(
        dbconnection,
        "SELECT * FROM person WHERE firstname=%(fname)s OR lastname=%(lname)s",
        {'fname': author['firstname'], 'lname': author['lastname']})


def person_id_in_db(dbconnection, id):
    """Return True if the ``person`` table has a row with this id."""
    return _has_rows(dbconnection,
                     "SELECT * FROM person WHERE id=%(i)s",
                     {'i': id})


def found_item(entry, indata):
    """Announce a found entry and build its book dict.

    The entry carries no resolved authors at this point, so an empty author
    list is passed; previously *indata* ended up in the ``authors`` slot of
    build_book().

    Returns:
        Whatever build_book() returns for the entry.
    """
    print("Found: " + entry.dc_title[0].text)
    return build_book(entry, [], indata)


def build_book(entry, authors, indata=False):
    """Build the ``new-book`` action dict for a feed entry.

    Args:
        entry: feed entry providing ``to_dict()`` and ``dc_title``.
        authors: list of author dicts; their ``'id'`` values are stored
            under ``book['persons']['author']``.
        indata: ISBN text typed by the user, used when the entry itself
            carries no 13-character ISBN.

    Returns:
        The book dict, or False when neither the entry nor *indata*
        provides an ISBN.
    """
    dic = entry.to_dict()
    book = {'action': 'new-book'}

    # find_isbn() needs the (type, value) pairs themselves; stringifying the
    # list first made it iterate over single characters and never match.
    isbn = find_isbn(dic.get('identifiers', []))
    if isbn:
        book['isbn'] = unicode(isbn)
    elif indata:
        if len(indata) == 13:
            book['isbn'] = unicode(indata)
        else:
            print("!!!isbn length not 13")
    else:
        print("No ISBN found.")
        return False

    book['title'] = unicode(entry.dc_title[0].text, encoding)
    set_value(book, dic, 'category')
    if len(entry.dc_title) > 0:
        # Every title element after the first is treated as subtitle text.
        subtitle = ''.join(title.text for title in entry.dc_title[1:])
        book['subtitle'] = unicode(subtitle, encoding)
    book['persons'] = {'author': [author['id'] for author in authors]}
    if 'publishers' in dic:
        book['publisher'] = unicode(','.join(dic['publishers']), encoding)
    if 'date' in dic:
        # Only the leading four characters (the year) are kept.
        book['published_year'] = int(dic['date'][:4])
    set_value(book, dic, 'edition')
    book['num_pages'] = find_page_number(dic)
    set_value(book, dic, 'series')
    book['description'] = unicode(find_description(dic), encoding)
    book['references'] = {
        'google-books': [unicode(dic['preview'], encoding)],
    }
    return book


def set_value(book, dic, key):
    """Copy ``dic[key]`` into *book* as unicode, or store None if absent."""
    if key in dic:
        book[key] = unicode(dic[key], encoding)
    else:
        book[key] = None


def find_description(dic):
    """Return the unescaped ``'description'`` of *dic*, or ``""`` if absent."""
    if 'description' in dic:
        return unescape(dic['description'])
    return ""


def find_page_number(dic):
    """Return the page count found in ``dic['format']``, or None.

    The first number of the first format item that mentions 'pages' and
    contains digits is used.
    """
    for item in dic.get('format', ()):
        if 'pages' in item:
            number = re.search(r'[0-9]+', item)
            if number:
                return int(number.group())
    return None


def find_isbn(identifiers):
    """Return the first 13-character ``'ISBN'`` value, or False if none.

    Args:
        identifiers: iterable of ``(kind, value)`` pairs.
    """
    for pair in identifiers:
        if pair[0] == 'ISBN' and len(pair[1]) == 13:
            return pair[1]
    return False


def parse_authors(dbconnection, feed_entry):
    """Return one author dict per author named in the feed entry.

    Each name is first looked up in the ``person`` table; a matching row is
    used as-is.  Otherwise a new author dict is made whose id is the
    lower-cased initials of the name, with a numeric suffix appended when
    that id already exists in the database.

    NOTE(review): ids are only checked against the database, so two new
    authors with the same initials receive the same id.
    """
    entry_data = feed_entry.to_dict()
    author_list = []
    if 'authors' not in entry_data:
        return author_list

    # NOTE(review): the OR lets a shared last name alone count as a match;
    # confirm that this is intended.
    auth_q = ("SELECT * FROM person "
              "WHERE firstname LIKE %(fname)s OR lastname=%(lname)s")
    cursor = dbconnection.cursor()
    try:
        for author in entry_data['authors']:
            names = author.split()
            if not names:
                # An empty name gives nothing to match or to build an id from.
                continue
            last_name = names[-1]

            # First look in the database for a match.
            cursor.execute(auth_q,
                           {'fname': names[0] + '%', 'lname': last_name})
            match = fetchone_dict(cursor)
            if match is not None and 'id' in match:
                author_list.append(match)
                continue

            # Otherwise make a new id from the initials.
            newid = ''.join(name[0] for name in names).lower()
            if person_id_in_db(dbconnection, newid):
                suffix = 1
                while person_id_in_db(dbconnection, newid + str(suffix)):
                    suffix += 1
                newid += str(suffix)

            # Everything but the last name is the first name, separated by
            # spaces; a single-word name serves as both first and last name.
            first_name = " ".join(names[:-1]) or names[0]
            author_list.append({'id': newid,
                                'firstname': first_name,
                                'lastname': last_name})
    finally:
        cursor.close()
    return author_list


def build_author(new_author):
    """Mark *new_author* as a ``new-person`` action and return the same dict."""
    new_author['action'] = 'new-person'
    return new_author


def select_from_list(list, attributes=False, item_name=""):
    """Return the only element of *list*, or None unless it has exactly one.

    *attributes* and *item_name* are accepted but not used.
    """
    if len(list) == 1:
        return list[0]
    return None


#Cargo-cult coded function to unescape special XML characters
def unescape(s):
    """Return *s* with XML character and entity references resolved.

    The text is parsed as the content of a dummy ``<e>`` element and only
    the character data reported by expat is kept, so any markup inside *s*
    is dropped from the result.  Text input yields text; byte input is
    treated as UTF-8 and yields UTF-8 encoded bytes.

    Raises:
        xml.parsers.expat.ExpatError: if *s* is not well-formed XML content
            (for example a bare ``&``).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # interpreters without a separate unicode type

    want_unicode = isinstance(s, text_type)
    if want_unicode:
        s = s.encode("utf-8")

    # the rest of this assumes that `s` is UTF-8
    chunks = []

    # create and initialize a parser object
    parser = xml.parsers.expat.ParserCreate("utf-8")
    parser.buffer_text = True
    if hasattr(parser, "returns_unicode"):
        # Only older expat bindings expose this switch.
        parser.returns_unicode = want_unicode
    parser.CharacterDataHandler = chunks.append

    # parse the data wrapped in a dummy element
    # (needed so the "document" is well-formed)
    parser.Parse(b"<e>", False)
    parser.Parse(s, False)
    parser.Parse(b"</e>", True)

    # join the extracted strings and return them in the caller's type
    if want_unicode:
        return u"".join(chunks)
    joined = "".join(chunks)
    if not isinstance(joined, bytes):
        joined = joined.encode("utf-8")
    return joined