Opprettet google_interface

google_interface.py inneholder funksjonalitet som svarer til kommandoen "worblehat suggest-book-data"
2011-10-08 14:08:15 +00:00
parent b808517be3
commit d9f41d3e7e
1 changed files with 222 additions and 0 deletions
@@ -0,0 +1,222 @@
+#!/usr/bin/python
+
+import os
+from gdata.books.service import BookService
+import xml.parsers.expat
+import readline
+import re
+import random
+import pgdb
+import sys
+from fileformat import *
+from util import *
+
+# Input lines that make suggest_book() stop reading ISBNs.
+exit_commands = ['exit', 'abort', 'quit', 'bye', 'eat flaming death', 'q']
+
+# Notes collected through comment(); build_book() copies them onto the book
+# it returns and then clears them.
+comments = ""
+
+# Temporary: module-level database connection used by the script.
+# NOTE(review): credentials should not live in source control. Every setting
+# can be overridden through the environment; the previous hard-coded values
+# are kept only as defaults so existing behaviour does not change.
+connection = pgdb.connect(
+    database=os.environ.get('WORBLEHAT_DB_NAME', 'oysteini_pbb2'),
+    user=os.environ.get('WORBLEHAT_DB_USER', 'oysteini_pbb'),
+    password=os.environ.get('WORBLEHAT_DB_PASSWORD', 'lio5Aide'),
+    host=os.environ.get('WORBLEHAT_DB_HOST', 'postgres.pvv.ntnu.no'))
+
+def suggest_book(dbconnection, tmp_file=False):
+    """Read ISBNs from stdin and print an action list of suggested books.
+
+    Each non-empty input line is treated as one ISBN. ISBNs that book_in_db()
+    reports as known are noted as skipped; the others are looked up through
+    the Google Books service and turned into book actions by build_book().
+    Reading stops at end of file or when a line equals one of exit_commands.
+
+    dbconnection -- open database connection handed on to the helpers
+    tmp_file     -- reserved for writing the result to a temporary file;
+                    not implemented yet, output always goes to stdout
+    """
+    service = BookService(source='Programvareverkstedet - Worblehat - 0.1a ')
+    action_list = []
+    print("Enter ISBN number(s), end with eof")
+    for line in sys.stdin:
+        # Lines from stdin keep their trailing newline, which would make the
+        # exit-command test fail and end up inside the search query.
+        indata = line.strip()
+        if not indata:
+            continue
+        if indata in exit_commands:
+            print("aborted")
+            break
+        if book_in_db(dbconnection, indata):
+            action_list.append("# Book with isbn: " + str(indata) + " is already in DB, skipped")
+            continue
+
+        feed = service.search_by_keyword('isbn=' + indata)
+        if not feed.entry:
+            print("No items found")
+            continue
+
+        # action_list is passed to build_book to fill in missing authors
+        book = build_book(dbconnection, feed.entry[0], indata, action_list)
+        # build_book returns False when no usable ISBN exists; do not store that
+        if book:
+            action_list.append(book)
+
+    output = write_actionlist(action_list)
+    if tmp_file:
+        # TODO: write to tmp file
+        sys.stderr.write("tmp_file output is not implemented yet, writing to stdout\n")
+    print(output)
+
+def book_in_db(dbconnection, isbn):
+    """Return True when a book with the given ISBN is already stored.
+
+    dbconnection -- open DB-API connection
+    isbn         -- ISBN string to look for
+    """
+    cursor = dbconnection.cursor()
+    # NOTE(review): assumes the `books` table has an `isbn` column -- confirm
+    # against the schema. The previous query filtered on person name columns
+    # using variables that do not exist in this function.
+    query = "SELECT isbn FROM books WHERE isbn=%(isbn)s"
+    cursor.execute(query, {'isbn': isbn})
+    return cursor.fetchone() is not None
+
+def found_item(entry, indata, dbconnection=None):
+    """Announce a search hit and build the book action for it.
+
+    entry        -- feed entry of the book that was found
+    indata       -- the ISBN the user typed, used as fallback by build_book
+    dbconnection -- database connection for build_book; defaults to the
+                    module-level connection
+
+    Returns whatever build_book returns (a book dict, or False).
+    """
+    if dbconnection is None:
+        dbconnection = connection
+    print("Found: " + entry.dc_title[0].text)
+    return build_book(dbconnection, entry, indata)
+
+def build_book(dbconnection, entry, indata=False, action_list=None):
+    """Build a 'new-book' action dict from a book feed entry.
+
+    dbconnection -- open database connection, used to look up authors
+    entry        -- feed entry; must provide to_dict() and dc_title
+    indata       -- ISBN typed by the user; used when the entry carries no
+                    13-character ISBN of its own
+    action_list  -- optional list; authors that have to be created are
+                    appended to it so they precede the book that uses them
+
+    Returns the book dict, or False when no 13-character ISBN is available.
+    Notes gathered through comment() are stored under 'comment' and cleared.
+    """
+    global comments
+
+    dic = entry.to_dict()
+    print("Dict in build_book:" + str(dic))
+    print("")
+
+    book = {'action': 'new-book'}
+
+    # Hand find_isbn the identifier pairs themselves. Converting the list to
+    # a string first made it iterate over single characters and never match.
+    isbn = find_isbn(dic.get('identifiers', []))
+    if isbn:
+        book['isbn'] = unicode(isbn)
+    elif indata and len(indata) == 13:
+        book['isbn'] = unicode(indata)
+    else:
+        # Also reached when indata is present but not 13 characters long;
+        # previously that case produced a book without any 'isbn' key.
+        comment("No ISBN found.")
+        return False
+
+    book['title'] = unicode(entry.dc_title[0].text)
+
+    set_value(book, dic, 'category')
+
+    # Every title element after the first is part of the subtitle
+    # (empty string when there is only one).
+    book['subtitle'] = unicode(''.join([t.text for t in entry.dc_title[1:]]))
+
+    authors = build_authors(dbconnection, dic)
+    if action_list is not None:
+        for author in authors:
+            if isinstance(author, dict) and author.get('action') == 'new-person':
+                action_list.append(author)
+    book['persons'] = {'author': [author['id'] for author in authors]}
+
+    if 'publishers' in dic:
+        book['publisher'] = unicode(','.join(dic['publishers']))
+
+    if 'date' in dic:
+        book['published_year'] = int(dic['date'][:4])
+
+    set_value(book, dic, 'edition')
+
+    book['num_pages'] = find_page_number(dic)
+
+    set_value(book, dic, 'series')
+
+    book['description'] = unicode(find_description(dic))
+
+    book['references'] = {}
+    if 'preview' in dic:
+        book['references']['google-books'] = [unicode(dic['preview'])]
+
+    book['comment'] = comments
+    comments = ''
+
+    return book
+
+def comment(comm):
+    """Append comm, followed by a newline, to the module-level comments."""
+    global comments
+    comments = comments + u'%s\n' % comm
+
+def set_value(book, dic, key):
+    """Copy dic[key] into book[key] as unicode; store None when key is absent."""
+    book[key] = unicode(dic[key]) if key in dic else None
+
+
+
+def find_description(dic):
+    """Return the unescaped 'description' of dic, or "" when there is none."""
+    if 'description' not in dic:
+        return ""
+    return unescape(dic['description'])
+
+def find_page_number(dic):
+    """Return the page count found in dic['format'], or None.
+
+    The first entry that contains the word 'pages' and at least one digit
+    wins; its first run of digits is returned as an int. Entries that mention
+    'pages' without any number are skipped instead of raising IndexError.
+    """
+    for item in dic.get('format', []):
+        if 'pages' in item:
+            match = re.search(r'[0-9]+', item)
+            if match:
+                return int(match.group(0))
+    return None
+
+def find_isbn(identifiers):
+    """Return the first 13-character value tagged 'ISBN', or False if none.
+
+    identifiers is iterated as a sequence of pairs: element 0 is the tag,
+    element 1 the value.
+    """
+    hits = (pair[1] for pair in identifiers
+            if pair[0] == 'ISBN' and len(pair[1]) == 13)
+    return next(hits, False)
+
+def build_authors(dbconnection, dictionary):
+    """Return one author record per name in dictionary['authors'].
+
+    Each name is resolved through get_or_create_author(). When the dictionary
+    has no 'authors' key a comment is recorded and an empty list returned.
+    """
+    if 'authors' not in dictionary:
+        comment("No authors found.")
+        return []
+    return [get_or_create_author(dbconnection, name)
+            for name in dictionary['authors']]
+
+def get_or_create_author(dbconnection, author_name):
+    """Return a person record for author_name, suggesting a new one if needed.
+
+    The last word of author_name is taken as last name, the rest as first
+    name. Persons whose last name OR first name matches are fetched:
+      * exactly one match  -> that row is returned
+      * several matches    -> the user picks one interactively
+      * no match           -> a 'new-person' action dict is returned
+
+    dbconnection -- open DB-API connection
+    author_name  -- full name as delivered by the book feed
+
+    The returned mapping always has an 'id' entry.
+    Raises ValueError when author_name contains no words.
+    """
+    names = author_name.split()
+    if not names:
+        raise ValueError("empty author name")
+    first = ' '.join(names[:-1])
+    last = names[-1]
+
+    cursor = dbconnection.cursor()
+    # NOTE(review): OR also matches people who share only one of the two
+    # names, and a single such match is accepted without asking -- confirm
+    # that this is intended.
+    query = "SELECT id FROM person WHERE lastname=%(last)s OR firstname=%(first)s"
+    cursor.execute(query, {'last': last, 'first': first})
+    candidates = fetchall_dict(cursor)
+
+    if len(candidates) == 1:
+        return candidates[0]
+
+    if len(candidates) > 1:
+        # TODO: TEST
+        print("Found several candidates:")
+        for i, candidate in enumerate(candidates):
+            print("%d:  %s" % (i, candidate['id']))
+        prompt = "Which candidate do you want? [0-%d] " % (len(candidates) - 1)
+        while True:
+            try:
+                candno = int(raw_input(prompt))
+            except ValueError:
+                continue
+            if 0 <= candno < len(candidates):
+                return candidates[candno]
+
+    print("No candidate found for " + author_name + ", making new person")
+    base_id = ''.join([i[0] for i in names]).lower()
+    newid = base_id
+    # Check if id already exists
+    # NOTE(review): the original left this check unfinished. Appending a
+    # running number on collision is an assumption -- confirm the id scheme.
+    # Ids suggested earlier in the same run are not in the database yet and
+    # are therefore not detected here.
+    idq = "SELECT id FROM person WHERE id=%(id)s"
+    suffix = 1
+    while True:
+        cursor.execute(idq, {'id': newid})
+        if cursor.fetchone() is None:
+            break
+        suffix += 1
+        newid = base_id + str(suffix)
+
+    return {
+        'action': 'new-person',
+        'id': newid,
+        'first_name': first,
+        'last_name': last,
+    }
+
+
+# The cargo-cult coded function to unescape special XML characters is unescape(), defined further down.
+
+def select_from_list(list, attributes=False, item_name=""):
+    """Return the only element of list, or None when it has zero or several.
+
+    attributes and item_name are accepted but not used.
+    """
+    if len(list) == 1:
+        return list[0]
+    return None
+
+def unescape(s):
+    """Return s with XML character and entity references resolved.
+
+    The text is fed to an expat parser wrapped in a dummy <e> element and the
+    character data the parser reports is joined back together. A unicode
+    argument is encoded to UTF-8 for parsing and yields a unicode result;
+    any other string is assumed to be UTF-8 already.
+    """
+    want_unicode = isinstance(s, unicode)
+    if want_unicode:
+        s = s.encode("utf-8")
+
+    # collects the character data handed over by the parser
+    chunks = []
+
+    parser = xml.parsers.expat.ParserCreate("utf-8")
+    parser.buffer_text = True
+    parser.returns_unicode = want_unicode
+    parser.CharacterDataHandler = chunks.append
+
+    # the dummy element makes the "document" well-formed; only the closing
+    # tag is flagged as the final piece of input
+    for fragment, is_final in (("<e>", 0), (s, 0), ("</e>", 1)):
+        parser.Parse(fragment, is_final)
+
+    joiner = u"" if want_unicode else ""
+    return joiner.join(chunks)
+
+
+# Script entry: start the interactive ISBN prompt on the module-level connection.
+suggest_book(dbconnection=connection)