faset over fra Z3950 til google books

This commit is contained in:
2010-09-23 13:57:37 +00:00
parent d173b9396e
commit 644e79f9b5
221 changed files with 60948 additions and 27273 deletions

View File

@@ -0,0 +1,544 @@
#!/usr/bin/python
#
# Copyright (C) 2008 Yu-Jie Lin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains extensions to Atom objects used with Google Webmaster Tools."""
__author__ = 'livibetter (Yu-Jie Lin)'
try:
from xml.etree import cElementTree as ElementTree
except ImportError:
try:
import cElementTree as ElementTree
except ImportError:
try:
from xml.etree import ElementTree
except ImportError:
from elementtree import ElementTree
import atom
import gdata
# XML namespaces which are often used in Google Webmaster Tools entities.
GWEBMASTERTOOLS_NAMESPACE = 'http://schemas.google.com/webmasters/tools/2007'
GWEBMASTERTOOLS_TEMPLATE = '{http://schemas.google.com/webmasters/tools/2007}%s'
class Indexed(atom.AtomBase):
_tag = 'indexed'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def IndexedFromString(xml_string):
return atom.CreateClassFromXMLString(Indexed, xml_string)
class Crawled(atom.Date):
_tag = 'crawled'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def CrawledFromString(xml_string):
return atom.CreateClassFromXMLString(Crawled, xml_string)
class GeoLocation(atom.AtomBase):
_tag = 'geolocation'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def GeoLocationFromString(xml_string):
return atom.CreateClassFromXMLString(GeoLocation, xml_string)
class PreferredDomain(atom.AtomBase):
_tag = 'preferred-domain'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def PreferredDomainFromString(xml_string):
return atom.CreateClassFromXMLString(PreferredDomain, xml_string)
class CrawlRate(atom.AtomBase):
_tag = 'crawl-rate'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def CrawlRateFromString(xml_string):
return atom.CreateClassFromXMLString(CrawlRate, xml_string)
class EnhancedImageSearch(atom.AtomBase):
_tag = 'enhanced-image-search'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def EnhancedImageSearchFromString(xml_string):
return atom.CreateClassFromXMLString(EnhancedImageSearch, xml_string)
class Verified(atom.AtomBase):
_tag = 'verified'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def VerifiedFromString(xml_string):
return atom.CreateClassFromXMLString(Verified, xml_string)
class VerificationMethodMeta(atom.AtomBase):
_tag = 'meta'
_namespace = atom.ATOM_NAMESPACE
_children = atom.AtomBase._children.copy()
_attributes = atom.AtomBase._attributes.copy()
_attributes['name'] = 'name'
_attributes['content'] = 'content'
def __init__(self, text=None, name=None, content=None,
extension_elements=None, extension_attributes=None):
self.text = text
self.name = name
self.content = content
self.extension_elements = extension_elements or []
self.extension_attributes = extension_attributes or {}
def VerificationMethodMetaFromString(xml_string):
return atom.CreateClassFromXMLString(VerificationMethodMeta, xml_string)
class VerificationMethod(atom.AtomBase):
_tag = 'verification-method'
_namespace = GWEBMASTERTOOLS_NAMESPACE
_children = atom.Text._children.copy()
_attributes = atom.Text._attributes.copy()
_children['{%s}meta' % atom.ATOM_NAMESPACE] = (
'meta', VerificationMethodMeta)
_attributes['in-use'] = 'in_use'
_attributes['type'] = 'type'
def __init__(self, text=None, in_use=None, meta=None, type=None,
extension_elements=None, extension_attributes=None):
self.text = text
self.in_use = in_use
self.meta = meta
self.type = type
self.extension_elements = extension_elements or []
self.extension_attributes = extension_attributes or {}
def VerificationMethodFromString(xml_string):
return atom.CreateClassFromXMLString(VerificationMethod, xml_string)
class MarkupLanguage(atom.AtomBase):
_tag = 'markup-language'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def MarkupLanguageFromString(xml_string):
return atom.CreateClassFromXMLString(MarkupLanguage, xml_string)
class SitemapMobile(atom.AtomBase):
_tag = 'sitemap-mobile'
_namespace = GWEBMASTERTOOLS_NAMESPACE
_children = atom.AtomBase._children.copy()
_attributes = atom.AtomBase._attributes.copy()
_children['{%s}markup-language' % GWEBMASTERTOOLS_NAMESPACE] = (
'markup_language', [MarkupLanguage])
def __init__(self, markup_language=None,
extension_elements=None, extension_attributes=None, text=None):
self.markup_language = markup_language or []
self.text = text
self.extension_elements = extension_elements or []
self.extension_attributes = extension_attributes or {}
def SitemapMobileFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapMobile, xml_string)
class SitemapMobileMarkupLanguage(atom.AtomBase):
_tag = 'sitemap-mobile-markup-language'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def SitemapMobileMarkupLanguageFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapMobileMarkupLanguage, xml_string)
class PublicationLabel(atom.AtomBase):
_tag = 'publication-label'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def PublicationLabelFromString(xml_string):
return atom.CreateClassFromXMLString(PublicationLabel, xml_string)
class SitemapNews(atom.AtomBase):
_tag = 'sitemap-news'
_namespace = GWEBMASTERTOOLS_NAMESPACE
_children = atom.AtomBase._children.copy()
_attributes = atom.AtomBase._attributes.copy()
_children['{%s}publication-label' % GWEBMASTERTOOLS_NAMESPACE] = (
'publication_label', [PublicationLabel])
def __init__(self, publication_label=None,
extension_elements=None, extension_attributes=None, text=None):
self.publication_label = publication_label or []
self.text = text
self.extension_elements = extension_elements or []
self.extension_attributes = extension_attributes or {}
def SitemapNewsFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapNews, xml_string)
class SitemapNewsPublicationLabel(atom.AtomBase):
_tag = 'sitemap-news-publication-label'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def SitemapNewsPublicationLabelFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapNewsPublicationLabel, xml_string)
class SitemapLastDownloaded(atom.Date):
_tag = 'sitemap-last-downloaded'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def SitemapLastDownloadedFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapLastDownloaded, xml_string)
class SitemapType(atom.AtomBase):
_tag = 'sitemap-type'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def SitemapTypeFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapType, xml_string)
class SitemapStatus(atom.AtomBase):
_tag = 'sitemap-status'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def SitemapStatusFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapStatus, xml_string)
class SitemapUrlCount(atom.AtomBase):
_tag = 'sitemap-url-count'
_namespace = GWEBMASTERTOOLS_NAMESPACE
def SitemapUrlCountFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapUrlCount, xml_string)
class LinkFinder(atom.LinkFinder):
"""An "interface" providing methods to find link elements
SitesEntry elements often contain multiple links which differ in the rel
attribute or content type. Often, developers are interested in a specific
type of link so this class provides methods to find specific classes of links.
This class is used as a mixin in SitesEntry.
"""
def GetSelfLink(self):
"""Find the first link with rel set to 'self'
Returns:
An atom.Link or none if none of the links had rel equal to 'self'
"""
for a_link in self.link:
if a_link.rel == 'self':
return a_link
return None
def GetEditLink(self):
for a_link in self.link:
if a_link.rel == 'edit':
return a_link
return None
def GetPostLink(self):
"""Get a link containing the POST target URL.
The POST target URL is used to insert new entries.
Returns:
A link object with a rel matching the POST type.
"""
for a_link in self.link:
if a_link.rel == 'http://schemas.google.com/g/2005#post':
return a_link
return None
def GetFeedLink(self):
for a_link in self.link:
if a_link.rel == 'http://schemas.google.com/g/2005#feed':
return a_link
return None
class SitesEntry(atom.Entry, LinkFinder):
"""A Google Webmaster Tools meta Entry flavor of an Atom Entry """
_tag = atom.Entry._tag
_namespace = atom.Entry._namespace
_children = atom.Entry._children.copy()
_attributes = atom.Entry._attributes.copy()
_children['{%s}entryLink' % gdata.GDATA_NAMESPACE] = (
'entry_link', [gdata.EntryLink])
_children['{%s}indexed' % GWEBMASTERTOOLS_NAMESPACE] = ('indexed', Indexed)
_children['{%s}crawled' % GWEBMASTERTOOLS_NAMESPACE] = (
'crawled', Crawled)
_children['{%s}geolocation' % GWEBMASTERTOOLS_NAMESPACE] = (
'geolocation', GeoLocation)
_children['{%s}preferred-domain' % GWEBMASTERTOOLS_NAMESPACE] = (
'preferred_domain', PreferredDomain)
_children['{%s}crawl-rate' % GWEBMASTERTOOLS_NAMESPACE] = (
'crawl_rate', CrawlRate)
_children['{%s}enhanced-image-search' % GWEBMASTERTOOLS_NAMESPACE] = (
'enhanced_image_search', EnhancedImageSearch)
_children['{%s}verified' % GWEBMASTERTOOLS_NAMESPACE] = (
'verified', Verified)
_children['{%s}verification-method' % GWEBMASTERTOOLS_NAMESPACE] = (
'verification_method', [VerificationMethod])
def __GetId(self):
return self.__id
# This method was created to strip the unwanted whitespace from the id's
# text node.
def __SetId(self, id):
self.__id = id
if id is not None and id.text is not None:
self.__id.text = id.text.strip()
id = property(__GetId, __SetId)
def __init__(self, category=None, content=None,
atom_id=None, link=None, title=None, updated=None,
entry_link=None, indexed=None, crawled=None,
geolocation=None, preferred_domain=None, crawl_rate=None,
enhanced_image_search=None,
verified=None, verification_method=None,
extension_elements=None, extension_attributes=None, text=None):
atom.Entry.__init__(self, category=category,
content=content, atom_id=atom_id, link=link,
title=title, updated=updated, text=text)
self.entry_link = entry_link or []
self.indexed = indexed
self.crawled = crawled
self.geolocation = geolocation
self.preferred_domain = preferred_domain
self.crawl_rate = crawl_rate
self.enhanced_image_search = enhanced_image_search
self.verified = verified
self.verification_method = verification_method or []
def SitesEntryFromString(xml_string):
return atom.CreateClassFromXMLString(SitesEntry, xml_string)
class SitesFeed(atom.Feed, LinkFinder):
"""A Google Webmaster Tools meta Sites feed flavor of an Atom Feed"""
_tag = atom.Feed._tag
_namespace = atom.Feed._namespace
_children = atom.Feed._children.copy()
_attributes = atom.Feed._attributes.copy()
_children['{%s}startIndex' % gdata.OPENSEARCH_NAMESPACE] = (
'start_index', gdata.StartIndex)
_children['{%s}entry' % atom.ATOM_NAMESPACE] = ('entry', [SitesEntry])
del _children['{%s}generator' % atom.ATOM_NAMESPACE]
del _children['{%s}author' % atom.ATOM_NAMESPACE]
del _children['{%s}contributor' % atom.ATOM_NAMESPACE]
del _children['{%s}logo' % atom.ATOM_NAMESPACE]
del _children['{%s}icon' % atom.ATOM_NAMESPACE]
del _children['{%s}rights' % atom.ATOM_NAMESPACE]
del _children['{%s}subtitle' % atom.ATOM_NAMESPACE]
def __GetId(self):
return self.__id
def __SetId(self, id):
self.__id = id
if id is not None and id.text is not None:
self.__id.text = id.text.strip()
id = property(__GetId, __SetId)
def __init__(self, start_index=None, atom_id=None, title=None, entry=None,
category=None, link=None, updated=None,
extension_elements=None, extension_attributes=None, text=None):
"""Constructor for Source
Args:
category: list (optional) A list of Category instances
id: Id (optional) The entry's Id element
link: list (optional) A list of Link instances
title: Title (optional) the entry's title element
updated: Updated (optional) the entry's updated element
entry: list (optional) A list of the Entry instances contained in the
feed.
text: String (optional) The text contents of the element. This is the
contents of the Entry's XML text node.
(Example: <foo>This is the text</foo>)
extension_elements: list (optional) A list of ExtensionElement instances
which are children of this element.
extension_attributes: dict (optional) A dictionary of strings which are
the values for additional XML attributes of this element.
"""
self.start_index = start_index
self.category = category or []
self.id = atom_id
self.link = link or []
self.title = title
self.updated = updated
self.entry = entry or []
self.text = text
self.extension_elements = extension_elements or []
self.extension_attributes = extension_attributes or {}
def SitesFeedFromString(xml_string):
return atom.CreateClassFromXMLString(SitesFeed, xml_string)
class SitemapsEntry(atom.Entry, LinkFinder):
"""A Google Webmaster Tools meta Sitemaps Entry flavor of an Atom Entry """
_tag = atom.Entry._tag
_namespace = atom.Entry._namespace
_children = atom.Entry._children.copy()
_attributes = atom.Entry._attributes.copy()
_children['{%s}sitemap-type' % GWEBMASTERTOOLS_NAMESPACE] = (
'sitemap_type', SitemapType)
_children['{%s}sitemap-status' % GWEBMASTERTOOLS_NAMESPACE] = (
'sitemap_status', SitemapStatus)
_children['{%s}sitemap-last-downloaded' % GWEBMASTERTOOLS_NAMESPACE] = (
'sitemap_last_downloaded', SitemapLastDownloaded)
_children['{%s}sitemap-url-count' % GWEBMASTERTOOLS_NAMESPACE] = (
'sitemap_url_count', SitemapUrlCount)
_children['{%s}sitemap-mobile-markup-language' % GWEBMASTERTOOLS_NAMESPACE] \
= ('sitemap_mobile_markup_language', SitemapMobileMarkupLanguage)
_children['{%s}sitemap-news-publication-label' % GWEBMASTERTOOLS_NAMESPACE] \
= ('sitemap_news_publication_label', SitemapNewsPublicationLabel)
def __GetId(self):
return self.__id
# This method was created to strip the unwanted whitespace from the id's
# text node.
def __SetId(self, id):
self.__id = id
if id is not None and id.text is not None:
self.__id.text = id.text.strip()
id = property(__GetId, __SetId)
def __init__(self, category=None, content=None,
atom_id=None, link=None, title=None, updated=None,
sitemap_type=None, sitemap_status=None, sitemap_last_downloaded=None,
sitemap_url_count=None, sitemap_mobile_markup_language=None,
sitemap_news_publication_label=None,
extension_elements=None, extension_attributes=None, text=None):
atom.Entry.__init__(self, category=category,
content=content, atom_id=atom_id, link=link,
title=title, updated=updated, text=text)
self.sitemap_type = sitemap_type
self.sitemap_status = sitemap_status
self.sitemap_last_downloaded = sitemap_last_downloaded
self.sitemap_url_count = sitemap_url_count
self.sitemap_mobile_markup_language = sitemap_mobile_markup_language
self.sitemap_news_publication_label = sitemap_news_publication_label
def SitemapsEntryFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapsEntry, xml_string)
class SitemapsFeed(atom.Feed, LinkFinder):
"""A Google Webmaster Tools meta Sitemaps feed flavor of an Atom Feed"""
_tag = atom.Feed._tag
_namespace = atom.Feed._namespace
_children = atom.Feed._children.copy()
_attributes = atom.Feed._attributes.copy()
_children['{%s}entry' % atom.ATOM_NAMESPACE] = ('entry', [SitemapsEntry])
_children['{%s}sitemap-mobile' % GWEBMASTERTOOLS_NAMESPACE] = (
'sitemap_mobile', SitemapMobile)
_children['{%s}sitemap-news' % GWEBMASTERTOOLS_NAMESPACE] = (
'sitemap_news', SitemapNews)
del _children['{%s}generator' % atom.ATOM_NAMESPACE]
del _children['{%s}author' % atom.ATOM_NAMESPACE]
del _children['{%s}contributor' % atom.ATOM_NAMESPACE]
del _children['{%s}logo' % atom.ATOM_NAMESPACE]
del _children['{%s}icon' % atom.ATOM_NAMESPACE]
del _children['{%s}rights' % atom.ATOM_NAMESPACE]
del _children['{%s}subtitle' % atom.ATOM_NAMESPACE]
def __GetId(self):
return self.__id
def __SetId(self, id):
self.__id = id
if id is not None and id.text is not None:
self.__id.text = id.text.strip()
id = property(__GetId, __SetId)
def __init__(self, category=None, content=None,
atom_id=None, link=None, title=None, updated=None,
entry=None, sitemap_mobile=None, sitemap_news=None,
extension_elements=None, extension_attributes=None, text=None):
self.category = category or []
self.id = atom_id
self.link = link or []
self.title = title
self.updated = updated
self.entry = entry or []
self.text = text
self.sitemap_mobile = sitemap_mobile
self.sitemap_news = sitemap_news
self.extension_elements = extension_elements or []
self.extension_attributes = extension_attributes or {}
def SitemapsFeedFromString(xml_string):
return atom.CreateClassFromXMLString(SitemapsFeed, xml_string)

View File

@@ -0,0 +1,217 @@
#!/usr/bin/python
#
# Copyright (C) 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the data classes of the Google Webmaster Tools Data API"""
__author__ = 'j.s@google.com (Jeff Scudder)'
import atom.core
import atom.data
import gdata.data
import gdata.opensearch.data
WT_TEMPLATE = '{http://schemas.google.com/webmaster/tools/2007/}%s'
class CrawlIssueCrawlType(atom.core.XmlElement):
"""Type of crawl of the crawl issue"""
_qname = WT_TEMPLATE % 'crawl-type'
class CrawlIssueDateDetected(atom.core.XmlElement):
"""Detection date for the issue"""
_qname = WT_TEMPLATE % 'date-detected'
class CrawlIssueDetail(atom.core.XmlElement):
"""Detail of the crawl issue"""
_qname = WT_TEMPLATE % 'detail'
class CrawlIssueIssueType(atom.core.XmlElement):
"""Type of crawl issue"""
_qname = WT_TEMPLATE % 'issue-type'
class CrawlIssueLinkedFromUrl(atom.core.XmlElement):
"""Source URL that links to the issue URL"""
_qname = WT_TEMPLATE % 'linked-from'
class CrawlIssueUrl(atom.core.XmlElement):
"""URL affected by the crawl issue"""
_qname = WT_TEMPLATE % 'url'
class CrawlIssueEntry(gdata.data.GDEntry):
"""Describes a crawl issue entry"""
date_detected = CrawlIssueDateDetected
url = CrawlIssueUrl
detail = CrawlIssueDetail
issue_type = CrawlIssueIssueType
crawl_type = CrawlIssueCrawlType
linked_from = [CrawlIssueLinkedFromUrl]
class CrawlIssuesFeed(gdata.data.GDFeed):
"""Feed of crawl issues for a particular site"""
entry = [CrawlIssueEntry]
class Indexed(atom.core.XmlElement):
"""Describes the indexing status of a site"""
_qname = WT_TEMPLATE % 'indexed'
class Keyword(atom.core.XmlElement):
"""A keyword in a site or in a link to a site"""
_qname = WT_TEMPLATE % 'keyword'
source = 'source'
class KeywordEntry(gdata.data.GDEntry):
"""Describes a keyword entry"""
class KeywordsFeed(gdata.data.GDFeed):
"""Feed of keywords for a particular site"""
entry = [KeywordEntry]
keyword = [Keyword]
class LastCrawled(atom.core.XmlElement):
"""Describes the last crawled date of a site"""
_qname = WT_TEMPLATE % 'last-crawled'
class MessageBody(atom.core.XmlElement):
"""Message body"""
_qname = WT_TEMPLATE % 'body'
class MessageDate(atom.core.XmlElement):
"""Message date"""
_qname = WT_TEMPLATE % 'date'
class MessageLanguage(atom.core.XmlElement):
"""Message language"""
_qname = WT_TEMPLATE % 'language'
class MessageRead(atom.core.XmlElement):
"""Indicates if the message has already been read"""
_qname = WT_TEMPLATE % 'read'
class MessageSubject(atom.core.XmlElement):
"""Message subject"""
_qname = WT_TEMPLATE % 'subject'
class SiteId(atom.core.XmlElement):
"""Site URL"""
_qname = WT_TEMPLATE % 'id'
class MessageEntry(gdata.data.GDEntry):
"""Describes a message entry"""
wt_id = SiteId
subject = MessageSubject
date = MessageDate
body = MessageBody
language = MessageLanguage
read = MessageRead
class MessagesFeed(gdata.data.GDFeed):
"""Describes a messages feed"""
entry = [MessageEntry]
class SitemapEntry(gdata.data.GDEntry):
"""Describes a sitemap entry"""
indexed = Indexed
wt_id = SiteId
class SitemapMobileMarkupLanguage(atom.core.XmlElement):
"""Describes a markup language for URLs in this sitemap"""
_qname = WT_TEMPLATE % 'sitemap-mobile-markup-language'
class SitemapMobile(atom.core.XmlElement):
"""Lists acceptable mobile markup languages for URLs in this sitemap"""
_qname = WT_TEMPLATE % 'sitemap-mobile'
sitemap_mobile_markup_language = [SitemapMobileMarkupLanguage]
class SitemapNewsPublicationLabel(atom.core.XmlElement):
"""Specifies the publication label for this sitemap"""
_qname = WT_TEMPLATE % 'sitemap-news-publication-label'
class SitemapNews(atom.core.XmlElement):
"""Lists publication labels for this sitemap"""
_qname = WT_TEMPLATE % 'sitemap-news'
sitemap_news_publication_label = [SitemapNewsPublicationLabel]
class SitemapType(atom.core.XmlElement):
"""Indicates the type of sitemap. Not used for News or Mobile Sitemaps"""
_qname = WT_TEMPLATE % 'sitemap-type'
class SitemapUrlCount(atom.core.XmlElement):
"""Indicates the number of URLs contained in the sitemap"""
_qname = WT_TEMPLATE % 'sitemap-url-count'
class SitemapsFeed(gdata.data.GDFeed):
"""Describes a sitemaps feed"""
entry = [SitemapEntry]
class VerificationMethod(atom.core.XmlElement):
"""Describes a verification method that may be used for a site"""
_qname = WT_TEMPLATE % 'verification-method'
in_use = 'in-use'
type = 'type'
class Verified(atom.core.XmlElement):
"""Describes the verification status of a site"""
_qname = WT_TEMPLATE % 'verified'
class SiteEntry(gdata.data.GDEntry):
"""Describes a site entry"""
indexed = Indexed
wt_id = SiteId
verified = Verified
last_crawled = LastCrawled
verification_method = [VerificationMethod]
class SitesFeed(gdata.data.GDFeed):
"""Describes a sites feed"""
entry = [SiteEntry]

View File

@@ -0,0 +1,516 @@
#!/usr/bin/python
#
# Copyright (C) 2008 Yu-Jie Lin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""GWebmasterToolsService extends the GDataService to streamline
Google Webmaster Tools operations.
GWebmasterToolsService: Provides methods to query feeds and manipulate items.
Extends GDataService.
"""
__author__ = 'livibetter (Yu-Jie Lin)'
import urllib
import gdata
import atom.service
import gdata.service
import gdata.webmastertools as webmastertools
import atom
FEED_BASE = 'https://www.google.com/webmasters/tools/feeds/'
SITES_FEED = FEED_BASE + 'sites/'
SITE_TEMPLATE = SITES_FEED + '%s'
SITEMAPS_FEED_TEMPLATE = FEED_BASE + '%(site_id)s/sitemaps/'
SITEMAP_TEMPLATE = SITEMAPS_FEED_TEMPLATE + '%(sitemap_id)s'
class Error(Exception):
pass
class RequestError(Error):
pass
class GWebmasterToolsService(gdata.service.GDataService):
"""Client for the Google Webmaster Tools service."""
def __init__(self, email=None, password=None, source=None,
server='www.google.com', **kwargs):
"""Creates a client for the Google Webmaster Tools service.
Args:
email: string (optional) The user's email address, used for
authentication.
password: string (optional) The user's password.
source: string (optional) The name of the user's application.
server: string (optional) The name of the server to which a connection
will be opened. Default value: 'www.google.com'.
**kwargs: The other parameters to pass to gdata.service.GDataService
constructor.
"""
gdata.service.GDataService.__init__(
self, email=email, password=password, service='sitemaps', source=source,
server=server, **kwargs)
def GetSitesFeed(self, uri=SITES_FEED,
converter=webmastertools.SitesFeedFromString):
"""Gets sites feed.
Args:
uri: str (optional) URI to retrieve sites feed.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitesFeedFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesFeed object.
"""
return self.Get(uri, converter=converter)
def AddSite(self, site_uri, uri=SITES_FEED,
url_params=None, escape_params=True, converter=None):
"""Adds a site to Google Webmaster Tools.
Args:
site_uri: str URI of which site to add.
uri: str (optional) URI to add a site.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitesEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesEntry object.
"""
site_entry = webmastertools.SitesEntry()
site_entry.content = atom.Content(src=site_uri)
response = self.Post(site_entry, uri,
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitesEntryFromString(response.ToString())
return response
def DeleteSite(self, site_uri, uri=SITE_TEMPLATE,
url_params=None, escape_params=True):
"""Removes a site from Google Webmaster Tools.
Args:
site_uri: str URI of which site to remove.
uri: str (optional) A URI template to send DELETE request.
Default SITE_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
Returns:
True if the delete succeeded.
"""
return self.Delete(
uri % urllib.quote_plus(site_uri),
url_params=url_params, escape_params=escape_params)
def VerifySite(self, site_uri, verification_method, uri=SITE_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Requests a verification of a site.
Args:
site_uri: str URI of which site to add sitemap for.
verification_method: str The method to verify a site. Valid values are
'htmlpage', and 'metatag'.
uri: str (optional) URI template to update a site.
Default SITE_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesEntry object.
"""
site_entry = webmastertools.SitesEntry(
atom_id=atom.Id(text=site_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sites-info'),
verification_method=webmastertools.VerificationMethod(
type=verification_method, in_use='true')
)
response = self.Put(
site_entry,
uri % urllib.quote_plus(site_uri),
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitesEntryFromString(response.ToString())
return response
def UpdateGeoLocation(self, site_uri, geolocation, uri=SITE_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Updates geolocation setting of a site.
Args:
site_uri: str URI of which site to add sitemap for.
geolocation: str The geographic location. Valid values are listed in
http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
uri: str (optional) URI template to update a site.
Default SITE_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesEntry object.
"""
site_entry = webmastertools.SitesEntry(
atom_id=atom.Id(text=site_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sites-info'),
geolocation=webmastertools.GeoLocation(text=geolocation)
)
response = self.Put(
site_entry,
uri % urllib.quote_plus(site_uri),
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitesEntryFromString(response.ToString())
return response
def UpdateCrawlRate(self, site_uri, crawl_rate, uri=SITE_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Updates crawl rate setting of a site.
Args:
site_uri: str URI of which site to add sitemap for.
crawl_rate: str The crawl rate for a site. Valid values are 'slower',
'normal', and 'faster'.
uri: str (optional) URI template to update a site.
Default SITE_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesEntry object.
"""
site_entry = webmastertools.SitesEntry(
atom_id=atom.Id(text=site_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sites-info'),
crawl_rate=webmastertools.CrawlRate(text=crawl_rate)
)
response = self.Put(
site_entry,
uri % urllib.quote_plus(site_uri),
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitesEntryFromString(response.ToString())
return response
def UpdatePreferredDomain(self, site_uri, preferred_domain, uri=SITE_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Updates preferred domain setting of a site.
Note that if using 'preferwww', will also need www.example.com in account to
take effect.
Args:
site_uri: str URI of which site to add sitemap for.
preferred_domain: str The preferred domain for a site. Valid values are 'none',
'preferwww', and 'prefernowww'.
uri: str (optional) URI template to update a site.
Default SITE_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesEntry object.
"""
site_entry = webmastertools.SitesEntry(
atom_id=atom.Id(text=site_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sites-info'),
preferred_domain=webmastertools.PreferredDomain(text=preferred_domain)
)
response = self.Put(
site_entry,
uri % urllib.quote_plus(site_uri),
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitesEntryFromString(response.ToString())
return response
def UpdateEnhancedImageSearch(self, site_uri, enhanced_image_search,
uri=SITE_TEMPLATE, url_params=None, escape_params=True, converter=None):
"""Updates enhanced image search setting of a site.
Args:
site_uri: str URI of which site to add sitemap for.
enhanced_image_search: str The enhanced image search setting for a site.
Valid values are 'true', and 'false'.
uri: str (optional) URI template to update a site.
Default SITE_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitesEntry object.
"""
site_entry = webmastertools.SitesEntry(
atom_id=atom.Id(text=site_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sites-info'),
enhanced_image_search=webmastertools.EnhancedImageSearch(
text=enhanced_image_search)
)
response = self.Put(
site_entry,
uri % urllib.quote_plus(site_uri),
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitesEntryFromString(response.ToString())
return response
def GetSitemapsFeed(self, site_uri, uri=SITEMAPS_FEED_TEMPLATE,
converter=webmastertools.SitemapsFeedFromString):
"""Gets sitemaps feed of a site.
Args:
site_uri: str (optional) URI of which site to retrieve its sitemaps feed.
uri: str (optional) URI to retrieve sites feed.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsFeedFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitemapsFeed object.
"""
return self.Get(uri % {'site_id': urllib.quote_plus(site_uri)},
converter=converter)
def AddSitemap(self, site_uri, sitemap_uri, sitemap_type='WEB',
uri=SITEMAPS_FEED_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Adds a regular sitemap to a site.
Args:
site_uri: str URI of which site to add sitemap for.
sitemap_uri: str URI of sitemap to add to a site.
sitemap_type: str Type of added sitemap. Valid types: WEB, VIDEO, or CODE.
uri: str (optional) URI template to add a sitemap.
Default SITEMAP_FEED_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitemapsEntry object.
"""
sitemap_entry = webmastertools.SitemapsEntry(
atom_id=atom.Id(text=sitemap_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sitemap-regular'),
sitemap_type=webmastertools.SitemapType(text=sitemap_type))
response = self.Post(
sitemap_entry,
uri % {'site_id': urllib.quote_plus(site_uri)},
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitemapsEntryFromString(response.ToString())
return response
def AddMobileSitemap(self, site_uri, sitemap_uri,
sitemap_mobile_markup_language='XHTML', uri=SITEMAPS_FEED_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Adds a mobile sitemap to a site.
Args:
site_uri: str URI of which site to add sitemap for.
sitemap_uri: str URI of sitemap to add to a site.
sitemap_mobile_markup_language: str Format of added sitemap. Valid types:
XHTML, WML, or cHTML.
uri: str (optional) URI template to add a sitemap.
Default SITEMAP_FEED_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitemapsEntry object.
"""
# FIXME
sitemap_entry = webmastertools.SitemapsEntry(
atom_id=atom.Id(text=sitemap_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sitemap-mobile'),
sitemap_mobile_markup_language=\
webmastertools.SitemapMobileMarkupLanguage(
text=sitemap_mobile_markup_language))
print sitemap_entry
response = self.Post(
sitemap_entry,
uri % {'site_id': urllib.quote_plus(site_uri)},
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitemapsEntryFromString(response.ToString())
return response
def AddNewsSitemap(self, site_uri, sitemap_uri,
sitemap_news_publication_label, uri=SITEMAPS_FEED_TEMPLATE,
url_params=None, escape_params=True, converter=None):
"""Adds a news sitemap to a site.
Args:
site_uri: str URI of which site to add sitemap for.
sitemap_uri: str URI of sitemap to add to a site.
sitemap_news_publication_label: str, list of str Publication Labels for
sitemap.
uri: str (optional) URI template to add a sitemap.
Default SITEMAP_FEED_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
converter: func (optional) Function which is executed on the server's
response before it is returned. Usually this is a function like
SitemapsEntryFromString which will parse the response and turn it into
an object.
Returns:
If converter is defined, the results of running converter on the server's
response. Otherwise, it will be a SitemapsEntry object.
"""
sitemap_entry = webmastertools.SitemapsEntry(
atom_id=atom.Id(text=sitemap_uri),
category=atom.Category(
scheme='http://schemas.google.com/g/2005#kind',
term='http://schemas.google.com/webmasters/tools/2007#sitemap-news'),
sitemap_news_publication_label=[],
)
if isinstance(sitemap_news_publication_label, str):
sitemap_news_publication_label = [sitemap_news_publication_label]
for label in sitemap_news_publication_label:
sitemap_entry.sitemap_news_publication_label.append(
webmastertools.SitemapNewsPublicationLabel(text=label))
print sitemap_entry
response = self.Post(
sitemap_entry,
uri % {'site_id': urllib.quote_plus(site_uri)},
url_params=url_params,
escape_params=escape_params, converter=converter)
if not converter and isinstance(response, atom.Entry):
return webmastertools.SitemapsEntryFromString(response.ToString())
return response
def DeleteSitemap(self, site_uri, sitemap_uri, uri=SITEMAP_TEMPLATE,
url_params=None, escape_params=True):
"""Removes a sitemap from a site.
Args:
site_uri: str URI of which site to remove a sitemap from.
sitemap_uri: str URI of sitemap to remove from a site.
uri: str (optional) A URI template to send DELETE request.
Default SITEMAP_TEMPLATE.
url_params: dict (optional) Additional URL parameters to be included
in the insertion request.
escape_params: boolean (optional) If true, the url_parameters will be
escaped before they are included in the request.
Returns:
True if the delete succeeded.
"""
return self.Delete(
uri % {'site_id': urllib.quote_plus(site_uri),
'sitemap_id': urllib.quote_plus(sitemap_uri)},
url_params=url_params, escape_params=escape_params)