Source code for viapy.api

import logging
import time

from attrdict import AttrMap
from cached_property import cached_property
import requests
import rdflib
from rdflib.namespace import Namespace


# module-level logger, named after this module; used for debug timing output
logger = logging.getLogger(__name__)


#: rdflib namespace for schema.org terms (used to look up birthDate/deathDate)
SCHEMA_NS = Namespace('http://schema.org/')


class ViafAPI(object):
    """Wrapper for VIAF API.

    https://platform.worldcat.org/api-explorer/apis/VIAF
    """

    # NOTE: API urls use www prefix, but VIAF URIs do not

    #: base url for VIAF API methods
    api_base = "https://www.viaf.org/viaf"
    #: base url for VIAF URIs
    uri_base = "http://viaf.org/viaf"

    @classmethod
    def uri_from_id(cls, viaf_id):
        """Generate a canonical VIAF URI for the specified id.

        :param viaf_id: VIAF identifier; formatted with ``%s``, so both
            integers and strings are accepted
        :returns: string of the form ``<uri_base>/<viaf_id>``
        """
        return "%s/%s" % (cls.uri_base, viaf_id)

    def suggest(self, term):
        """Query autosuggest API.

        :param term: search term, sent as the ``query`` parameter
        :returns: list of results, or an empty list if no suggestions
            are found or the response carried no usable result
        :raises requests.HTTPError: if the response is an HTTP error
        """
        # 'viaf/AutoSuggest?query=[searchTerms]&callback[optionalCallbackName]
        autosuggest_url = '%s/AutoSuggest' % self.api_base
        response = requests.get(autosuggest_url, params={'query': term},
                                headers={'accept': 'application/json'})
        logger.debug('autosuggest \'%s\': %s %s, %0.2f',
                     term, response.status_code, response.reason,
                     response.elapsed.total_seconds())

        if response.status_code == requests.codes.ok:
            # 'result' may be missing or null when nothing matches
            return response.json().get('result', None) or []

        # if there was an http error, raise it
        response.raise_for_status()

        return []

    def search(self, query):
        """Query VIAF search interface.

        :param query: CQL query in viaf syntax (e.g., ``cql.any all "term"``)
        :returns: list of :class:`SRUItem`; an empty list when the query
            matches no records
        :raises requests.HTTPError: if the response is an HTTP error
        """
        search_url = '%s/search' % self.api_base
        params = {
            'query': query,
            'httpAccept': 'application/json',
            'maximumRecords': 50,  # TODO: configurable ?
            # sort by number of holdings (default sort on web search)
            # - so better known names show up first
            'sortKeys': 'holdingscount'
        }

        response = requests.get(search_url, params=params)
        logger.debug('search \'%s\': %s %s, %0.2f',
                     params['query'], response.status_code, response.reason,
                     response.elapsed.total_seconds())

        if response.status_code == requests.codes.ok:
            data = SRUResult(response.json())
            if data.total_results:
                return data.records

        # if response was not ok, raise the error
        response.raise_for_status()

        # no matching records (or a non-error, non-200 response):
        # return an empty list rather than falling through to None, so the
        # return type is always a list, consistent with suggest()
        return []

    def _find_type(self, fltr, value):
        """Search a single VIAF index with an ``all`` relation.

        :param fltr: name of the CQL index (e.g. ``local.personalNames``)
        :param value: text to search for
        :returns: result of :meth:`search` for the generated query
        """
        # NOTE: value is interpolated as-is; embedded double quotes
        # are not escaped
        return self.search('%s all "%s"' % (fltr, value))

    def find_person(self, name):
        """Search VIAF for local.personalNames"""
        return self._find_type('local.personalNames', name)

    def find_corporate(self, name):
        """Search VIAF for local.corporateNames"""
        return self._find_type('local.corporateNames', name)

    def find_place(self, name):
        """Search VIAF for local.geographicNames"""
        return self._find_type('local.geographicNames', name)
class ViafEntity(object):
    """Object for working with a single VIAF entity.

    :param viaf_id: viaf identifier (either integer or uri)
    """

    def __init__(self, viaf_id):
        try:
            # only used to check whether the id is numeric
            int(viaf_id)
        except ValueError:
            # NOTE: do we need to canonicalize the URI in any way to
            # ensure RDF queries work properly?
            self.uri = viaf_id
        else:
            self.uri = ViafAPI.uri_from_id(viaf_id)

    @property
    def uriref(self):
        """VIAF URI reference as instance of :class:`rdflib.URIRef`"""
        return rdflib.URIRef(self.uri)

    @cached_property
    def rdf(self):
        """VIAF data for this entity as :class:`rdflib.Graph`.

        The graph is parsed from :attr:`uri` on first access and cached
        on the instance.
        """
        start = time.time()
        graph = rdflib.Graph()
        graph.parse(self.uri)
        logger.debug('Loaded VIAF RDF %s: %0.2f sec',
                     self.uri, time.time() - start)
        return graph

    # person-specific properties

    @property
    def birthdate(self):
        """schema birthdate as :class:`rdflib.Literal`"""
        return self.rdf.value(self.uriref, SCHEMA_NS.birthDate)

    @property
    def deathdate(self):
        """schema deathdate as :class:`rdflib.Literal`"""
        return self.rdf.value(self.uriref, SCHEMA_NS.deathDate)

    @property
    def birthyear(self):
        """birth year as an integer, or None if there is no birth date"""
        # read the property once; each access queries the graph
        birthdate = self.birthdate
        if birthdate:
            return self.year_from_isodate(str(birthdate))
        return None

    @property
    def deathyear(self):
        """death year as an integer, or None if there is no death date"""
        # read the property once; each access queries the graph
        deathdate = self.deathdate
        if deathdate:
            return self.year_from_isodate(str(deathdate))
        return None

    # utility method for date parsing

    @classmethod
    def year_from_isodate(cls, date):
        """Return just the year portion of an ISO8601 date.

        Handles negative (BCE) years written with a leading dash,
        e.g. ``-0427-01-01``.

        :param date: date string; the year must be the first
            dash-delimited component
        :returns: year as an integer (negative for a leading ``-``)
        :raises ValueError: if the year portion is not an integer
        """
        # a leading dash is a sign, not a field separator; set it aside
        # so that splitting on '-' does not yield an empty year
        negative = date.startswith('-')
        unsigned = date[1:] if negative else date
        year = int(unsigned.split('-')[0])
        return -year if negative else year
class SRUResult(object):
    """Wrapper around the JSON body of an SRU search response, for use
    with :meth:`ViafAPI.search`.

    :param data: decoded JSON response; the ``searchRetrieveResponse``
        entry is used, falling back to an empty dict when it is absent
    """

    def __init__(self, data):
        response_body = data.get('searchRetrieveResponse', {})
        self._data = response_body

    @cached_property
    def total_results(self):
        """number of records matching the query (0 if not reported)"""
        reported = self._data.get('numberOfRecords', 0)
        return int(reported)

    @cached_property
    def records(self):
        """list of results, each wrapped as :class:`SRUItem`"""
        items = []
        for entry in self._data.get('records', []):
            items.append(SRUItem(entry['record']))
        return items
class SRUItem(AttrMap):
    """One record from an SRU search response, for use with
    :meth:`ViafAPI.search` and :class:`SRUResult`."""

    @property
    def uri(self):
        """VIAF URI for this result"""
        document = self.recordData.Document
        return document['@about']

    @property
    def viaf_id(self):
        """VIAF numeric identifier"""
        record = self.recordData
        return record.viafID

    @property
    def nametype(self):
        """type of name (personal, corporate, title, etc)"""
        record = self.recordData
        return record.nameType

    @property
    def label(self):
        """first main heading for this item"""
        try:
            # index into the headings data and take the first entry
            first_heading = self.recordData.mainHeadings.data[0]
            return first_heading.text
        except KeyError:
            # indexing failed with KeyError: read text directly
            # from the headings data instead
            return self.recordData.mainHeadings.data.text