import logging
import re
import time
import rdflib
import requests
from attrdict import AttrMap
from cached_property import cached_property
from rdflib.namespace import Namespace
# module-level logger, named after this module; used for debug timing output
logger = logging.getLogger(__name__)
#: rdflib namespace for schema.org terms (e.g. ``SCHEMA_NS.birthDate``)
SCHEMA_NS = Namespace("http://schema.org/")
[docs]
class ViafAPI:
    """Wrapper for VIAF API.

    https://platform.worldcat.org/api-explorer/apis/VIAF
    """

    # NOTE: API urls use www prefix, but VIAF URIs do not
    #: base url for VIAF API methods
    api_base = "https://www.viaf.org/viaf"
    #: base url for VIAF URIs
    uri_base = "http://viaf.org/viaf"
    #: seconds to wait for a response from VIAF; without a timeout
    #: ``requests`` would wait indefinitely on an unresponsive server
    timeout = 30

    @classmethod
    def uri_from_id(cls, viaf_id):
        """Generate a canonical VIAF URI for the specified id"""
        return f"{cls.uri_base}/{viaf_id}"

    def suggest(self, term):
        """Query autosuggest API.

        :param term: text to look up
        :returns: list of results; an empty list if no suggestions
            are found
        :raises requests.HTTPError: if VIAF responds with an HTTP error status
        """
        # 'viaf/AutoSuggest?query=[searchTerms]&callback[optionalCallbackName]
        autosuggest_url = f"{self.api_base}/AutoSuggest"
        response = requests.get(
            autosuggest_url,
            params={"query": term},
            headers={"accept": "application/json"},
            timeout=self.timeout,
        )
        logger.debug(
            "autosuggest '%s': %s %s, %0.2f",
            term,
            response.status_code,
            response.reason,
            response.elapsed.total_seconds(),
        )
        if response.status_code == requests.codes.ok:
            # "result" may be absent or null when nothing matches
            return response.json().get("result", None) or []
        # if there was an http error, raise it
        response.raise_for_status()
        return []

    def search(self, query, max_records=10):
        """Query VIAF search interface.

        :param query: CQL query in viaf syntax (e.g., ``cql.any all "term"``)
        :param max_records: maximum number of records to request
            (default: 10)
        :returns: list of :class:`SRUItem`; an empty list when nothing
            matches
        :raises requests.HTTPError: if VIAF responds with an HTTP error status
        """
        search_url = f"{self.api_base}/search"
        params = {
            "query": query,
            "maximumRecords": max_records,
            # sort by number of holdings (default sort on web search)
            # - so better known names show up first
            "sortKey": "holdingscount",
        }
        response = requests.get(
            search_url,
            params=params,
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        logger.debug(
            "search '%s': %s %s, %0.2f",
            params["query"],
            response.status_code,
            response.reason,
            response.elapsed.total_seconds(),
        )
        if response.status_code == requests.codes.ok:
            data = SRUResult(response.json())
            if data.total_results:
                return data.records
        # if response was not ok, raise the error
        response.raise_for_status()
        # no matches (or a non-error, non-ok status): always hand back a
        # list so callers can iterate without checking for None
        return []

    def _find_type(self, fltr, value):
        """Search for `value` in the index named by `fltr`.

        Backslashes and double quotes in `value` are backslash-escaped so
        they cannot terminate the quoted CQL search term early.
        """
        escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
        return self.search(f'{fltr} all "{escaped}"')

    def find_person(self, name):
        """Search VIAF for local.personalNames"""
        return self._find_type("local.personalNames", name)

    def find_corporate(self, name):
        """Search VIAF for local.corporateNames"""
        return self._find_type("local.corporateNames", name)

    def find_place(self, name):
        """Search VIAF for local.geographicNames"""
        return self._find_type("local.geographicNames", name)
[docs]
class ViafEntity:
    """Object for working with a single VIAF entity.

    :param viaf_id: viaf identifier (either integer or uri)
    """

    #: seconds to wait when fetching RDF; without a timeout ``requests``
    #: would wait indefinitely on an unresponsive server
    timeout = 30

    def __init__(self, viaf_id):
        try:
            # anything that parses as an integer is treated as a bare id
            int(viaf_id)
        except ValueError:
            # NOTE: do we need to canonicalize the URI in any way to
            # ensure RDF queries work properly?
            self.uri = viaf_id
        else:
            self.uri = ViafAPI.uri_from_id(viaf_id)

    @property
    def uriref(self):
        """VIAF URI reference as instance of :class:`rdflib.URIRef`"""
        return rdflib.URIRef(self.uri)

    @cached_property
    def rdf(self):
        """VIAF data for this entity as :class:`rdflib.Graph`

        :raises requests.HTTPError: if the request for the RDF fails
        """
        start = time.time()
        graph = rdflib.Graph()
        # 2025 update: Accept header now required, so use requests.get to retrieve RDF
        response = requests.get(
            self.uri,
            headers={"Accept": "application/rdf+xml"},
            timeout=self.timeout,
        )
        response.raise_for_status()  # raise HTTPError on non-success status
        graph.parse(data=response.text, format="xml")
        logger.debug("Loaded VIAF RDF %s: %0.2f sec", self.uri, time.time() - start)
        return graph

    # person-specific properties

    @property
    def birthdate(self):
        """schema birthdate as :class:`rdflib.Literal`"""
        return self.rdf.value(self.uriref, SCHEMA_NS.birthDate)

    @property
    def deathdate(self):
        """schema deathdate as :class:`rdflib.Literal`"""
        return self.rdf.value(self.uriref, SCHEMA_NS.deathDate)

    @property
    def birthyear(self):
        """birth year as an integer, or None if there is no birth date"""
        # look the value up once rather than querying the graph twice
        birthdate = self.birthdate
        if birthdate:
            return self.year_from_isodate(str(birthdate))
        return None

    @property
    def deathyear(self):
        """death year as an integer, or None if there is no death date"""
        # look the value up once rather than querying the graph twice
        deathdate = self.deathdate
        if deathdate:
            return self.year_from_isodate(str(deathdate))
        return None

    # utility method for date parsing

    @classmethod
    def year_from_isodate(cls, date):
        """Return just the year portion of an ISO8601 date. Expects
        a string, returns an integer. Supports negative dates.

        :raises ValueError: if the leading portion is not an integer
        """
        # a leading dash marks a negative year; strip it off before
        # splitting so it is not mistaken for a field separator
        negative = date.startswith("-")
        if negative:
            date = date[1:]
        year = int(date.split("-")[0])
        return -year if negative else year
[docs]
class SRUResult:
    """SRU search result object, for use with :meth:`ViafAPI.search`.

    :param data: decoded JSON response; the ``searchRetrieveResponse``
        member is used, and a missing or null value is treated as empty
    """

    # numbered namespace prefix on a key (ns2:, ns3:, ...); compiled once
    # because normalize_record applies it to every key of every record
    _ns_prefix = re.compile(r"^ns\d+:")

    def __init__(self, data):
        self._data = (data or {}).get("searchRetrieveResponse") or {}

    @cached_property
    def total_results(self):
        """number of records matching the query (0 if not reported)"""
        count = self._data.get("numberOfRecords")
        # the count is normally wrapped in a mapping under "content";
        # also tolerate it being given directly
        if isinstance(count, dict):
            count = count.get("content")
        # missing or null count means no results
        return int(count or 0)

    @cached_property
    def records(self):
        """List of results as :class:`SRUItem`."""
        container = self._data.get("records")
        found = container.get("record") if isinstance(container, dict) else None
        # a single match is given as one mapping rather than a list
        if isinstance(found, dict):
            return [SRUItem(self.normalize_record(found))]
        if isinstance(found, list):
            return [SRUItem(self.normalize_record(item)) for item in found]
        return []

    def normalize_record(self, data):
        """Added in May 2025 to match updates to the /search API records, where
        the JSON response now uses namespaced keys that increase per result:
        ns2, ns3, ns4, and so on, applying to most subkeys (ns2:VIAFCluster,
        ns2:Document, etc). This method strips all nsX: prefixes recursively.

        :param data: a mapping, list, or scalar from the decoded response
        :returns: a copy with prefixes removed from every mapping key;
            values other than mappings and lists are returned unchanged
        """
        if isinstance(data, dict):
            return {
                self._ns_prefix.sub("", key): self.normalize_record(value)
                for key, value in data.items()
            }
        if isinstance(data, list):
            return [self.normalize_record(item) for item in data]
        return data
[docs]
class SRUItem(AttrMap):
    """One record from a SRU search; produced by :class:`SRUResult` for
    :meth:`ViafAPI.search`.

    Every lookup goes through the record's `VIAFCluster` member, which was
    added in 2025 to follow changes in the /search API's JSON response."""

    @property
    def uri(self):
        """VIAF URI for this result"""
        cluster = self.recordData.VIAFCluster
        return cluster.Document["about"]

    @property
    def viaf_id(self):
        """VIAF numeric identifier"""
        cluster = self.recordData.VIAFCluster
        return cluster.viafID

    @property
    def nametype(self):
        """type of name (personal, corporate, title, etc)"""
        cluster = self.recordData.VIAFCluster
        return cluster.nameType

    @property
    def label(self):
        """first main heading for this item"""
        try:
            # usual case: several headings, take the first
            headings = self.recordData.VIAFCluster.mainHeadings.data
            first = headings[0]
            return first.text
        except (KeyError, IndexError):
            # otherwise treat `data` as a single heading
            only = self.recordData.VIAFCluster.mainHeadings.data
            return only.text