Signposting

Stian Soiland-Reyes, The University of Manchester, UK

Motivation

How to find the way from persistent identifiers,

through a repository

to find data and metadata

A typical PID resolution

GET https://doi.org/10.abcd/j.mystery.14

HTTP/1.1 302 Found
Location: http://example.com/repo/item/14

Browsers follow redirection to the repository's landing page

GET http://example.com/repo/item/14

HTTP/1.1 200 OK
Content-Type: text/html

<html>
  <head> … </head>
  <body>
  …
  </body>
</html>

HTML for human readability and human navigation

A preview of the resource

Metadata (e.g. title, description) as text

Link to download (pdf, zip)

.. and many unrelated links

There may be multiple downloads, multiple metadata formats

Multiple metadata formats

{
  "@context": "http://schema.org",
  "id": "https://doi.org/10.5281/zenodo.10847062",
  "type": "https://schema.org/PresentationDigitalDocument",
  "author": [
    {
      "id": "https://orcid.org/0000-0001-9842-9718",
      "type": "Person",
      "affiliation": [
        {
          "id": "https://ror.org/027m9bs27",
          "type": "Organization",
          "name": "University of Manchester"
        },
        {
          "id": "https://ror.org/04dkp9463",
          "type": "Organization",
          "name": "University of Amsterdam"
        }
      ],
      "familyName": "Soiland-Reyes",
      "givenName": "Stian",
      "name": "Soiland-Reyes, Stian"
    },
    {
      "id": "https://orcid.org/0000-0002-0715-6126",
      "type": "Person",
      "familyName": "Van de Sompel",
      "givenName": "Herbert",
      "name": "Van de Sompel, Herbert"
    }
  ],
  "contentSize": "11.95 MB",
  "creator": [
    {
      "id": "https://orcid.org/0000-0001-9842-9718",
      "type": "Person",
      "affiliation": [
        {
          "id": "https://ror.org/027m9bs27",
          "type": "Organization",
          "name": "University of Manchester"
        },
        {
          "id": "https://ror.org/04dkp9463",
          "type": "Organization",
          "name": "University of Amsterdam"
        }
      ],
      "familyName": "Soiland-Reyes",
      "givenName": "Stian",
      "name": "Soiland-Reyes, Stian"
    },
    {
      "id": "https://orcid.org/0000-0002-0715-6126",
      "type": "Person",
      "familyName": "Van de Sompel",
      "givenName": "Herbert",
      "name": "Van de Sompel, Herbert"
    }
  ],
  "dateCreated": "2024-03-21T07:52:03.534671+00:00",
  "dateModified": "2024-07-06T20:35:20.427129+00:00",
  "datePublished": "2024-03-21",
  "description": "<p>Presented at FDO Summit 2024</p>",
  "identifier": "https://doi.org/10.5281/zenodo.10847062",
  "license": "https://creativecommons.org/licenses/by/4.0/legalcode",
  "name": "Signposting and RO-Crate: experiences and lessons learned",
  "publisher": {
    "type": "Organization",
    "name": "Zenodo"
  },
  "size": "11.95 MB",
  "url": "https://zenodo.org/records/10847062"
}
<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:bibo="http://purl.org/ontology/bibo/" xmlns:citedcat="https://w3id.org/citedcat-ap/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.10847062">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Text"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.10847062</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
    <dct:creator>
      <rdf:Description rdf:about="https://orcid.org/0000-0001-9842-9718">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>
        <foaf:name>Soiland-Reyes, Stian</foaf:name>
        <foaf:givenName>Stian</foaf:givenName>
        <foaf:familyName>Soiland-Reyes</foaf:familyName>
        <org:memberOf>
          <foaf:Organization rdf:about="https://ror.org/https://ror.org/027m9bs27">
            <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://ror.org/027m9bs27</dct:identifier>
            <foaf:name>University of Manchester</foaf:name>
          </foaf:Organization>
        </org:memberOf>
        <org:memberOf>
          <foaf:Organization rdf:about="https://ror.org/https://ror.org/04dkp9463">
            <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://ror.org/04dkp9463</dct:identifier>
            <foaf:name>University of Amsterdam</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="https://orcid.org/0000-0002-0715-6126">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>
        <foaf:name>Van de Sompel, Herbert</foaf:name>
        <foaf:givenName>Herbert</foaf:givenName>
        <foaf:familyName>Van de Sompel</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:title>Signposting and RO-Crate: experiences and lessons learned</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2024</dct:issued>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-03-21</dct:issued>
    <dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-07-06</dct:modified>
    <owl:sameAs rdf:resource="https://zenodo.org/records/10847062"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/records/10847062</skos:notation>
        <adms:schemeAgency>URL</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation>oai:zenodo.org:10847062</skos:notation>
        <adms:schemeAgency>oai</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf>
      <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.10847061">
        <dct:identifier>https://doi.org/10.5281/zenodo.10847061</dct:identifier>
      </rdf:Description>
    </dct:isVersionOf>
    <dct:isPartOf>
      <rdf:Description rdf:about="https://zenodo.org/communities/ro">
        <dct:identifier>https://zenodo.org/communities/ro</dct:identifier>
      </rdf:Description>
    </dct:isPartOf>
    <dct:isPartOf>
      <rdf:Description rdf:about="https://zenodo.org/communities/esciencelab">
        <dct:identifier>https://zenodo.org/communities/esciencelab</dct:identifier>
      </rdf:Description>
    </dct:isPartOf>
    <dct:description>Presented at FDO Summit 2024</dct:description>
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:downloadURL rdf:resource="https://zenodo.org/records/10847062/files/slides_signposting-and-ro-crate-experience-and-lessons-learned.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
        <dcat:byteSize>6896383</dcat:byteSize>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:downloadURL rdf:resource="https://zenodo.org/records/10847062/files/signposting-and-ro-crate-experience-and-lessons-learned.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
        <dcat:byteSize>5631529</dcat:byteSize>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>

.. even different profiles of same syntax

What about APIs?

  • Site-specific JSON, schema must already be known
  • "Secret" API -- do some URL rewriting
  • API documented as URL patterns, e.g.
    http://api.example.com/items/{item}.json
    • But what is {item} ?
  • Links to download are often hidden behind another API call
GET http://api.example.com/items/14.json

HTTP/1.1 200 OK
Content-Type: application/json

{ "title": "Data 14", 
  "download": "http://cdn.example.net/item14.zip" }

Lots of APIs

What is Signposting?

From any Web resource (e.g. a typical landing page), show how to find:

  • Persistent identifier (how do I find it again?)
  • Type (what is it?)
  • Download links (how do I get it?)
  • Metadata (what is inside?)
  • License (can I reuse it?)
  • Authors (who should I credit?)

 

... assuming we want to do this reliably in software -- for any repository or website

(machine actionable)

Reuse Web standards

 

Principles: Simplicity, navigation, (almost) no semantics

JSON

HTTP

GET http://www.example.com/repo/item/14

HTTP/1.1 200 OK 
Link: <http://cdn.example.net/item14.zip>;     rel="item"
Link: <http://api.example.com/items/14.json>;  rel="describedby"
Link: <https://doi.org/10.abcd/j.mystery.14>;  rel="cite-as"
Link: <https://schema.org/TrainingMaterial>;   rel="type"
Content-Type: text/html
Content-Length: 21398

<html>...

HTML

<html>
<head>
<link rel="stylesheet"   href="https://cdn.jsdelivr.net/npm/bootstrap..." />
<link rel="cite-as"      href="https://doi.org/10.abcd/j.mystery.14"  />
<link rel="item"         href="http://cdn.example.net/item14.zip"  />
<link rel="describedby"  href="http://api.example.com/items/14.json" />
<link rel="type"         href="https://schema.org/TrainingMaterial"   />

</head>
<body>

JSON

{ "linkset": [
  { "anchor": "http://www.example.com/repo/item/14",
    "cite-as": [
      { "href": "https://doi.org/10.abcd/j.mystery.14" }
    ],
    "item": [
      { "href": "http://cdn.example.net/item14.zip" }
    ],
    "describedby": [
      { "href": "http://api.example.com/items/14.json" }
    ]
  },
  { "anchor": "..." }
 ]
}

FAIR Signposting

Making the links meaningful

Specify the content type:

Link: <http://cdn.example.net/item14.zip>; rel="item"; type="application/zip"

Useful for alternative formats

 

For generic types, also specify a profile:

Link: <http://api.example.com/marc/14>; rel="describedby";

type="application/xml";

profile="http://www.loc.gov/MARC21/slim"

Useful for JSONs with different schemas and linking to APIs

Webby FAIR Digital Objects

  • Recognises that Web (aka HTTP) is ubiquitous, not going away soon
  • Builds on existing Web standards
    • Signposting for navigation (PID record)
    • Linked Data for metadata
    • RO-Crate as basis for metadata profiles
  • Resource

  • Persistent identifier (PID).

  • FDO record

  • FDO attributes

    • mandatory FDO attributes 0.FDO/*

    • optional FDO attributes

  • FDO attribute definitions

  • FDO profiles

  • FDO type

  • Registry

FDO conceptual Structure

Signposting for Webby FDOs

Link relation/attribute --> FDO concept

Any rel= after PID redirect FDO/PID Record (PID Profile: Signposting)
rel=cite-as PID (persistent identifier)
rel=linkset FDO Record (PID Profile: Linkset)
rel=describedby Metadata, or Metadata FDO if PID
rel=describes FDO Record that has this metadata
rel=item Bit-sequence
rel=collection FDO Collection, or FDO Record for a bitstream
rel=type FDO Type
type= on rel=item or rel=describes MIME type of bitstream
rel=profile PID Profile
profile= on rel=describes FDO Type of Metadata (e.g. PID Profile: RO-Crate https://w3id.org/ro/crate)
rel=author or rel=license Kernel attributes
rel=api-catalog to linkset FDO Collection of FDO Operations
rel=service-desc or rel=service-doc FDO Operation

https://pypi.org/project/signposting/

$ signposting https://zenodo.org/records/13225792
Signposting for https://zenodo.org/records/13225792 
Linkset: <https://zenodo.org/api/records/13225792> application/linkset+json

$ signposting --linkset https://zenodo.org/api/records/13225792
Signposting for https://zenodo.org/api/records/13225792 
CiteAs: <https://doi.org/10.5281/zenodo.13225792>
Type: <https://schema.org/CreativeWork>
      <https://schema.org/AboutPage>
Collection: <https://zenodo.org/records/13225792>
License: <https://creativecommons.org/licenses/by/4.0/legalcode>
Author: <https://orcid.org/0000-0002-0035-6475>
        <https://orcid.org/0000-0002-4806-5140>
        <https://orcid.org/0000-0002-0048-3300>
        <https://orcid.org/0000-0003-4911-9056>
        <https://orcid.org/0000-0002-3079-6586>
        <https://orcid.org/0000-0001-9842-9718>
Describes: <https://zenodo.org/records/13225792> text/html
DescribedBy: <https://zenodo.org/api/records/13225792> application/vnd.citationstyles.csl+json
             <https://zenodo.org/api/records/13225792> text/x-bibliography
             <https://zenodo.org/api/records/13225792> application/dcat+xml
             <https://zenodo.org/api/records/13225792> application/linkset+json
             <https://zenodo.org/api/records/13225792> application/x-bibtex
             <https://zenodo.org/api/records/13225792> application/vnd.datacite.datacite+json
             <https://zenodo.org/api/records/13225792> application/vnd.inveniordm.v1.full+csv
             <https://zenodo.org/api/records/13225792> application/vnd.datacite.datacite+xml
             <https://zenodo.org/api/records/13225792> application/x-dc+xml
             <https://zenodo.org/api/records/13225792> application/vnd.inveniordm.v1+json
             <https://zenodo.org/api/records/13225792> application/vnd.inveniordm.v1.simple+csv
             <https://zenodo.org/api/records/13225792> application/vnd.geo+json
             <https://zenodo.org/api/records/13225792> application/ld+json
             <https://zenodo.org/api/records/13225792> application/json
             <https://zenodo.org/api/records/13225792> application/marcxml+xml
Item: <https://zenodo.org/records/13225792/files/d2.1-reproducible-fair-digital-objects-for-workflows.pdf> application/pdf
      <https://zenodo.org/records/13225792/files/d2.1-reproducible-fair-digital-objects-for-workflows.odt> application/octet-stream

References