Signposting

Stian Soiland-Reyes, The University of Manchester, UK

Motivation

How to find the way from persistent identifiers,

through a repository

to find data and metadata

A typical PID resolution

GET https://doi.org/10.abcd/j.mystery.14

HTTP/1.1 302 Found
Location: http://example.com/repo/item/14

Browsers follow redirection to the repository's landing page

GET http://example.com/repo/item/14

HTTP/1.1 200 OK
Content-Type: text/html

<html>
  <head> … </head>
  <body>
  …
  </body>
</html>

HTML for human readability and human navigation

A preview of the resource

Metadata (e.g. title, description) as text

Link to download (pdf, zip)

.. and many unrelated links

There may be multiple downloads, multiple metadata formats

Multiple metadata formats

{
  "@context": "http://schema.org",
  "id": "https://doi.org/10.5281/zenodo.10847062",
  "type": "https://schema.org/PresentationDigitalDocument",
  "author": [
    {
      "id": "https://orcid.org/0000-0001-9842-9718",
      "type": "Person",
      "affiliation": [
        {
          "id": "https://ror.org/027m9bs27",
          "type": "Organization",
          "name": "University of Manchester"
        },
        {
          "id": "https://ror.org/04dkp9463",
          "type": "Organization",
          "name": "University of Amsterdam"
        }
      ],
      "familyName": "Soiland-Reyes",
      "givenName": "Stian",
      "name": "Soiland-Reyes, Stian"
    },
    {
      "id": "https://orcid.org/0000-0002-0715-6126",
      "type": "Person",
      "familyName": "Van de Sompel",
      "givenName": "Herbert",
      "name": "Van de Sompel, Herbert"
    }
  ],
  "contentSize": "11.95 MB",
  "creator": [
    {
      "id": "https://orcid.org/0000-0001-9842-9718",
      "type": "Person",
      "affiliation": [
        {
          "id": "https://ror.org/027m9bs27",
          "type": "Organization",
          "name": "University of Manchester"
        },
        {
          "id": "https://ror.org/04dkp9463",
          "type": "Organization",
          "name": "University of Amsterdam"
        }
      ],
      "familyName": "Soiland-Reyes",
      "givenName": "Stian",
      "name": "Soiland-Reyes, Stian"
    },
    {
      "id": "https://orcid.org/0000-0002-0715-6126",
      "type": "Person",
      "familyName": "Van de Sompel",
      "givenName": "Herbert",
      "name": "Van de Sompel, Herbert"
    }
  ],
  "dateCreated": "2024-03-21T07:52:03.534671+00:00",
  "dateModified": "2024-07-06T20:35:20.427129+00:00",
  "datePublished": "2024-03-21",
  "description": "<p>Presented at FDO Summit 2024</p>",
  "identifier": "https://doi.org/10.5281/zenodo.10847062",
  "license": "https://creativecommons.org/licenses/by/4.0/legalcode",
  "name": "Signposting and RO-Crate: experiences and lessons learned",
  "publisher": {
    "type": "Organization",
    "name": "Zenodo"
  },
  "size": "11.95 MB",
  "url": "https://zenodo.org/records/10847062"
}
<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:bibo="http://purl.org/ontology/bibo/" xmlns:citedcat="https://w3id.org/citedcat-ap/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.10847062">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Text"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.10847062</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
    <dct:creator>
      <rdf:Description rdf:about="https://orcid.org/0000-0001-9842-9718">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>
        <foaf:name>Soiland-Reyes, Stian</foaf:name>
        <foaf:givenName>Stian</foaf:givenName>
        <foaf:familyName>Soiland-Reyes</foaf:familyName>
        <org:memberOf>
          <foaf:Organization rdf:about="https://ror.org/https://ror.org/027m9bs27">
            <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://ror.org/027m9bs27</dct:identifier>
            <foaf:name>University of Manchester</foaf:name>
          </foaf:Organization>
        </org:memberOf>
        <org:memberOf>
          <foaf:Organization rdf:about="https://ror.org/https://ror.org/04dkp9463">
            <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://ror.org/04dkp9463</dct:identifier>
            <foaf:name>University of Amsterdam</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="https://orcid.org/0000-0002-0715-6126">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>
        <foaf:name>Van de Sompel, Herbert</foaf:name>
        <foaf:givenName>Herbert</foaf:givenName>
        <foaf:familyName>Van de Sompel</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:title>Signposting and RO-Crate: experiences and lessons learned</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2024</dct:issued>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-03-21</dct:issued>
    <dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-07-06</dct:modified>
    <owl:sameAs rdf:resource="https://zenodo.org/records/10847062"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/records/10847062</skos:notation>
        <adms:schemeAgency>URL</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation>oai:zenodo.org:10847062</skos:notation>
        <adms:schemeAgency>oai</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf>
      <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.10847061">
        <dct:identifier>https://doi.org/10.5281/zenodo.10847061</dct:identifier>
      </rdf:Description>
    </dct:isVersionOf>
    <dct:isPartOf>
      <rdf:Description rdf:about="https://zenodo.org/communities/ro">
        <dct:identifier>https://zenodo.org/communities/ro</dct:identifier>
      </rdf:Description>
    </dct:isPartOf>
    <dct:isPartOf>
      <rdf:Description rdf:about="https://zenodo.org/communities/esciencelab">
        <dct:identifier>https://zenodo.org/communities/esciencelab</dct:identifier>
      </rdf:Description>
    </dct:isPartOf>
    <dct:description>Presented at FDO Summit 2024</dct:description>
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:downloadURL rdf:resource="https://zenodo.org/records/10847062/files/slides_signposting-and-ro-crate-experience-and-lessons-learned.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
        <dcat:byteSize>6896383</dcat:byteSize>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:downloadURL rdf:resource="https://zenodo.org/records/10847062/files/signposting-and-ro-crate-experience-and-lessons-learned.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
        <dcat:byteSize>5631529</dcat:byteSize>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.10847062"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>

.. even different profiles of same syntax

What about APIs?

  • Site-specific JSON, schema must already be known
  • "Secret" API -- do some URL rewriting
  • API documented as URL patterns, e.g.
    http://api.example.com/items/{item}.json
    • But what is {item} ?
  • Links to download are often hidden behind another API call
GET http://api.example.com/items/14.json

HTTP/1.1 200 OK
Content-Type: application/json

{ "title": "Data 14", 
  "download": "http://cdn.example.net/item14.zip" }

Lots of APIs

Webby attempt #1

content negotiation

$ curl -s -L -H "Accept: text/html" https://workflowhub.eu/workflows/29 | head

<!doctype html>

<html lang="en">
<head>
  <meta http-equiv="content-type" content="text/html;charset=UTF-8"/>


$ curl -s -L -H "Accept: application/json" https://workflowhub.eu/workflows/29  | head

{"data":{"id":"29","type":"workflows","attributes":{"discussion_links":[{"id":"100","label":"BioExcel Workflows",
       "url":"http://mmb.irbbarcelona.org/biobb/workflows"},{"id":"101","label":"Tutorial","url":"http://mmb.irbbarcelona.org/biobb/workflows/tutorials/md_setup"},{"id":"102","label":"Documentation","url":"https://biobb-wf-md-setup.readthedocs.io/en/latest/index.html"}],"title":"Protein MD Setup tutorial using BioExcel Building Blocks (biobb) in CWL","license":"Apache-2.0","description":"Common Workflow Language example that illustrate the process of setting up a simulation system containing a protein, step by step, using the [BioExcel Building Blocks](/projects/11) library (biobb). The particular ex
                                                    
                                                    

Webby attempt #2

DCAT

Webby attempt #3

schema.org

What is Signposting?

Provide from any Web resource (e.g. typical landing page), how to find:

  • Persistent identifier (how do I find it again?)
  • Type (what is it?)
  • Download links (how do I get it?)
  • Metadata (what is inside?)
  • License (can I reuse it?)
  • Authors (who should I credit?)

 

... assuming we want to do this reliably in software -- for any repository or website

(machine actionable)

Reuse Web standards

 

Principles: Simplicity, navigation, (almost) no semantics

JSON

HTTP

GET http://www.example.com/repo/item/14

HTTP/1.1 200 OK 
Link: <http://cdn.example.net/item14.zip>;     rel="item"
Link: <http://api.example.com/items/14.json>;  rel="describedby"
Link: <https://doi.org/10.abcd/j.mystery.14>;  rel="cite-as"
Link: <https://schema.org/TrainingMaterial>;   rel="type"
Content-Type: text/html
Content-Length: 21398

<html>...

HTML

<html>
<head>
<link rel="stylesheet"   href="https://cdn.jsdelivr.net/npm/bootstrap..." />
<link rel="cite-as"      href="https://doi.org/10.abcd/j.mystery.14"  />
<link rel="item"         href="http://cdn.example.net/item14.zip"  />
<link rel="describedby"  href="http://api.example.com/items/14.json" />
<link rel="type"         href="https://schema.org/TrainingMaterial"   />

</head>
<body>

JSON

{ "linkset": [
  { "anchor": "http://www.example.com/repo/item/14",
    "cite-as": [
      { "href": "https://doi.org/10.abcd/j.mystery.14" }
    ],
    "item": [
      { "href": "http://cdn.example.net/item14.zip" }
    ],
    "describedby": [
      { "href": "http://api.example.com/items/14.json" }
    ]
  },
  { "anchor": "..." }
 ]
}

FAIR Signposting

Making the links meaningful

Specify the content type:

Link: <http://cdn.example.net/item14.zip>; rel="item"; type="application/zip"

Useful for alternative formats

 

For generic types, also specify a profile:

Link: <http://api.example.com/marc/14>; rel="describedby";

type="application/xml";

profile="http://www.loc.gov/MARC21/slim"

Useful for JSONs with different schemas and linking to APIs

Webby FAIR Digital Objects

  • Recognises that Web (aka HTTP) is ubiquitous, not going away soon
  • Builds on existing Web standards
    • Signposting for navigation (PID record)
    • Linked Data for metadata
    • RO-Crate as basis for metadata profiles

https://pypi.org/project/signposting/

$ signposting https://zenodo.org/records/13225792
Signposting for https://zenodo.org/records/13225792 
Linkset: <https://zenodo.org/api/records/13225792> application/linkset+json

$ signposting --linkset https://zenodo.org/api/records/13225792
Signposting for https://zenodo.org/api/records/13225792 
CiteAs: <https://doi.org/10.5281/zenodo.13225792>
Type: <https://schema.org/CreativeWork>
      <https://schema.org/AboutPage>
Collection: <https://zenodo.org/records/13225792>
License: <https://creativecommons.org/licenses/by/4.0/legalcode>
Author: <https://orcid.org/0000-0002-0035-6475>
        <https://orcid.org/0000-0002-4806-5140>
        <https://orcid.org/0000-0002-0048-3300>
        <https://orcid.org/0000-0003-4911-9056>
        <https://orcid.org/0000-0002-3079-6586>
        <https://orcid.org/0000-0001-9842-9718>
Describes: <https://zenodo.org/records/13225792> text/html
DescribedBy: <https://zenodo.org/api/records/13225792> application/vnd.citationstyles.csl+json
             <https://zenodo.org/api/records/13225792> text/x-bibliography
             <https://zenodo.org/api/records/13225792> application/dcat+xml
             <https://zenodo.org/api/records/13225792> application/linkset+json
             <https://zenodo.org/api/records/13225792> application/x-bibtex
             <https://zenodo.org/api/records/13225792> application/vnd.datacite.datacite+json
             <https://zenodo.org/api/records/13225792> application/vnd.inveniordm.v1.full+csv
             <https://zenodo.org/api/records/13225792> application/vnd.datacite.datacite+xml
             <https://zenodo.org/api/records/13225792> application/x-dc+xml
             <https://zenodo.org/api/records/13225792> application/vnd.inveniordm.v1+json
             <https://zenodo.org/api/records/13225792> application/vnd.inveniordm.v1.simple+csv
             <https://zenodo.org/api/records/13225792> application/vnd.geo+json
             <https://zenodo.org/api/records/13225792> application/ld+json
             <https://zenodo.org/api/records/13225792> application/json
             <https://zenodo.org/api/records/13225792> application/marcxml+xml
Item: <https://zenodo.org/records/13225792/files/d2.1-reproducible-fair-digital-objects-for-workflows.pdf> application/pdf
      <https://zenodo.org/records/13225792/files/d2.1-reproducible-fair-digital-objects-for-workflows.odt> application/octet-stream

New implementations

Stian Soiland-Reyes, Alban Gaignard, Wilko Steinhoff, Mark Wilkinson, Herbert Van de Sompel (2024):
Benchmarks for Apples-to-Apples FAIR Signposting.
https://w3id.org/a2a-fair-metrics/

Signposting to service descriptions

Find repository-wide APIs (e.g. OpenAPI endpoints)

 

Find object-specific APIs

 

Human and machine-readable documentation

References

Signposting

By Stian Soiland-Reyes

Signposting

Presented for Croissant working group

  • 99