View source with raw comments or as raw
   1/*  Part of ClioPatria SeRQL and SPARQL server
   2
   3    Author:        Michiel Hildebrand
   4    Author:        Jan Wielemaker
   5    E-mail:        michielh@few.vu.nl
   6    WWW:           http://www.swi-prolog.org
   7    Copyright (C): 2016, VU University Amsterdam
   8
   9    This program is free software; you can redistribute it and/or
  10    modify it under the terms of the GNU General Public License
  11    as published by the Free Software Foundation; either version 2
  12    of the License, or (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public
  20    License along with this library; if not, write to the Free Software
  21    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22
  23    As a special exception, if you link this library with other files,
  24    compiled with a Free Software compiler, to produce an executable, this
  25    library does not by itself cause the resulting executable to be covered
  26    by the GNU General Public License. This exception does not however
  27    invalidate any other reasons why the executable file might be covered by
  28    the GNU General Public License.
  29*/
  30
  31:- module(api_lod,
  32	  [ lod_api/2			% +Request
  33	  ]).
  34
  35:- use_module(library(http/http_dispatch)).
  36:- use_module(library(http/http_json)).
  37:- use_module(library(http/http_host)).
  38:- use_module(library(http/http_request_value)).
  39:- use_module(library(http/http_cors)).
  40:- use_module(library(semweb/rdf_db)).
  41:- use_module(library(semweb/rdf_json)).
  42:- use_module(library(semweb/rdf_describe)).
  43:- use_module(library(settings)).
  44:- use_module(library(option)).
  45:- use_module(library(rdf_write)).
  46:- use_module(library(semweb/rdf_turtle_write)).
  47:- use_module(library(uri)).
  48:- use_module(library(debug)).
  49:- use_module(library(apply)).
  50:- use_module(library(dcg/basics)).
  51:- use_module(library(base64)).
  52:- use_module(library(utf8)).
  53
  54
  55/** <module> LOD - Linked Open Data server
  56
  57Linked (Open) Data turns RDF URIs   (indentifiers) into URLs (locators).
  58Requesting the data  behind  the  URL   returns  a  description  of  the
  59resource. So, if we see   a resource http://example.com/employe/bill, we
  60get do an HTTP GET  request  and   expect  to  receive  a description of
  61_bill_.  This module adds LOD facilities to ClioPatria.
  62
  63---++ Running the LOD server
  64
  65There are several ways to run the LOD server.
  66
  67    1. The simplest way to realise LOD is to run ClioPatria there where
  68    the authority component of the URL points to (see uri_components/2
  69    for decomposing URIs).  This implies you must be able to create a
  70    DNS binding for the host and be able to run ClioPatria there.
  71
  72    2. Sometimes the above does not work, because the port is already
  73    assigned to another machine, you are not allowed to run ClioPatria
  74    on the target host, the target is behind a firewall, etc. In that
  75    case, notable if the host runs Apache, you can exploit the Apache
  76    module =mod_proxy= and proxy the connections to a location where
  77    ClioPatria runs. If you ensure that the path on Apache is the same
  78    as the path on ClioPatria, the following Apache configuration rule
  79    solves the problem:
  80
  81    ==
  82    ProxyPass /rdf/ http://cliopatria-host:3020/rdf/
  83    ==
  84
  85    3. Both above methods require no further configuration.
  86    Unfortunately, they require a registered domain control over DNS
  87    and administrative rights over certain machines.  A solution that
  88    doesn't require this is to use www.purl.org.  This allows you to
  89    redirect URLs within the purl domain to any location you control.
  90    The redirection method can be defined with purl.  In the semantic
  91    web community, we typically use *|See other|* (303).  The catch
  92    is that if the address arrives at ClioPatria, we no longer know
  93    where it came from.  This is not a problem in (1), as there was
  94    no redirect.  It is also not a problem in (2), because Apache
  95    adds a header =|x-forwarded-host|=.  Unfortunately, there is
  96    no way to tell you are activated through a redirect, let alone
  97    where the redirect came from.
  98
  99    To deal with this situation, we use the redirected_from option of
 100    lod_api/2. For example, if http://www.purl.org/vocabularies/myvoc/
 101    is redirected to /myvoc/ on ClioPatria, we use:
 102
 103    ==
 104    :- http_handler('/myvoc/',
 105		    lod_api([ redirected_from('http://www.purl.org/vocabularies/myvoc/')
 106			    ]),
 107		    [ prefix ]).
 108    ==
 109
 110By default, there is no HTTP handler pointing to lod_api/2. The example
 111above describes how to deal with redirected URIs. The cases (1) and (2)
 112must also be implemented by registering a handler. This can be as blunt
 113as registering a handler for the root of the server, but typically one
 114would use one or more handlers that deal with sub-trees that act as
 115Linked Data repositories. Handler declarations should use absolute
 116addresses to guarantee a match with the RDF URIs, even if the server is
 117relocated by means of the http:prefix setting. For example:
 118
 119    ==
 120    :- http_handler('/rdf/', lod_api([]), [prefix]).
 121    ==
 122
 123@see http://linkeddata.org/
 124*/
 125
 126:- setting(lod:redirect, boolean, false,
 127	   'If true, redirect from accept-header to extension').
 128
 129%%	lod_api(+Options, +Request)
 130%
 131%	Reply to a Linked Data request. The  handler is capable of three
 132%	output formats. It decides on the   desired  format based on the
 133%	HTTP =Accept= header-field. If no acceptable format is found, it
 134%	replies with a human-readable description  of the resource using
 135%	ClioPatria RDF browser-page as defined by list_resource//2.
 136%
 137%	Options:
 138%
 139%	    * redirected_from(+URL)
 140%	    This option must be provided when using a purl.org or
 141%	    similar redirect.  See overall documentation of this
 142%	    library.
 143%
 144%	    * bounded_description(+Type)
 145%	    Description style to use.  See rdf_bounded_description/4.
 146%	    The default is =cbd= (Concise Bounded Description)
 147
 148lod_api(_Options, Request) :-
 149	\+ memberchk(path_info(_), Request), !,
 150	accepts(Request, AcceptList),
 151	preferred_format(AcceptList, Format),
 152	(   Format == html
 153	->  http_link_to_id(home, [], Redirect)
 154	;   http_link_to_id(well_known_void, [], Redirect)
 155	),
 156	http_redirect(see_other, Redirect, Request).
 157lod_api(_Options, Request) :-
 158	memberchk(path_info('/.well-known/void'), Request), !,
 159	http_link_to_id(well_known_void, [], Redirect),
 160	http_redirect(see_other, Redirect, Request).
 161lod_api(Options, Request) :-
 162	lod_uri(Request, URI, Options),
 163	debug(lod, 'LOD URI: ~q', [URI]),
 164	accepts(Request, AcceptList),
 165	triple_filter(Request, Filter),
 166	cors_enable,
 167	lod_request(URI, AcceptList, Request, Filter, Options).
 168
 169accepts(Request, AcceptList) :-
 170	(   memberchk(accept(AcceptHeader), Request)
 171	->  (   atom(AcceptHeader)	% compatibility
 172	    ->	http_parse_header_value(accept, AcceptHeader, AcceptList)
 173	    ;	AcceptList = AcceptHeader
 174	    )
 175	;   AcceptList = []
 176	).
 177
 178%%	triple_filter(+Request, -Filter) is det.
 179%
 180%	Extract Triple-Filter from Request.  Ignores the filter if it
 181%	is invalid.
 182
 183triple_filter(Request, Filter) :-
 184	catch(phrase(triple_filter(Request), Filter), E,
 185	      (print_message(warning, E),fail)), !.
 186triple_filter(_, []).
 187
 188
 189%%	triple_filter(+Text)//
 190%
 191%	Translate an RDF triple pattern into a list of rdf(S,P,O) terms.
 192
 193triple_filter([]) -->
 194	[].
 195triple_filter([triple_filter(Filter)|T]) --> !,
 196	one_triple_filter(Filter),
 197	triple_filter(T).
 198triple_filter([_|T]) -->
 199	triple_filter(T).
 200
 201one_triple_filter(Encoded) -->
 202	{ string_codes(Encoded, EncCodes),
 203	  phrase(base64(UTF8Bytes), EncCodes),
 204	  phrase(utf8_codes(PlainCodes), UTF8Bytes),
 205	  string_codes(Filter, PlainCodes),
 206	  split_string(Filter, "\r\n", "\r\n", Filters),
 207	  maplist(map_triple_filter, Filters, Triples)
 208	},
 209	string(Triples).
 210
 211map_triple_filter(String, rdf(S,P,O)) :-
 212	split_string(String, "\s\t", "\s\t", [SS,SP,SO]),
 213	triple_term(SS, S),
 214	triple_term(SP, P),
 215	triple_term(SO, O).
 216
 217triple_term("?", _) :- !.
 218triple_term(S, N) :-
 219	string_codes(S, Codes),
 220	phrase(sparql_grammar:graph_term(N), Codes).
 221
 222%%	lod_request(+URI, +AcceptList, +Request, +Filter, +Options)
 223%
 224%	Handle an LOD request.
 225
 226lod_request(URI, AcceptList, Request, Filter, Options) :-
 227	lod_resource(URI), !,
 228	preferred_format(AcceptList, Format),
 229	debug(lod, 'LOD Format: ~q', [Format]),
 230	(   cliopatria:redirect_uri(Format, URI, SeeOther)
 231	->  http_redirect(see_other, SeeOther, Request)
 232	;   setting(lod:redirect, true),
 233	    redirect(URI, AcceptList, SeeOther)
 234	->  http_redirect(see_other, SeeOther, Request)
 235	;   lod_describe(Format, URI, Request, Filter, Options)
 236	).
 237lod_request(URL, _AcceptList, Request, Filter, Options) :-
 238	format_request(URL, URI, Format), !,
 239	lod_describe(Format, URI, Request, Filter, Options).
 240lod_request(URI, _AcceptList, _Request, _Filter, _) :-
 241	throw(http_reply(not_found(URI))).
 242
 243
 244%%	lod_uri(+Request, -URI, +Options)
 245%
 246%	URI is the originally requested URI.   This predicate deals with
 247%	redirections if the HTTP handler was registered using the option
 248%	redirected_from(URL). Otherwise it resolves   the correct global
 249%	URI using http_current_host/4.
 250
 251lod_uri(Request, URI, Options) :-
 252	memberchk(redirected_from(Org), Options),
 253	memberchk(request_uri(ReqURI), Request),
 254	handler_location(Request, Location),
 255	atom_concat(Location, Rest, ReqURI),
 256	atom_concat(Org, Rest, URI).
 257lod_uri(Request, URI, _) :-
 258	memberchk(request_uri(ReqURI), Request),
 259	http_current_host(Request, Host, Port,
 260			  [ global(true)
 261			  ]),
 262	(   Port == 80
 263	->  atomic_list_concat(['http://', Host, ReqURI], URI)
 264	;   atomic_list_concat(['http://', Host, :, Port, ReqURI], URI)
 265	).
 266
 267
 268%%	handler_location(+Request, -Location) is det.
 269%
 270%	Location is the requested location on  the server. This includes
 271%	the handler location, normally concatenated with the path_info.
 272
 273handler_location(Request, Location) :-
 274	memberchk(path(Path), Request),
 275	(   memberchk(path_info(Rest), Request),
 276	    atom_concat(Location, Rest, Path)
 277	->  true
 278	;   Location = Path
 279	).
 280
 281
 282%%	redirect(+URI, +AcceptList, -RedirectURL)
 283%
 284%	Succeeds if URI is in the store and a RedirectURL is found for
 285%	it.
 286
 287redirect(URI, AcceptList, To) :-
 288	lod_resource(URI),
 289	preferred_format(AcceptList, Format),
 290	(   cliopatria:redirect_uri(Format, URI, To)
 291	->  true
 292	;   uri_components(URI, URIComponents),
 293	    uri_data(path, URIComponents, Path0),
 294	    format_suffix(Format, Suffix),
 295	    file_name_extension(Path0, Suffix, Path),
 296	    uri_data(path, URIComponents, Path, ToComponents),
 297	    uri_components(To, ToComponents)
 298	).
 299
 300
 301%%	preferred_format(+AcceptList, -Format) is det.
 302%
 303%	Format is the highest ranked mimetype found in the Acceptlist of
 304%	the request and that  we  can   support.  Expects  an AcceptList
 305%	sorted by rank.
 306
 307preferred_format(AcceptList, Format) :-
 308	member(media(MimeType,_,_,_), AcceptList),
 309	ground(MimeType),
 310	mimetype_format(MimeType, Format), !.
 311preferred_format(_, html).
 312
 313
 314%%	format_request(+URL, -URI, -Format) is semidet.
 315%
 316%	True if URL contains a suffix   that  corresponds to a supported
 317%	output format, and the global URI occurs in the database.
 318
 319format_request(URL, URI, Format) :-
 320	uri_components(URL, URLComponents),
 321	uri_data(path, URLComponents, Path),
 322	file_name_extension(Base, Ext, Path),
 323	(   format_suffix(Format, Ext),
 324	    mimetype_format(_, Format)
 325	->  true
 326	),
 327	uri_data(path, URLComponents, Base, PlainComponents),
 328	uri_components(URI, PlainComponents),
 329	lod_resource(URI).
 330
 331
 332%%	lod_describe(+Format, +URI, +Request, +Filter, +Options) is det.
 333%
 334%	Write an HTTP document  describing  URI   to  in  Format  to the
 335%	current output. Format is defined by mimetype_format/2.
 336
 337lod_describe(html, URI, Request, _, _) :- !,
 338	(   rdf_graph(URI)
 339	->  http_link_to_id(list_graph, [graph=URI], Redirect)
 340	;   http_link_to_id(list_resource, [r=URI], Redirect)
 341	),
 342	http_redirect(see_other, Redirect, Request).
 343lod_describe(Format, URI, _Request, Filter, Options) :-
 344	lod_description(URI, RDF, Filter, Options),
 345	send_graph(Format, RDF).
 346
 347send_graph(xmlrdf, RDF) :-
 348	format('Content-type: application/rdf+xml; charset=UTF-8~n~n'),
 349	rdf_write_xml(current_output, RDF).
 350send_graph(json, RDF) :-
 351	graph_json(RDF, JSON),
 352	reply_json(JSON).
 353send_graph(turtle, RDF) :-
 354	format('Content-type: text/turtle; charset=UTF-8~n~n'),
 355	rdf_save_turtle(stream(current_output),
 356			[ expand(triple_in(RDF)),
 357			  only_known_prefixes(true),
 358			  silent(true)
 359			]).
 360
 361%%	triple_in(+RDF, ?S,?P,?O, ?G) is nondet.
 362%
 363%	Lookup a triple in the graph RDF, represented as a list of
 364%	rdf(S,P,O).
 365%
 366%	@tbd	Describe required indexing from rdf_save_turtle/2 and
 367%		implement that if the graph is big.
 368
 369:- public triple_in/5.			% called from send_graph/2.
 370
 371triple_in(RDF, S,P,O,_G) :-
 372	member(rdf(S,P,O), RDF).
 373
 374
 375%%	lod_description(+URI, -RDF, +Filter, +Options) is det.
 376%
 377%	RDF is a  graph  represented  as   a  list  of  rdf(S,P,O)  that
 378%	describes URI.
 379%
 380%	This predicate is hooked   by  cliopatria:lod_description/2. The
 381%	default is implemented by resource_CBD/3.
 382%
 383%	@see SPARQL DESCRIBE
 384
 385lod_description(URI, RDF, _, _) :-
 386	cliopatria:lod_description(URI, RDF), !.
 387lod_description(URI, RDF, Filter, Options) :-
 388	option(bounded_description(Type), Options, cbd),
 389	echo_filter(Filter),
 390	rdf_bounded_description(rdf, Type, Filter, URI, RDF).
 391
 392echo_filter([]) :- !.
 393echo_filter(Filters) :-
 394	copy_term(Filters, Filters1),
 395	term_variables(Filters1, Vars),
 396	maplist(=(?), Vars),
 397	filters_to_ntriples(Filters1, NTriples),
 398	split_string(NTriples, "\n", "\n.\s", Strings0),
 399	maplist(insert_q, Strings0, Strings),
 400	atomics_to_string(Strings, "\n", String),
 401	base64(String, Encoded),
 402	format('Triple-Filter: ~w\r\n', [Encoded]).
 403
 404insert_q(String, QString) :-
 405	split_string(String, " ", "", [S,P,O|M]),
 406	map_q(S, QS),
 407	map_q(P, QP),
 408	map_q(O, QO),
 409	atomics_to_string([QS,QP,QO|M], " ", QString).
 410
 411map_q("<?>", "?") :- !.
 412map_q(S, S).
 413
 414filters_to_ntriples(Filters, String) :-
 415	with_output_to(
 416	    string(String),
 417	    rdf_save_ntriples(stream(current_output),
 418			      [ expand(api_lod:triple_in(Filters))])).
 419
 420
 421%%	mimetype_format(?MimeType, ?Format) is nondet.
 422%
 423%	Conversion between mimetypes and formats.
 424
 425mimetype_format(application/'rdf+xml',	xmlrdf).
 426mimetype_format(application/json,	json).
 427mimetype_format(application/'x-turtle',	turtle).
 428mimetype_format(text/turtle,		turtle).
 429mimetype_format(text/html,		html).
 430
 431%%	format_suffix(?Format, ?Suffix) is nondet.
 432%
 433%	Suffix is the file name extension used for Format.
 434
 435format_suffix(xmlrdf, rdf).
 436format_suffix(json,   json).
 437format_suffix(html,   html).
 438format_suffix(turtle, ttl).
 439
 440
 441%%	lod_resource(+Resource) is semidet.
 442%
 443%	True if Resource is an  existing   resource  for the LOD server.
 444%	Typically,  this  means  it  appears  as  a  subject,  but  when
 445%	considering symmetric bounded descriptions,  it should certainly
 446%	also hold for resources that only appear as object.
 447
 448lod_resource(Resource) :-
 449	(   rdf(Resource, _, _)
 450	;   rdf(_, Resource, _)
 451	;   rdf(_, _, Resource)
 452	;   rdf_graph(Resource)
 453	), !.
 454
 455
 456		 /*******************************
 457		 *	       HOOKS		*
 458		 *******************************/
 459
 460:- multifile
 461	cliopatria:redirect_uri/3,
 462	cliopatria:lod_description/2.
 463
 464%%	cliopatria:redirect_uri(+Format, +URI, -RedirectURL)
 465%
 466%	Compose a RedirectionURL based on the  output Format and the URI
 467%	that is in our RDF database. For example, this could map the URI
 468%	http://example.com/employe/bill   into   Bill's    homepage   at
 469%	http://example.com/~bill if Format is =html=.  The default is to
 470%	a format-specific extension  to  the   path  component  of  URI,
 471%	returning  e.g.,  http://example.com/employe/bill.rdf    if  the
 472%	requested format is RDF.
 473%
 474%       @see This hook is used by redirect/3.
 475%       @param Format is one of =xmlrdf=, =turtle, =json= or =html=.
 476
 477
 478%%	cliopatria:lod_description(+URI, -RDF:list(rdf(s,p,o)))
 479%
 480%	RDF is list of triples describing URI. The default is to use the
 481%	Concise Bounded Description as implemented by resource_CBD/3.
 482%
 483%	@see This hook is used by lod_description/2
 484%	@see library(semweb/rdf_describe) provides several definitions
 485%	of bounded descriptions.