View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2010-2017, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(sparql_client,
  37          [ sparql_query/3,             % +Query, -Row, +Options
  38            sparql_set_server/1,        % +Options
  39            sparql_read_xml_result/2,   % +Stream, -Result
  40            sparql_read_json_result/2   % +Input, -Result
  41          ]).
  42:- use_module(library(http/http_open)).
  43:- use_module(library(http/json)).
  44:- use_module(library(lists)).
  45:- use_module(library(rdf)).
  46:- use_module(library(semweb/turtle)).
  47:- use_module(library(option)).
  48
  49/** <module> SPARQL client library
  50
  51This module provides a SPARQL client.  For example:
  52
  53    ==
  54    ?- sparql_query('select * where { ?x rdfs:label "Amsterdam" }', Row,
  55                    [ host('dbpedia.org'), path('/sparql/')]).
  56
  57    Row = row('http://www.ontologyportal.org/WordNet#WN30-108949737') ;
  58    false.
  59    ==
  60
  61Or, querying a local server using an =ASK= query:
  62
  63    ==
  64    ?- sparql_query('ask { owl:Class rdfs:label "Class" }', Row,
  65                    [ host('localhost'), port(3020), path('/sparql/')]).
  66    Row = true.
  67    ==
  68*/
  69
  70
  71%!  sparql_query(+Query, -Result, +Options) is nondet.
  72%
  73%   Execute a SPARQL query on an HTTP   SPARQL endpoint. Query is an
  74%   atom that denotes  the  query.  Result   is  unified  to  a term
  75%   rdf(S,P,O) for =CONSTRUCT= and =DESCRIBE=  queries, row(...) for
  76%   =SELECT= queries and  =true=  or   =false=  for  =ASK=  queries.
  77%   Options are
  78%
  79%       * host(+Host)
  80%       * port(+Port)
  81%       * path(+Path)
  82%       The above three options set the location of the server.
  83%       * search(+ListOfParams)
  84%       Provide additional query parameters, such as the graph.
  85%       * variable_names(-ListOfNames)
  86%       Unifies ListOfNames with a list of atoms that describe the
  87%       names of the variables in a =SELECT= query.
  88%
  89%   Remaining options are passed to   http_open/3.  The defaults for
  90%   Host, Port and Path can be   set  using sparql_set_server/1. The
  91%   initial default for port is 80 and path is =|/sparql/|=.
  92%
  93%   For example, the ClioPatria  server   understands  the parameter
  94%   =entailment=. The code  below  queries   for  all  triples using
  95%   _rdfs_entailment.
  96%
  97%     ==
  98%     ?- sparql_query('select * where { ?s ?p ?o }',
  99%                     Row,
 100%                     [ search([entailment=rdfs])
 101%                     ]).
 102%     ==
 103
 104sparql_query(Query, Row, Options) :-
 105    sparql_param(host(Host), Options,  Options1),
 106    sparql_param(port(Port), Options1, Options2),
 107    sparql_param(path(Path), Options2, Options3),
 108    select_option(search(Extra), Options3, Options4, []),
 109    select_option(variable_names(VarNames), Options4, Options5, _),
 110    sparql_extra_headers(HTTPOptions),
 111    http_open([ protocol(http),
 112                host(Host),
 113                port(Port),
 114                path(Path),
 115                search([ query = Query
 116                       | Extra
 117                       ])
 118              | Options5
 119              ], In,
 120              [ header(content_type, ContentType)
 121              | HTTPOptions
 122              ]),
 123    plain_content_type(ContentType, CleanType),
 124    read_reply(CleanType, In, VarNames, Row).
 125
 126%!  sparql_extra_headers(-List)
 127%
 128%   Send extra headers with the request. Note that, although we also
 129%   process RDF embedded in HTML, we do  not explicitely ask for it.
 130%   Doing so causes some   (e.g., http://w3.org/2004/02/skos/core to
 131%   reply with the HTML description rather than the RDF).
 132
 133sparql_extra_headers(
 134        [ request_header('Accept' = 'application/sparql-results+xml, \c
 135                                     application/n-triples, \c
 136                                     application/x-turtle, \c
 137                                     application/turtle, \c
 138                                     application/sparql-results+json, \c
 139                                     text/turtle; q=0.9, \c
 140                                     application/rdf+xml, \c
 141                                     text/rdf+xml; q=0.8, \c
 142                                     */*; q=0.1'),
 143          cert_verify_hook(ssl_verify)
 144        ]).
 145
 146:- public ssl_verify/5.
 147
 148%!  ssl_verify(+SSL, +ProblemCert, +AllCerts, +FirstCert, +Error)
 149%
 150%   Currently we accept  all  certificates.
 151
 152ssl_verify(_SSL,
 153           _ProblemCertificate, _AllCertificates, _FirstCertificate,
 154           _Error).
 155
 156
 157read_reply('application/rdf+xml', In, _, Row) :-
 158    !,
 159    call_cleanup(load_rdf(stream(In), RDF), close(In)),
 160    member(Row, RDF).
 161read_reply(MIME, In, _, Row) :-
 162    turtle_media_type(MIME),
 163    !,
 164    call_cleanup(rdf_read_turtle(stream(In), RDF, []), close(In)),
 165    member(Row, RDF).
 166read_reply(MIME, In, VarNames, Row) :-
 167    sparql_result_mime(MIME),
 168    !,
 169    call_cleanup(sparql_read_xml_result(stream(In), Result),
 170                 close(In)),
 171    varnames(Result, VarNames),
 172    xml_result(Result, Row).
 173read_reply(MIME, In, VarNames, Row) :-
 174    json_result_mime(MIME),
 175    !,
 176    call_cleanup(sparql_read_json_result(stream(In), Result),
 177                 close(In)),
 178    (   Result = select(VarNames, Rows)
 179    ->  member(Row, Rows)
 180    ;   Result = ask(True)
 181    ->  Row = True,
 182        VarNames = []
 183    ).
 184read_reply(Type, In, _, _) :-
 185    read_stream_to_codes(In, Codes),
 186    string_codes(Reply, Codes),
 187    close(In),
 188    throw(error(domain_error(sparql_result_document, Type),
 189                context(_, Reply))).
 190
 191turtle_media_type('application/x-turtle').
 192turtle_media_type('application/turtle').
 193turtle_media_type('application/n-triples').
 194turtle_media_type('text/rdf+n3').
 195turtle_media_type('text/turtle').
 196
 197sparql_result_mime('application/sparql-results+xml'). % official
 198sparql_result_mime('application/sparql-result+xml').
 199
 200json_result_mime('application/sparql-results+json').
 201
 202
 203plain_content_type(Type, Plain) :-
 204    sub_atom(Type, B, _, _, (;)),
 205    !,
 206    sub_string(Type, 0, B, _, Main),
 207    normalize_space(atom(Plain), Main).
 208plain_content_type(Type, Type).
 209
 210xml_result(ask(Bool), Result) :-
 211    !,
 212    Result = Bool.
 213xml_result(select(_VarNames, Rows), Result) :-
 214    member(Result, Rows).
 215
 216varnames(ask(_), _).
 217varnames(select(VarTerm, _Rows), VarNames) :-
 218    VarTerm =.. [_|VarNames].
 219
 220
 221                 /*******************************
 222                 *            SETTINGS          *
 223                 *******************************/
 224
 225:- dynamic
 226    sparql_setting/1.
 227
 228sparql_setting(port(80)).
 229sparql_setting(path('/sparql/')).
 230
 231sparql_param(Param, Options0, Options) :-
 232    select_option(Param, Options0, Options),
 233    !.
 234sparql_param(Param, Options, Options) :-
 235    sparql_setting(Param),
 236    !.
 237sparql_param(Param, Options, Options) :-
 238    functor(Param, Name, _),
 239    throw(error(existence_error(option, Name), _)).
 240
 241%!  sparql_set_server(+OptionOrList)
 242%
 243%   Set sparql server default options.  Provided defaults are:
 244%   host, port and repository.  For example:
 245%
 246%       ==
 247%           sparql_set_server([ host(localhost),
 248%                               port(8080)
 249%                               path(world)
 250%                             ])
 251%       ==
 252%
 253%   The default for port is 80 and path is =|/sparql/|=.
 254
 255sparql_set_server([]) :- !.
 256sparql_set_server([H|T]) :-
 257    !,
 258    sparql_set_server(H),
 259    sparql_set_server(T).
 260sparql_set_server(Term) :-
 261    functor(Term, Name, Arity),
 262    functor(Unbound, Name, Arity),
 263    retractall(sparql_setting(Unbound)),
 264    assert(sparql_setting(Term)).
 265
 266
 267                 /*******************************
 268                 *             RESULT           *
 269                 *******************************/
 270
 271ns(sparql, 'http://www.w3.org/2005/sparql-results#').
 272
 273/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 274Read    the    SPARQL    XML    result     format    as    defined    in
 275http://www.w3.org/TR/rdf-sparql-XMLres/, version 6 April 2006.
 276- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 277
 278                 /*******************************
 279                 *        MACRO HANDLING        *
 280                 *******************************/
 281
 282%       substitute 'sparql' by the namespace   defined  above for better
 283%       readability of the remainder of the code.
 284
 285term_subst(V, _, _, V) :-
 286    var(V),
 287    !.
 288term_subst(F, F, T, T) :- !.
 289term_subst(C, F, T, C2) :-
 290    compound(C),
 291    !,
 292    functor(C, Name, Arity),
 293    functor(C2, Name, Arity),
 294    term_subst(0, Arity, C, F, T, C2).
 295term_subst(T, _, _, T).
 296
 297term_subst(A, A, _, _, _, _) :- !.
 298term_subst(I0, Arity, C0, F, T, C) :-
 299    I is I0 + 1,
 300    arg(I, C0, A0),
 301    term_subst(A0, F, T, A),
 302    arg(I, C, A),
 303    term_subst(I, Arity, C0, F, T, C).
 304
 305term_expansion(T0, T) :-
 306    ns(sparql, NS),
 307    term_subst(T0, sparql, NS, T).
 308
 309
 310                 /*******************************
 311                 *           READING            *
 312                 *******************************/
 313
 314%!  sparql_read_xml_result(+Input, -Result)
 315%
 316%   Specs from http://www.w3.org/TR/rdf-sparql-XMLres/.  The returned
 317%   Result term is of the format:
 318%
 319%           * select(VarNames, Rows)
 320%           Where VarNames is a term v(Name, ...) and Rows is a
 321%           list of row(....) containing the column values in the
 322%           same order as the variable names.
 323%
 324%           * ask(Bool)
 325%           Where Bool is either =true= or =false=
 326
 327:- thread_local
 328    bnode_map/2.
 329
 330sparql_read_xml_result(Input, Result) :-
 331    load_structure(Input, DOM,
 332                   [ dialect(xmlns),
 333                     space(remove)
 334                   ]),
 335    call_cleanup(dom_to_result(DOM, Result),
 336                 retractall(bnode_map(_,_))).
 337
 338dom_to_result(DOM, Result) :-
 339    (   sub_element(DOM, sparql:head, _HAtt, Content)
 340    ->  variables(Content, Vars)
 341    ;   Vars = []
 342    ),
 343    (   Vars == [],
 344        sub_element(DOM, sparql:boolean, _, [TrueFalse])
 345    ->  Result = ask(TrueFalse)
 346    ;   VarTerm =.. [v|Vars],
 347        Result = select(VarTerm, Rows),
 348        sub_element(DOM, sparql:results, _RAtt, RContent)
 349    ->  rows(RContent, Vars, Rows)
 350    ),
 351    !.                                   % Guarantee finalization
 352
 353%!  variables(+DOM, -Varnames)
 354%
 355%   Deals with <variable name=Name>.  Head   also  may contain <link
 356%   href="..."/>. This points to additional   meta-data.  Not really
 357%   clear what we can do with that.
 358
 359variables([], []).
 360variables([element(sparql:variable, Att, [])|T0], [Name|T]) :-
 361    !,
 362    memberchk(name=Name, Att),
 363    variables(T0, T).
 364variables([element(sparql:link, _, _)|T0], T) :-
 365    variables(T0, T).
 366
 367
 368rows([], _, []).
 369rows([R|T0], Vars, [Row|T]) :-
 370    row_values(Vars, R, Values),
 371    Row =.. [row|Values],
 372    rows(T0, Vars, T).
 373
 374row_values([], _, []).
 375row_values([Var|VarT], DOM, [Value|ValueT]) :-
 376    (   sub_element(DOM, sparql:binding, Att, Content),
 377        memberchk(name=Var, Att)
 378    ->  value(Content, Value)
 379    ;   Value = '$null$'
 380    ),
 381    row_values(VarT, DOM, ValueT).
 382
 383value([element(sparql:literal, Att, Content)], literal(Lit)) :-
 384    !,
 385    lit_value(Content, Value),
 386    (   memberchk(datatype=Type, Att)
 387    ->  Lit = type(Type, Value)
 388    ;   memberchk(xml:lang=Lang, Att)
 389    ->  Lit = lang(Lang, Value)
 390    ;   Lit = Value
 391    ).
 392value([element(sparql:uri, [], [URI])], URI) :- !.
 393value([element(sparql:bnode, [], [NodeID])], URI) :-
 394    !,
 395    bnode(NodeID, URI).
 396value([element(sparql:unbound, [], [])], '$null$').
 397
 398
 399lit_value([], '').
 400lit_value([Value], Value).
 401
 402
 403%!  sub_element(+DOM, +Name, -Atttribs, -Content)
 404
 405sub_element(element(Name, Att, Content), Name, Att, Content).
 406sub_element(element(_, _, List), Name, Att, Content) :-
 407    sub_element(List, Name, Att, Content).
 408sub_element([H|T], Name, Att, Content) :-
 409    (   sub_element(H, Name, Att, Content)
 410    ;   sub_element(T, Name, Att, Content)
 411    ).
 412
 413
 414bnode(Name, URI) :-
 415    bnode_map(Name, URI),
 416    !.
 417bnode(Name, URI) :-
 418    gensym('__bnode', URI0),
 419    assertz(bnode_map(Name, URI0)),
 420    URI = URI0.
 421
 422
 423%!  sparql_read_json_result(+Input, -Result) is det.
 424%
 425%   The returned Result term is of the format:
 426%
 427%           * select(VarNames, Rows)
 428%           Where VarNames is a term v(Name, ...) and Rows is a
 429%           list of row(....) containing the column values in the
 430%           same order as the variable names.
 431%
 432%           * ask(Bool)
 433%           Where Bool is either =true= or =false=
 434%
 435%   @see http://www.w3.org/TR/rdf-sparql-json-res/
 436
 437sparql_read_json_result(Input, Result) :-
 438    setup_call_cleanup(
 439        open_input(Input, In, Close),
 440        read_json_result(In, Result),
 441        close_input(Close)).
 442
 443open_input(stream(In), In, Close) :-
 444    !,
 445    encoding(In, utf8, Close).
 446open_input(In, In, Close) :-
 447    is_stream(In),
 448    !,
 449    encoding(In, utf8, Close).
 450open_input(File, In, close(In)) :-
 451    open(File, read, In, [encoding(utf8)]).
 452
 453encoding(In, Encoding, Close) :-
 454    stream_property(In, encoding(Old)),
 455    (   Encoding == Old
 456    ->  Close = true
 457    ;   set_stream(In, encoding(Encoding)),
 458        Close = set_stream(In, Encoding, Old)
 459    ).
 460
 461close_input(close(In)) :-
 462    !,
 463    retractall(bnode_map(_,_)),
 464    close(In).
 465close_input(_) :-
 466    retractall(bnode_map(_,_)).
 467
 468read_json_result(In, Result) :-
 469    json_read(In, JSON),
 470    json_to_result(JSON, Result).
 471
 472json_to_result(json([ head    = json(Head),
 473                      results = json(Body)
 474                    ]),
 475               select(Vars, Rows)) :-
 476    memberchk(vars=VarList, Head),
 477    Vars =.. [v|VarList],
 478    memberchk(bindings=Bindings, Body),
 479    !,
 480    maplist(json_row(VarList), Bindings, Rows).
 481json_to_result(json(JSon), ask(Boolean)) :-
 482    memberchk(boolean = @(Boolean), JSon).
 483
 484
 485json_row(Vars, json(Columns), Row) :-
 486    maplist(json_cell, Vars, Columns, Values),
 487    !,
 488    Row =.. [row|Values].
 489json_row(Vars, json(Columns), Row) :-
 490    maplist(json_cell_or_null(Columns), Vars, Values),
 491    Row =.. [row|Values].
 492
 493json_cell(Var, Var=json(JValue), Value) :-
 494    memberchk(type=Type, JValue),
 495    jvalue(Type, JValue, Value).
 496
 497json_cell_or_null(Columns, Var, Value) :-
 498    memberchk(Var=json(JValue), Columns),
 499    !,
 500    memberchk(type=Type, JValue),
 501    jvalue(Type, JValue, Value).
 502json_cell_or_null(_, _, '$null$').
 503
 504jvalue(uri, JValue, URI) :-
 505    memberchk(value=URI, JValue).
 506jvalue(literal, JValue, literal(Literal)) :-
 507    memberchk(value=Value, JValue),
 508    (   memberchk('xml:lang'=Lang, JValue)
 509    ->  Literal = lang(Lang, Value)
 510    ;   memberchk('datatype'=Type, JValue)
 511    ->  Literal = type(Type, Value)
 512    ;   Literal = Value
 513    ).
 514jvalue('typed-literal', JValue, literal(type(Type, Value))) :-
 515    memberchk(value=Value, JValue),
 516    memberchk('datatype'=Type, JValue).
 517jvalue(bnode, JValue, URI) :-
 518    memberchk(value=NodeID, JValue),
 519    bnode(NodeID, URI).