View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2013-2015, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(turtle,
  37          [ rdf_load_turtle/3,                  % +Input, -Triples, +Options
  38            rdf_read_turtle/3,                  % +Input, -Triples, +Options
  39            rdf_process_turtle/3,               % +Input, :OnObject, +Options
  40                                                % re-exports
  41            rdf_save_turtle/2,                  % +File, +Options
  42            rdf_save_canonical_turtle/2,        % +File, +Options
  43            rdf_save_trig/2,                    % +File, +Options
  44            rdf_save_canonical_trig/2,          % +File, +Options
  45            rdf_save_ntriples/2                 % +File, +Options
  46          ]).
  47:- use_module(library(option)).
  48:- use_module(library(semweb/rdf_db)).
  49:- use_module(library(semweb/rdf_turtle_write)). % re-exports
  50:- use_module(library(uri)).
  51:- use_module(library(http/http_open)).
  52
  53:- meta_predicate
  54    rdf_process_turtle(+,2,+).
  55
  56:- predicate_options(rdf_load_turtle/3, 3,
  57                     [pass_to(rdf_read_turtle/3, 3)]).
  58:- predicate_options(rdf_process_turtle/3, 3,
  59                     [ anon_prefix(atom),
  60                       base_uri(atom),
  61                       base_used(-atom),
  62                       db(atom),
  63                       error_count(-integer),
  64                       namespaces(-list),
  65                       on_error(oneof([warning,error])),
  66                       prefixes(-list),
  67                       resources(oneof([uri,iri]))
  68                     ]).
  69:- predicate_options(rdf_read_turtle/3, 3,
  70                     [ anon_prefix(atom),
  71                       base_uri(atom),
  72                       base_used(-atom),
  73                       db(atom),
  74                       error_count(-integer),
  75                       namespaces(-list),
  76                       on_error(oneof([warning,error])),
  77                       prefixes(-list),
  78                       resources(oneof([uri,iri]))
  79                     ]).
  80
  81:- use_foreign_library(foreign(turtle)).
  82:- public                               % used by the writer
  83    turtle_pn_local/1,
  84    turtle_write_quoted_string/2,
  85    turtle_write_uri/2.
  86
  87/** <module> Turtle: Terse RDF Triple Language
  88
  89This module implements the Turtle  language   for  representing  the RDF
  90triple model as defined by Dave Beckett  from the Institute for Learning
  91and Research Technology University of Bristol  and later standardized by
  92the W3C RDF working group.
  93
  94This module acts as a plugin to   rdf_load/2,  for processing files with
  95one of the extensions =|.ttl|= or =|.n3|=.
  96
  97@see    http://www.w3.org/TR/turtle/ (used W3C Recommendation 25
  98        February 2014)
  99*/
 100
 101%!  rdf_read_turtle(+Input, -Triples, +Options)
 102%
 103%   Read a stream or file into a set of triples or quadruples (if
 104%   faced with TRiG input) of the format
 105%
 106%           rdf(Subject, Predicate, Object [, Graph])
 107%
 108%   The representation is consistent with the SWI-Prolog RDF/XML
 109%   and ntriples parsers.  Provided options are:
 110%
 111%           * base_uri(+BaseURI)
 112%           Initial base URI.  Defaults to file://<file> for loading
 113%           files.
 114%
 115%           * anon_prefix(+Prefix)
 116%           Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
 117%           If Prefix is not an atom blank nodes are generated as
 118%           node(1), node(2), ...
 119%
 120%           * format(+Format)
 121%           One of =auto= (default), =turtle= or =trig=.  The
  122%           auto mode switches to TRiG format if there is a
  123%           =|{|= before the first triple.  Finally, if the
 124%           format is explicitly stated as =turtle= and the
 125%           file appears to be a TRiG file, a warning is printed
 126%           and the data is loaded while ignoring the graphs.
 127%
 128%           * resources(URIorIRI)
 129%           Officially, Turtle resources are IRIs.  Quite a
 130%           few applications however send URIs.  By default we
 131%           do URI->IRI mapping because this rarely causes errors.
 132%           To force strictly conforming mode, pass =iri=.
 133%
 134%           * prefixes(-Pairs)
 135%           Return encountered prefix declarations as a
 136%           list of Alias-URI
 137%
 138%           * namespaces(-Pairs)
 139%           Same as prefixes(Pairs).  Compatibility to rdf_load/2.
 140%
 141%           * base_used(-Base)
 142%           Base URI used for processing the data.  Unified to
 143%           [] if there is no base-uri.
 144%
 145%           * on_error(+ErrorMode)
 146%           In =warning= (default), print the error and continue
 147%           parsing the remainder of the file.  If =error=, abort
 148%           with an exception on the first error encountered.
 149%
 150%           * error_count(-Count)
  151%           If on_error(warning) is active, this option can be
 152%           used to retrieve the number of generated errors.
 153%
 154%   @param  Input is one of stream(Stream), atom(Atom), a =http=,
 155%           =https= or =file= url or a filename specification as
 156%           accepted by absolute_file_name/3.
 157
 158rdf_read_turtle(In, Triples, Options) :-
 159    base_uri(In, BaseURI, Options),
 160    setup_call_cleanup(
 161        ( open_input(In, Stream, Close),
 162          create_turtle_parser(Parser, Stream,
 163                               [ base_uri(BaseURI)
 164                               | Options
 165                               ])
 166        ),
 167        ( turtle_parse(Parser, Triples,
 168                       [ parse(document)
 169                       | Options
 170                       ]),
 171          post_options(Parser, Options)
 172        ),
 173        ( destroy_turtle_parser(Parser),
 174          call(Close)
 175        )).
 176
 177%!  rdf_load_turtle(+Input, -Triples, +Options)
 178%
 179%   @deprecated Use rdf_read_turtle/3
 180
 181rdf_load_turtle(Input, Triples, Options) :-
 182    rdf_read_turtle(Input, Triples, Options).
 183
 184
 185%!  rdf_process_turtle(+Input, :OnObject, +Options) is det.
 186%
 187%   Streaming  Turtle  parser.  The  predicate  rdf_process_turtle/3
 188%   processes Turtle data from Input, calling   OnObject with a list
 189%   of triples for every Turtle _statement_ found in Input. OnObject
 190%   is  called  as  below,  where  `ListOfTriples`   is  a  list  of
 191%   rdf(S,P,O) terms for a normal Turtle  file or rdf(S,P,O,G) terms
 192%   if the =GRAPH= keyword is used to  associate a set of triples in
 193%   the document with  a  particular   graph.  The  `Graph` argument
 194%   provides the default graph for storing the triples and _Line_ is
 195%   the line number where the statement started.
 196%
 197%     ==
 198%     call(OnObject, ListOfTriples, Graph:Line)
 199%     ==
 200%
 201%   This predicate supports the same Options as rdf_load_turtle/3.
 202%
 203%   Errors encountered are sent to  print_message/2, after which the
 204%   parser tries to recover and parse the remainder of the data.
 205%
  206%   @see  This  predicate  is  normally    used  by  rdf_load/2  for
 207%   processing RDF data.
 208
 209rdf_process_turtle(In, OnObject, Options) :-
 210    base_uri(In, BaseURI, Options),
 211    option(graph(Graph), Options, BaseURI),
 212    setup_call_cleanup(
 213        ( open_input(In, Stream, Close),
 214          create_turtle_parser(Parser, Stream, Options)
 215        ),
 216        ( process_turtle(Parser, Stream, OnObject, Graph,
 217                         [ parse(statement)
 218                         ]),
 219          post_options(Parser, Options)
 220        ),
 221        ( destroy_turtle_parser(Parser),
 222          call(Close)
 223        )).
 224
 225post_options(Parser, Options) :-
 226    prefix_option(Parser, Options),
 227    namespace_option(Parser, Options),
 228    base_option(Parser, Options),
 229    error_option(Parser, Options).
 230
 231prefix_option(Parser, Options) :-
 232    (   option(prefixes(Pairs), Options)
 233    ->  turtle_prefixes(Parser, Pairs)
 234    ;   true
 235    ).
 236namespace_option(Parser, Options) :-
 237    (   option(namespaces(Pairs), Options)
 238    ->  turtle_prefixes(Parser, Pairs)
 239    ;   true
 240    ).
 241base_option(Parser, Options) :-
 242    (   option(base_used(Base), Options)
 243    ->  turtle_base(Parser, Base)
 244    ;   true
 245    ).
 246error_option(Parser, Options) :-
 247    (   option(error_count(Count), Options)
 248    ->  turtle_error_count(Parser, Count)
 249    ;   true
 250    ).
 251
 252
 253process_turtle(_Parser, Stream, _OnObject, _Graph, _Options) :-
 254    at_end_of_stream(Stream),
 255    !.
 256process_turtle(Parser, Stream, OnObject, Graph, Options) :-
 257    line_count(Stream, LineNo),
 258    turtle_parse(Parser, Triples,
 259                 [ parse(statement)
 260                 | Options
 261                 ]),
 262    call(OnObject, Triples, Graph:LineNo),
 263    process_turtle(Parser, Stream, OnObject, Graph, Options).
 264
 265
 266%!  open_input(+Input, -Stream, -Close) is det.
 267%
 268%   Open given input.
 269%
 270%   @param  Close goal to undo the open action
 271%   @tbd    Synchronize with input handling of rdf_db.pl.
 272%   @error  existence_error, permission_error
 273
 274open_input(stream(Stream), Stream, Close) :-
 275    !,
 276    stream_property(Stream, encoding(Old)),
 277    (   unicode_encoding(Old)
 278    ->  Close = true
 279    ;   set_stream(Stream, encoding(utf8)),
 280        Close = set_stream(Stream, encoding(Old))
 281    ).
 282open_input(Stream, Stream, Close) :-
 283    is_stream(Stream),
 284    !,
 285    open_input(stream(Stream), Stream, Close).
 286open_input(atom(Atom), Stream, close(Stream)) :-
 287    !,
 288    atom_to_memory_file(Atom, MF),
 289    open_memory_file(MF, read, Stream, [free_on_close(true)]).
 290open_input(URL, Stream, close(Stream)) :-
 291    (   sub_atom(URL, 0, _, _, 'http://')
 292    ;   sub_atom(URL, 0, _, _, 'https://')
 293    ),
 294    !,
 295    http_open(URL, Stream, []),
 296    set_stream(Stream, encoding(utf8)).
 297open_input(URL, Stream, close(Stream)) :-
 298    uri_file_name(URL, Path),
 299    !,
 300    open(Path, read, Stream, [encoding(utf8)]).
 301open_input(File, Stream, close(Stream)) :-
 302    absolute_file_name(File, Path,
 303                       [ access(read),
 304                         extensions([ttl, ''])
 305                       ]),
 306    open(Path, read, Stream, [encoding(utf8)]).
 307
 308unicode_encoding(utf8).
 309unicode_encoding(wchar_t).
 310unicode_encoding(unicode_be).
 311unicode_encoding(unicode_le).
 312
 313%!  base_uri(+Input, -BaseURI, +Options)
 314%
 315%   Determine the base uri to use for processing.
 316
 317base_uri(_Input, BaseURI, Options) :-
 318    option(base_uri(BaseURI), Options),
 319    !.
 320base_uri(_Input, BaseURI, Options) :-
 321    option(graph(BaseURI), Options),
 322    !.
 323base_uri(stream(Input), BaseURI, _Options) :-
 324    stream_property(Input, file_name(Name)),
 325    !,
 326    name_uri(Name, BaseURI).
 327base_uri(Stream, BaseURI, Options) :-
 328    is_stream(Stream),
 329    !,
 330    base_uri(stream(Stream), BaseURI, Options).
 331base_uri(Name, BaseURI, _Options) :-
 332    atom(Name),
 333    !,
 334    name_uri(Name, BaseURI).
 335base_uri(_, 'http://www.example.com/', _).
 336
 337name_uri(Name, BaseURI) :-
 338    uri_is_global(Name),
 339    !,
 340    uri_normalized(Name, BaseURI).
 341name_uri(Name, BaseURI) :-
 342    uri_file_name(BaseURI, Name).
 343
 344
 345                 /*******************************
 346                 *          WRITE SUPPORT       *
 347                 *******************************/
 348
 349%!  turtle_pn_local(+Atom:atom) is semidet.
 350%
 351%   True if Atom is a  valid   Turtle  _PN_LOCAL_ name. The PN_LOCAL
 352%   name is what can follow the : in  a resource. In the new Turtle,
 353%   this can be anything and this   function becomes meaningless. In
 354%   the old turtle, PN_LOCAL is defined   similar (but not equal) to
 355%   an XML name. This predicate  is   used  by  rdf_save_turtle/2 to
 356%   write files such that can be read by old parsers.
 357%
 358%   @see xml_name/2.
 359
 360%!  turtle_write_quoted_string(+Out, +Value, ?WriteLong) is det.
 361%
 362%   Write Value (an atom)  as  a   valid  Turtle  string.  WriteLong
  363%   determines whether the string is written  as a _short_ or _long_
 364%   string.  It takes the following values:
 365%
 366%     * true
  367%     Use Turtle's long string syntax. Embedded newlines and
  368%     single or double quotes are emitted verbatim.
  369%     * false
  370%     Use Turtle's short string syntax.
 371%     * Var
 372%     If WriteLong is unbound, this predicate uses long syntax
 373%     if newlines appear in the string and short otherwise.  WriteLong
 374%     is unified with the decision taken.
 375
 376%!  turtle_write_quoted_string(+Out, +Value) is det.
 377%
 378%   Same as turtle_write_quoted_string(Out, Value, false), writing a
 379%   string with only a single =|"|=.   Embedded newlines are escapes
 380%   as =|\n|=.
 381
 382turtle_write_quoted_string(Out, Text) :-
 383    turtle_write_quoted_string(Out, Text, false).
 384
 385%!  turtle_write_uri(+Out, +Value) is det.
 386%
 387%   Write a URI as =|<...>|=
 388
 389
 390                 /*******************************
 391                 *          RDF-DB HOOK         *
 392                 *******************************/
 393
 394:- multifile
 395    rdf_db:rdf_load_stream/3,
 396    rdf_db:rdf_file_type/2.
 397
 398%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options)
 399%
 400%   (Turtle clauses)
 401
 402rdf_db:rdf_load_stream(turtle, Stream, Options) :-
 403    load_turtle_stream(Stream, Options).
 404rdf_db:rdf_load_stream(trig, Stream, Options) :-
 405    load_turtle_stream(Stream, Options).
 406
 407load_turtle_stream(Stream, _Module:Options) :-
 408    rdf_db:graph(Options, Graph),
 409    atom_concat('_:', Graph, BNodePrefix),
 410    rdf_transaction((  rdf_process_turtle(Stream, assert_triples,
 411                                          [ anon_prefix(BNodePrefix)
 412                                          | Options
 413                                          ]),
 414                       rdf_set_graph(Graph, modified(false))
 415                    ),
 416                    parse(Graph)).
 417
 418assert_triples([], _).
 419assert_triples([H|T], Location) :-
 420    assert_triple(H, Location),
 421    assert_triples(T, Location).
 422
 423assert_triple(rdf(S,P,O), Location) :-
 424    rdf_assert(S,P,O,Location).
 425assert_triple(rdf(S,P,O,G), _) :-
 426    rdf_assert(S,P,O,G).
 427
 428
 429rdf_db:rdf_file_type(ttl,  turtle).
 430rdf_db:rdf_file_type(n3,   turtle).     % not really, but good enough
 431rdf_db:rdf_file_type(trig, trig).
 432
 433
 434                 /*******************************
 435                 *             MESSAGES         *
 436                 *******************************/
 437
 438:- multifile prolog:error_message//1.
 439
 440prolog:error_message(existence_error(turtle_prefix, '')) -->
 441    [ 'Turtle empty prefix (:) is not defined' ].