View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2009-2017, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(rdf_turtle_write,
  37          [ rdf_save_turtle/2,                  % +File, +Options
  38            rdf_save_canonical_turtle/2,        % +File, +Options
  39            rdf_save_trig/2,                    % +File, +Options
  40            rdf_save_canonical_trig/2,          % +File, +Options
  41            rdf_save_ntriples/2                 % +File, +Options
  42          ]).
  43:- use_module(library(semweb/rdf_db)).
  44:- use_module(library(semweb/turtle), []). % we make calls to public preds here
  45:- use_module(library(option)).
  46:- use_module(library(record)).
  47:- use_module(library(error)).
  48:- use_module(library(lists)).
  49:- use_module(library(rbtrees)).
  50:- use_module(library(apply)).
  51:- use_module(library(url)).
  52:- use_module(library(pairs)).
  53:- use_module(library(debug)).
  54:- use_module(library(sgml_write)).
  55:- use_module(library(sgml)).
  56
  57:- predicate_options(rdf_save_turtle/2, 2,
  58                     [ graph(atom),
  59                       base(atom),
  60                       encoding(oneof([utf8])),
  61                       indent(nonneg),
  62                       tab_distance(nonneg),
  63                       silent(boolean),
  64                       subject_white_lines(nonneg),
  65                       align_prefixes(boolean),
  66                       user_prefixes(boolean),
  67                       prefixes(list),
  68                       only_known_prefixes(boolean),
  69                       comment(boolean),
  70                       group(boolean),
  71                       inline_bnodes(boolean),
  72                       single_line_bnodes(boolean),
  73                       abbreviate_literals(boolean),
  74                       canonize_numbers(boolean),
  75                       canonical(boolean),
  76                       a(boolean),
  77                       expand(any)
  78                     ]).
  79:- predicate_options(rdf_save_canonical_turtle/2, 2,
  80                     [ pass_to(rdf_save_turtle/2, 2)
  81                     ]).
  82
  83/** <module> Turtle - Terse RDF Triple Language writer
  84
  85This module implements the Turtle  language   for  representing  the RDF
  86triple model as defined by Dave Beckett  from the Institute for Learning
  87and Research Technology University of Bristol in the document:
  88
  89  * http://www.w3.org/TeamSubmission/turtle/
  90  * http://www.w3.org/TeamSubmission/2008/SUBM-turtle-20080114/#sec-conformance
  91
  92The Turtle format is designed as an   RDF  serialization that is easy to
  93read and write by both machines and  humans. Due to the latter property,
  94this library goes a long way in trying to produce human-readable output.
  95
  96In addition to the  human-readable  format,   this  library  can write a
  97_canonical_ representation of RDF graphs.   The canonical representation
  98has the following properties:
  99
 100  * Equivalent graphs result in the same document.  Graphs are
 101  considered equivalent iff they contain the same _set_ of
 102  triples, regardless of the labeling of blank nodes in the
 103  graph.
 104
 105  * Changes to the graph are diff-friendly.  This means
 106
 107    - Prefixes are combined in the header and thus changes
 108    to the namespaces only result in changes in the header.
 109    - Blank nodes that are used only once (including collections)
 110    are written in-line with the object they belong to.
  111    - For other blank nodes we try to realise stable labeling that
 112    is based on property-values.
 113
 114@tbd    Low-level string output takes 28% of the time.  Move to C?
 115*/
 116
 117:- record
 118    tw_state(graph,                 % graph being saved
 119             graphs:list(atom),     % TriG graphs being saved
 120             base,                  % The base-URI
 121             encoding=utf8,         % Desired encoding
 122             indent:nonneg=8,       % Indent for ; and ,-lists
 123             tab_distance:nonneg=8, % Tab distance
 124             silent:boolean=false,  % If true, do not print a message
 125             subject_white_lines:nonneg=1,%Extra lines between subjects
 126             a:boolean=true,        % Use 'a' for rdf:type
 127             align_prefixes:boolean=true,%Align prefix declarations
 128             prefixes:list,         % Provide prefixes
 129             user_prefixes:boolean=true,% Use rdf_current_ns/2?
 130             only_known_prefixes:boolean=false,% Only use known prefixes
 131             comment:boolean=true,  % write some comments into the file
 132             group:boolean=true,    % Group using ; and ,
 133             inline_bnodes:boolean=true, % Inline single-used bnodes
 134             single_line_bnodes:boolean=false, % No newline after ;
 135             abbreviate_literals:boolean=true, % Abbreviate known types
 136             canonize_numbers:boolean=false, % How to write numbers
 137             canonical:boolean=false,
 138             expand:any=lookup,     % Access to the triples
 139                                    % Private fields
 140             bnode_id=0,            % Incrementing bnode-id
 141             nodeid_map,            % RBTree mapping NodeIDs to Refs
 142             bnode_hash,            % RBTree holding reuse-count of hashes
 143             subject_count=0,       % # subjects saved
 144             triple_count=0,        % # triples saved
 145             base_root,             % Root URL of base
 146             base_dir,              % Directory
 147             base_path,             % Path of base
 148             prefix_map).           % List of Prefix-Map
 149
 150
 151:- meta_predicate
 152    rdf_save_turtle(+, :),
 153    rdf_save_canonical_turtle(+, :),
 154    rdf_save_trig(+, :).
 155
 156%!  rdf_save_turtle(+Out, :Options) is det.
 157%
 158%   Save an RDF graph as Turtle.  Options processed are:
 159%
 160%       * a(+Boolean)
 161%       If =true= (default), use =a= for the predicate =rdf:type=.
 162%       Otherwise use the full resource.
 163%       * align_prefixes(+Boolean)
 164%       Nicely align the @prefix declarations
 165%       * base(+Base)
 166%       Save relative to the given Base
 167%       * canonize_numbers(+Boolean)
 168%       If =true= (default =false=), emit numeric datatypes using
 169%       Prolog's write to achieve canonical output.
 170%       * comment(+Boolean)
  171%       If =true= (default), write some informative comments
 172%       between the output segments
 173%       * encoding(+Encoding)
 174%       Encoding used for the output stream.  Default is UTF-8.
 175%       * expand(:Goal)
 176%       Query an alternative graph-representation.  See below.
 177%       * indent(+Column)
 178%       Indentation for ; -lists.  `0' does not indent, but
 179%       writes on the same line.  Default is 8.
 180%       * graph(+Graph)
 181%       Save only the named graph
 182%       * group(+Boolean)
 183%       If =true= (default), using P-O and O-grouping.
 184%       * inline_bnodes(+Boolean)
 185%       if =true= (default), inline bnodes that are used once.
 186%       * abbreviate_literals(+Boolean)
 187%       if =true= (default), omit the type if allowed by turtle.
 188%       * only_known_prefixes(+Boolean)
 189%       Only use prefix notation for known prefixes.  Without, some
 190%       documents produce _huge_ amounts of prefixes.
 191%       * prefixes(+List)
 192%       If provided, uses exactly these prefixes.  List is a list
 193%       of prefix specifications, where each specification is either
 194%       a term _Prefix_-_URI_ or a prefix that is known to
 195%       rdf_current_prefix/2.
 196%       * silent(+Boolean)
 197%       If =true= (default =false=), do not print the final
 198%       informational message.
 199%       * single_line_bnodes(+Bool)
 200%       If =true= (default =false=), write [...] and (...) on a
 201%       single line.
 202%       * subject_white_lines(+Count)
 203%       Extra white lines to insert between statements about a
 204%       different subject.  Default is 1.
 205%       * tab_distance(+Tab)
 206%       Distance between tab-stops.  `0' forces the library to
 207%       use only spaces for layout.  Default is 8.
 208%       * user_prefixes(+Boolean)
 209%       If =true= (default), use prefixes from rdf_current_prefix/2.
 210%
 211%   The option =expand= allows  for   serializing  alternative graph
 212%   representations. It is called through   call/5,  where the first
 213%   argument is the expand-option, followed  by   S,P,O,G.  G is the
 214%   graph-option (which is by  default   a  variable).  This notably
 215%   allows for writing RDF graphs   represented  as rdf(S,P,O) using
 216%   the following code fragment:
 217%
 218%       ==
 219%       triple_in(RDF, S,P,O,_G) :-
 220%           member(rdf(S,P,O), RDF).
 221%
 222%           ...,
 223%           rdf_save_turtle(Out, [ expand(triple_in(RDF)) ]),
 224%       ==
 225%
 226%   @param  Out is one of stream(Stream), a stream handle, a file-URL
 227%           or an atom that denotes a filename.
 228
 229rdf_save_turtle(Spec, QOptions) :-
 230    meta_options(is_meta, QOptions, Options),
 231    thread_self(Me),
 232    thread_statistics(Me, cputime, T0),
 233    must_be(list, Options),
 234    make_tw_state(Options, State0, _Rest),
 235    init_base(State0, State1),
 236    init_prefix_map(State1, State),
 237    tw_state_encoding(State, Enc),
 238    setup_call_cleanup(
 239        open_output(Spec, Enc, Stream, Cleanup),
 240        ( tw_prefix_map(State, Stream),
 241          tw_graph(State, Stream)
 242        ),
 243        Cleanup),
 244    thread_statistics(Me, cputime, T1),
 245    Time is T1-T0,
 246    tw_state_triple_count(State, SavedTriples),
 247    tw_state_subject_count(State, SavedSubjects),
 248    (   tw_state_silent(State, true)
 249    ->  true
 250    ;   print_message(informational,
 251                      rdf(saved(Spec, Time, SavedSubjects, SavedTriples)))
 252    ).
 253
 254is_meta(expand).
 255
 256%!  rdf_save_canonical_turtle(+Spec, :Options) is det.
 257%
 258%   Save triples in  a  canonical  format.   This  is  the  same  as
 259%   rdf_save_turtle/2, but using different defaults. In particular:
 260%
 261%       * encoding(utf8),
 262%       * indent(0),
 263%       * tab_distance(0),
 264%       * subject_white_lines(1),
 265%       * align_prefixes(false),
 266%       * user_prefixes(false)
 267%       * comment(false),
 268%       * group(false),
 269%       * single_line_bnodes(true)
 270%
 271%   @tbd Work in progress. Notably blank-node handling is
 272%   incomplete.
 273
 274rdf_save_canonical_turtle(Spec, M:Options) :-
 275    canonical_options(CannonicalOptions, Options),
 276    rdf_save_turtle(Spec, M:CannonicalOptions).
 277
 278canonical_options([ encoding(utf8),
 279                    indent(0),
 280                    tab_distance(0),
 281                    subject_white_lines(1),
 282                    align_prefixes(false),
 283                    user_prefixes(false),
 284                    comment(false),
 285                    group(false),
 286                    single_line_bnodes(true),
 287                    canonical(true)
 288                  | Options
 289                  ],
 290                  Options).
 291
 292
 293%!  rdf_save_ntriples(+Spec, :Options) is det.
 294%
 295%   Save RDF using ntriples format. The  ntriples format is a subset
 296%   of Turtle, writing each triple fully qualified on its own line.
 297
 298rdf_save_ntriples(File, Options):-
 299    rdf_save_turtle(File,
 300                    [ comment(false),
 301                      encoding(utf8),
 302                      group(false),
 303                      prefixes([]),
 304                      subject_white_lines(0),
 305                      a(false),
 306                      inline_bnodes(false),
 307                      abbreviate_literals(false)
 308                    | Options
 309                    ]).
 310
 311
 312%!  rdf_save_trig(+Spec, :Options) is det.
 313%
 314%   Save multiple RDF graphs into a TriG  file. Options are the same
 315%   as   for   rdf_save_turtle/2.   rdf_save_trig/2    ignores   the
 316%   graph(+Graph)  option  and  instead   processes  one  additional
 317%   option:
 318%
 319%     - graphs(+ListOfGraphs)
 320%     List of graphs to save. When omitted, all graphs in the RDF
 321%     store are stored in the TriG file.
 322
 323rdf_save_trig(Spec, QOptions) :-
 324    meta_options(is_meta, QOptions, Options),
 325    thread_self(Me),
 326    thread_statistics(Me, cputime, T0),
 327    must_be(list, Options),
 328    make_tw_state(Options, State0, _Rest),
 329    init_base(State0, State1),
 330    trig_graphs(State1, Graphs),
 331    init_prefix_map(State1, Graphs, State2),
 332    tw_state_encoding(State2, Enc),
 333    setup_call_cleanup(
 334        open_output(Spec, Enc, Stream, Cleanup),
 335        ( tw_prefix_map(State2, Stream),
 336          tw_trig_graphs(Graphs, Stream, State2, State)
 337        ),
 338        Cleanup),
 339    thread_statistics(Me, cputime, T1),
 340    Time is T1-T0,
 341    tw_state_triple_count(State, SavedTriples),
 342    tw_state_subject_count(State, SavedSubjects),
 343    length(Graphs, SavedGraphs),
 344    (   tw_state_silent(State, true)
 345    ->  true
 346    ;   print_message(informational,
 347                      rdf(saved(Spec, Time, SavedSubjects, SavedTriples, SavedGraphs)))
 348    ).
 349
 350%!  rdf_save_canonical_trig(+Spec, :Options) is det.
 351%
 352%   Save     triples     in     a      canonical     format.     See
  353%   rdf_save_canonical_turtle/2 for details.
 354
 355
 356rdf_save_canonical_trig(Spec, M:Options) :-
 357    canonical_options(CannonicalOptions, Options),
 358    rdf_save_trig(Spec, M:CannonicalOptions).
 359
 360tw_trig_graphs([], _, State, State).
 361tw_trig_graphs([H|T], Stream, State0, State) :-
 362    set_graph_of_tw_state(H, State0, State1),
 363    nl(Stream),
 364    tw_resource(H, State1, Stream),
 365    format(Stream, ' = {~n', []),
 366    tw_graph(State1, Stream),
 367    format(Stream, '~N}~n', []),
 368    set_bnode_id_of_tw_state(0, State1, State2),
 369    set_nodeid_map_of_tw_state(_, State2, State3),
 370    set_bnode_hash_of_tw_state(_, State3, State4),
 371    tw_trig_graphs(T, Stream, State4, State).
 372
 373
 374%!  trig_graphs(+State, -Graphs) is det.
 375%
 376%   True when Graphs is the (sorted) list of graphs we must save. If
 377%   the _expand_ argument is used and   no  graphs are specified, it
 378%   enumerates all triples and extracts the graphs.
 379
 380trig_graphs(State, Graphs) :-
 381    tw_state_graphs(State, Graphs),
 382    (   nonvar(Graphs)
 383    ->  true
 384    ;   tw_state_expand(State, Expand),
 385        (   Expand == lookup
 386        ->  findall(G, rdf_graph(G), Graphs0)
 387        ;   findall(G, call(Expand,_S,_P,_O,G), Graphs0)
 388        ),
 389        sort(Graphs0, Graphs)
 390    ).
 391
 392
 393%!  open_output(+Spec, +Encoding, -Stream, -Cleanup) is det.
 394%
 395%   Open output Spec, returning a stream using Encoding.
 396%
 397%   @param  Cleanup is a goal that must be used to revert the side
 398%           effects of open_output/4.
 399
 400open_output(stream(Out), Encoding, Out, Cleanup) :-
 401    !,
 402    stream_property(Out, encoding(Old)),
 403    (   (   Old == Encoding
 404        ;   Old == wchar_t          % Internal encoding
 405        )
 406    ->  Cleanup = true
 407    ;   set_stream(Out, encoding(Encoding)),
 408        Cleanup = set_stream(Out, encoding(Old))
 409    ).
 410open_output(Stream, Encoding, Out, Cleanup) :-
 411    \+ atom(Stream),
 412    is_stream(Stream),
 413    !,
 414    open_output(stream(Stream), Encoding, Out, Cleanup).
 415open_output(Spec, Encoding, Out,
 416            close(Out)) :-
 417    out_to_file(Spec, File),
 418    open(File, write, Out, [encoding(Encoding)]).
 419
 420out_to_file(URL, File) :-
 421    atom(URL),
 422    file_name_to_url(File, URL),
 423    !.
 424out_to_file(File, File).
 425
 426
 427                 /*******************************
 428                 *            PREFIXES          *
 429                 *******************************/
 430
 431%!  init_prefix_map(+State, -State) is det.
 432%
 433%   Set  the  prefix_map  of  State.  The  prefix  map  is  list  of
 434%   Prefix-URI of prefixes to use for   emitting the graph requested
 435%   in State. If multiple prefixes are present   where  the one is a
 436%   prefix of the other, the longer one appears first in the list.
 437
 438init_prefix_map(State0, State) :-
 439    tw_state_prefixes(State0, Prefixes),
 440    nonvar(Prefixes),
 441    !,
 442    user_prefix_map(Prefixes, PrefixMap),
 443    set_prefix_map_of_tw_state(PrefixMap, State0, State).
 444init_prefix_map(State0, State) :-
 445    tw_state_graph(State0, Graph),
 446    graph_prefix_map(State0, Graph, PrefixMap),
 447    set_prefix_map_of_tw_state(PrefixMap, State0, State).
 448
 449init_prefix_map(State0, _Graphs, State) :-      % TriG version
 450    tw_state_prefixes(State0, Prefixes),
 451    nonvar(Prefixes),
 452    !,
 453    user_prefix_map(Prefixes, PrefixMap),
 454    set_prefix_map_of_tw_state(PrefixMap, State0, State).
 455init_prefix_map(State0, Graphs, State) :-       % TriG version
 456    maplist(graph_prefixes(State0), Graphs, NestedPrefixes),
 457    append(NestedPrefixes, Prefixes0),
 458    sort(Prefixes0, Prefixes),
 459    prefix_map(State0, Prefixes, PrefixMap),
 460    set_prefix_map_of_tw_state(PrefixMap, State0, State).
 461
 462graph_prefix_map(State, Graph, PrefixMap) :-
 463    graph_prefixes(State, Graph, Prefixes),
 464    prefix_map(State, Prefixes, PrefixMap).
 465
 466graph_prefixes(State0, Graph, Prefixes) :-
 467    tw_state_expand(State0, Expand),
 468    tw_state_only_known_prefixes(State0, OnlyKnown),
 469    rdf_graph_prefixes(Graph, Prefixes,
 470                       [ filter(turtle_prefix(OnlyKnown)),
 471                         expand(Expand),
 472                         min_count(2),
 473                         get_prefix(turtle:iri_turtle_prefix)
 474                       ]).
 475
 476prefix_map(State, Prefixes, PrefixMap) :-
 477    remove_base(State, Prefixes, Prefixes2),
 478    prefix_names(Prefixes2, State, Pairs),
 479    transpose_pairs(Pairs, URI_Abrevs),
 480    reverse(URI_Abrevs, RURI_Abrevs),
 481    flip_pairs(RURI_Abrevs, PrefixMap).
 482
 483%!  user_prefix_map(+Prefixes, -PrefixMap) is det.
 484%
 485%   Convert a list of prefix specifications   to  a list Prefix-URI,
 486%   longest URI first.
 487
 488user_prefix_map(Prefixes, PrefixMap) :-
 489    must_be(list, Prefixes),
 490    maplist(prefix_pair, Prefixes, Pairs),
 491    map_list_to_pairs(prefix_length, Pairs, LenPairs),
 492    sort(LenPairs, LenPairs1),
 493    pairs_values(LenPairs1, RevPrefixMap),
 494    reverse(RevPrefixMap, PrefixMap).
 495
 496prefix_pair(Prefix-URI, Prefix-URI) :-
 497    !,
 498    must_be(atom, Prefix),
 499    must_be(atom, URI).
 500prefix_pair(Prefix, Prefix-URI) :-
 501    must_be(atom, Prefix),
 502    (   rdf_current_prefix(Prefix, URI)
 503    ->  true
 504    ;   existence_error(prefix, Prefix)
 505    ).
 506
 507prefix_length(_-URI, Len) :- atom_length(URI, Len).
 508
 509%!  turtle_prefix(+OnlyKnown, +Where, +Prefix, +URI) is semidet.
 510%
 511%   Test whether we want  to  include   the  proposed  prefix in the
 512%   @prefix declaration.
 513
 514:- public turtle_prefix/4.              % called through rdf_graph_prefixes/3.
 515
 516turtle_prefix(true, _, Prefix, _) :-
 517    !,
 518    rdf_current_prefix(_, Prefix),
 519    !.
 520turtle_prefix(_, _, Prefix, URI) :-
 521    sub_atom(Prefix, _, 1, 0, Last),
 522    turtle_prefix_char(Last),
 523    atom_concat(Prefix, Local, URI),
 524    \+ sub_atom(Local, _, _, _, '.').
 525
 526turtle_prefix_char('#').
 527turtle_prefix_char('/').
 528
 529
 530remove_base(State, Prefixes, PrefixesNoBase) :-
 531    tw_state_base_dir(State, BaseDir),
 532    atom(BaseDir),
 533    !,
 534    delete(Prefixes, BaseDir, PrefixesNoBase).
 535remove_base(_State, Prefixes, Prefixes).
 536
 537flip_pairs([], []).
 538flip_pairs([Key-Val|Pairs], [Val-Key|Flipped]) :-
 539    flip_pairs(Pairs, Flipped).
 540
 541prefix_names(URIs, State, Prefixes) :-
 542    prefix_names(URIs, State, 1, Prefixes, []).
 543
 544prefix_names([], _, _, List, List) :- !.
 545prefix_names(URIs, State, Len, Prefixes, Tail) :-
 546    prefix_names(URIs, State, Len, Prefixes, PTail, Rest),
 547    Len1 is Len + 1,
 548    prefix_names(Rest, State, Len1, PTail, Tail).
 549
 550prefix_names(URIs, State, Len, Prefixes, PTail, Rest) :-
 551    map_list_to_pairs(propose_abbrev(State, Len), URIs, Pairs),
 552    !,
 553    keysort(Pairs, Sorted),
 554    unique(Sorted, Prefixes, PTail, Rest).
 555prefix_names(URIs, _, _, Prefixes, PTail, []) :-
 556    number_prefixes(URIs, 1, Prefixes, PTail).
 557
 558number_prefixes([], _, PL, PL).
 559number_prefixes([H|T0], N, [P-H|PL], T) :-
 560    atomic_concat(ns, N, P),
 561    succ(N, N1),
 562    number_prefixes(T0, N1, PL, T).
 563
 564unique([], L, L, []).
 565unique([A-U|T0], [A-U|T], L, Rest) :-
 566    T0 \= [A-_|_],
 567    !,
 568    unique(T0, T, L, Rest).
 569unique([A-U|T0], Prefixes, L, [U|Rest0]) :-
 570    strip_keys(T0, A, T1, Rest0, Rest),
 571    unique(T1, Prefixes, L, Rest).
 572
 573strip_keys([A-U|T0], A, T, [U|R0], R) :-
 574    !,
 575    strip_keys(T0, A, T, R0, R).
 576strip_keys(L, _, L, R, R).
 577
 578
 579%!  propose_abbrev(+State, +Len, +URI, -Abbrev) is multi.
 580%
 581%   Propose an abbreviation for URI.  Backtracking yields longer
 582%   ones.
 583
 584propose_abbrev(_, _, URI, Abbrev) :-
 585    well_known_ns(Abbrev, URI),
 586    !.
 587propose_abbrev(State, _, URI, Abbrev) :-
 588    tw_state_user_prefixes(State, true),
 589    rdf_current_prefix(Abbrev, URI),
 590    !.
 591propose_abbrev(_, Len, URI, Abbrev) :-
 592    namespace_parts(URI, Parts),
 593    include(abbrev_part, Parts, Names),
 594    reverse(Names, RevNames),
 595    length(Use, Len),
 596    append(Use, _, RevNames),
 597    atomic_list_concat(Use, -, Abbrev).
 598
 599abbrev_part(X) :-
 600    xml_name(X),
 601    \+ well_known_ns(X, _),
 602    \+ well_known_extension(X).
 603
 604well_known_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
 605well_known_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
 606well_known_ns(owl,  'http://www.w3.org/2002/07/owl#').
 607well_known_ns(xsd,  'http://www.w3.org/2001/XMLSchema#').
 608well_known_ns(dc,   'http://purl.org/dc/elements/1.1/').
 609
 610well_known_extension(ttl).
 611well_known_extension(nt).
 612well_known_extension(n3).
 613well_known_extension(xml).
 614well_known_extension(rdf).
 615well_known_extension(owl).
 616
 617%!  namespace_parts(+URL, -Parts)
 618
 619namespace_parts(URL, Parts) :-
 620    atom_codes(URL, Codes),
 621    phrase(parts(Parts), Codes),
 622    !.
 623namespace_parts(URL, _) :-
 624    format(user_error, 'Couldn\'t split ~q~n', [URL]),
 625    fail.
 626
 627parts(List) --> sep2, parts2(List).
 628
 629parts2([H|T]) -->
 630    string(Codes),  {Codes \== []},
 631    sep,
 632    !,
 633    {atom_codes(H, Codes)},
 634    parts2(T).
 635parts2([]) --> [].
 636
 637string([]) --> [].
 638string([H|T]) --> [H], string(T).
 639
 640sep --> sep_char, sep2.
 641sep([], []).
 642
 643sep2 --> sep_char, !, sep2.
 644sep2 --> [].
 645
 646sep_char --> "/".
 647sep_char --> ":".
 648sep_char --> ".".
 649sep_char --> "?".
 650sep_char --> "#".
 651
 652
 653%!  init_base(+State0, -State) is det.
 654%
 655%   Initialise dealing with the base URI.  It sets two attributes of
 656%   the state: base_root and base_path.
 657
 658init_base(State0, State) :-
 659    tw_state_base(State0, BaseURI),
 660    atom(BaseURI),
 661    !,
 662    parse_url(BaseURI, Attributes),
 663    include(root_part, Attributes, RootAttrs),
 664    parse_url(BaseRoot, RootAttrs),
 665    memberchk(path(BasePath), Attributes),
 666    file_directory_name(BasePath, BaseDir),
 667    atomic_list_concat([BaseRoot, BaseDir, /], BaseDirURI),
 668    set_base_root_of_tw_state(BaseRoot, State0, State1),
 669    set_base_path_of_tw_state(BasePath, State1, State2),
 670    set_base_dir_of_tw_state(BaseDirURI, State2, State).
 671init_base(State, State).
 672
 673root_part(protocol(_)).
 674root_part(host(_)).
 675root_part(port(_)).
 676
 677
 678                 /*******************************
 679                 *              SAVE            *
 680                 *******************************/
 681
 682%!  tw_graph(+State, +Out) is det.
 683%
 684%   Write an RDF graph as Turtle data.
 685%
 686%   @tbd Write unconnected and multi-connected blank-nodes.
 687
 688tw_graph(State, Out) :-
 689    subjects(State, Subjects),
 690    length(Subjects, SubjectCount),
 691    inc_subject_count(State, SubjectCount),
 692    partition(rdf_is_bnode, Subjects, BNodes, ProperSubjects),
 693    maplist(pair_var, BNodes, Pairs),
 694    ord_list_to_rbtree(Pairs, BNTree),
 695    tw_state_nodeid_map(State, BNTree),
 696    (   ProperSubjects == []
 697    ->  true
 698    ;   length(ProperSubjects, PSCount),
 699        comment(State, 'Named toplevel resources (~D)', [PSCount], Out),
 700        tw_proper_subjects(ProperSubjects, State, Out)
 701    ),
 702    tw_bnodes(Pairs, State, Out).
 703
 704pair_var(BNode, BNode-_).
 705
 706tw_prefix_map(State, Out) :-
 707    tw_state_prefix_map(State, PrefixMap),
 708    tw_prefix_map(PrefixMap, State, Out).
 709
 710%!  tw_prefix_map(+PrefixMap, +State, +Out) is det.
 711%
 712%   Write the @base and @prefix declarations
 713
 714tw_prefix_map(PrefixMap, State, Out) :-
 715    tw_state_align_prefixes(State, true),
 716    !,
 717    longest_prefix(PrefixMap, 0, Length),
 718    PrefixCol is Length+10,
 719    tw_base(PrefixCol, State, Out),
 720    tw_prefix_map(PrefixMap, PrefixCol, State, Out).
 721tw_prefix_map(PrefixMap, State, Out) :-
 722    tw_base(0, State, Out),
 723    tw_prefix_map(PrefixMap, 0, State, Out).
 724
 725longest_prefix([], L, L).
 726longest_prefix([Prefix-_|T], L0, L) :-
 727    atom_length(Prefix, L1),
 728    L2 is max(L0, L1),
 729    longest_prefix(T, L2, L).
 730
 731
 732tw_base(Col, State, Out) :-
 733    tw_state_base(State, Base),
 734    atom(Base),
 735    !,
 736    format(Out, '@base ~t~*|', [Col]),
 737    turtle:turtle_write_uri(Out, Base),
 738    format(Out, ' .~n', []).
 739tw_base(_, _, _).
 740
 741
 742tw_prefix_map([], _, _, _).
 743tw_prefix_map([Prefix-URI|T], Col, State, Out) :-
 744    format(Out, '@prefix ~t~w: ~*|', [Prefix, Col]),
 745    tw_relative_uri(URI, State, Out),
 746    format(Out, ' .~n', []),
 747    (   T == []
 748    ->  true
 749    ;   tw_prefix_map(T, Col, State, Out)
 750    ).
 751
 752
 753%!  tw_proper_subjects(+Subjects, +State, +Out) is det.
 754%
 755%   Write the subjects that are not Bnodes.
 756
 757tw_proper_subjects([], _, _).
 758tw_proper_subjects([H|T], State, Out) :-
 759    separate_subjects(State, Out),
 760    tw_subject(H, H, State, Out),
 761    tw_proper_subjects(T, State, Out).
 762
 763
 764separate_subjects(State, Out) :-
 765    tw_state_subject_white_lines(State, ExtraLines),
 766    put_n(ExtraLines, '\n', Out).
 767
 768%!  tw_subject(+URI, +State, +Out) is det.
 769%
 770%   Write a toplevel non-bnode subject.
 771
 772tw_subject(URI, Ref, State, Out) :-
 773    subject_triples(URI, State, Pairs),
 774    length(Pairs, Count),
 775    inc_triple_count(State, Count),
 776    group_po(Pairs, Grouped),
 777    tw_subject_triples(Grouped, Ref, State, Out).
 778
 779group_po(Pairs, Grouped) :-
 780    group_pairs_by_key(Pairs, Grouped0),
 781    rdf_equal(rdf:type, RDFType),
 782    (   select(RDFType-Types, Grouped0, Grouped1)
 783    ->  Grouped = [RDFType-Types|Grouped1]
 784    ;   Grouped = Grouped0
 785    ).
 786
 787%!  tw_bnodes(+Pairs, +State, +Out) is det.
 788%
 789%   Write the Bnodes. Pairs is a list   URI-Ref, where Ref is one of
 790%   =written= if the Bnode is already written;   an integer if it is
 791%   used multiple times or a variable if   it  has not been written.
 792%   The order in which we deal with bnodes is defined as follows:
 793%
 794%       * First, write the bnodes that are not referenced at all
 795%       as toplevel bnodes using [ ... ] notation.
 796%
 797%       * Next, write the bnodes that need written as toplevel
 798%       nodes using the _:XX notation because they are referenced
 799%       multiple times in the graph. Continue this process until it
 800%       is exhausted.
 801
 802tw_bnodes(Pairs, State, Out) :-
 803    tw_top_bnodes(Pairs, State, Out, Rest1),
 804    tw_numbered_bnodes(Rest1, State, Out, 1, Rest2),
 805    tw_cyclic_bnodes(Rest2, State, Out, 0).
 806
 807
 808tw_numbered_bnodes([], _, _, _, []) :- !.
 809tw_numbered_bnodes(Pairs, State, Out, Level, Rest) :-
 810    multi_referenced(Pairs, RefPairs, Rest0),
 811    (   RefPairs == []
 812    ->  Rest = Rest0
 813    ;   length(RefPairs, Count),
 814        comment(State, 'Level ~D multi-referenced blank-nodes (~D)',
 815                [ Level, Count ], Out),
 816        tw_ref_bnodes(RefPairs, State, Out),
 817        Level1 is Level + 1,
 818        tw_numbered_bnodes(Rest0, State, Out, Level1, Rest)
 819    ).
 820
 821multi_referenced([], [], []).
 822multi_referenced([H|T], RefPairs, Rest) :-
 823    H = _-Ref,
 824    (   Ref == written
 825    ->  multi_referenced(T, RefPairs, Rest)
 826    ;   var(Ref)
 827    ->  Rest = [H|TR],
 828        multi_referenced(T, RefPairs, TR)
 829    ;   assertion(Ref = bnode(_)),
 830        RefPairs = [H|TRP],         % assigned reference
 831        multi_referenced(T, TRP, Rest)
 832    ).
 833
 834tw_ref_bnodes([], _, _).
 835tw_ref_bnodes([BNode-Ref|T], State, Out) :-
 836    separate_subjects(State, Out),
 837    tw_subject(BNode, Ref, State, Out),
 838    tw_ref_bnodes(T, State, Out).
 839
 840
 841%!  tw_top_bnodes(+Pairs, +State, +Out, -Rest)
 842%
 843%   Write the top bnodes: those that  do   not  appear  as an object
 844%   anywhere.
 845
 846tw_top_bnodes(Pairs, State, Out, Rest) :-
 847    unreferenced(Pairs, State, TopBNodes, Rest),
 848    (   TopBNodes == []
 849    ->  true
 850    ;   length(TopBNodes, Count),
 851        comment(State, 'Toplevel blank-nodes (~D)', [Count], Out),
 852        sort_bnodes(TopBNodes, SortedTopBNodes, State),
 853        tw_top_bnodes(SortedTopBNodes, State, Out)
 854    ).
 855
 856unreferenced([], _, [], []).
 857unreferenced([H|T], State, UnrefPairs, Rest) :-
 858    H = BNode-Ref,
 859    (   Ref == written
 860    ->  unreferenced(T, State, UnrefPairs, Rest)
 861    ;   var(Ref),
 862        object_link_count(BNode, State, 0)
 863    ->  UnrefPairs = [H|URT],
 864        unreferenced(T, State, URT, Rest)
 865    ;   Rest = [H|TR],
 866        unreferenced(T, State, UnrefPairs, TR)
 867    ).
 868
 869tw_top_bnodes([], _, _).
 870tw_top_bnodes([BNode-_|T], State, Out) :-
 871    tw_bnode(BNode, State, Out),
 872    tw_top_bnodes(T, State, Out).
 873
 874
 875tw_bnode(BNode, State, Out) :-
 876    subject_triples(BNode, State, Pairs),
 877    length(Pairs, Count),
 878    inc_triple_count(State, Count),
 879    (   tw_state_inline_bnodes(State, true)
 880    ->  tw_bnode_triples(Pairs, State, Out),
 881        format(Out, ' .~n', [])
 882    ;   next_bnode_id(State, BNode, Ref),
 883        tw_bnode_ntriples(Pairs, Ref, State, Out)
 884    ).
 885
 886tw_bnode_triples(Pairs, State, Out) :-
 887    group_po(Pairs, Grouped),
 888    (   tw_state_single_line_bnodes(State, true)
 889    ->  format(Out, '[ ', []),
 890        tw_triples(Grouped, -1, State, Out),
 891        format(Out, ' ]', [])
 892    ;   line_position(Out, Indent),
 893        format(Out, '[ ', []),
 894        line_position(Out, AIndent),
 895        tw_triples(Grouped, AIndent, State, Out),
 896        nl_indent(Out, State, Indent),
 897        format(Out, ']', [])
 898    ).
 899
 900tw_bnode_ntriples([], _, _, _).
 901tw_bnode_ntriples([P-O|T], Ref, State, Out) :-
 902    tw_bnode_ref(Ref, Out),
 903    format(Out, ' ', []),
 904    tw_predicate(P, State, Out),
 905    format(Out, ' ', []),
 906    tw_object(O, State, Out),
 907    format(Out, ' .~n', []),
 908    tw_bnode_ntriples(T, Ref, State, Out).
 909
 910
 911%!  tw_cyclic_bnodes(+Pairs, +BNode, +State, +Out, +Cycle)
 912%
 913%   The rest. These are groups of bnodes  that are reachable, but we
 914%   cannot find a starting point, neither from a named resource, nor
 915%   from an unlinked bnode. As long as we are not considering stable
 916%   canonical output, we can break the cycle at any point.
 917
 918tw_cyclic_bnodes([], _State, _Out, _) :- !.
 919tw_cyclic_bnodes(Pairs, State, Out, Cycle0) :-
 920    (   tw_state_canonical(State, true)
 921    ->  sort_bnode_pairs(Pairs, BNodes, State)
 922    ;   BNodes = Pairs
 923    ),
 924    succ(Cycle0, Cycle),
 925    BNodes = [BNode-Ref|_],
 926    next_bnode_id(State, BNode, Ref),
 927    comment(State, 'Breaking cycle ~D', [Cycle], Out),
 928    tw_numbered_bnodes(Pairs, State, Out, 1, Rest),
 929    tw_cyclic_bnodes(Rest, State, Out, Cycle).
 930
 931
 932%!  tw_subject_triples(+Grouped, +Subject, +State, +Out)
 933%
 934%   Save triples on Subject.  Combine groups of triples with the
 935%   same subject (;) and same subject+predicate (,).
 936%
 937%   @param  Subject is either a URI or an integer.  The latter is
 938%           used for writing a named bnode.
 939
 940tw_subject_triples([], _, _, _) :- !.
 941tw_subject_triples(Grouped, URI, State, Out) :-
 942    tw_state_group(State, false),
 943    !,
 944    tw_ungrouped_triples(Grouped, URI, State, Out).
 945tw_subject_triples(Grouped, URI, State, Out) :-
 946    tw_resource(URI, State, Out),
 947    (   tw_state_indent(State, Indent),
 948        Indent > 0
 949    ->  nl_indent(Out, State, Indent)
 950    ;   put_char(Out, ' '),
 951        line_position(Out, Indent)
 952    ),
 953    tw_triples(Grouped, Indent, State, Out),
 954    format(Out, ' .~n', []).
 955
 956%!  tw_ungrouped_triples(+Grouped, +URI, +State, +Out)
 957%
 958%   Write triples for subject URI as one line per triple.  Used
 959%   for canonical output.
 960
 961tw_ungrouped_triples([], _, _, _).
 962tw_ungrouped_triples([P-Vs|Groups], URI, State, Out) :-
 963    partition(rdf_is_bnode, Vs, BNVs, ProperVs),
 964    tw_ungrouped_values(ProperVs, P, URI, State, Out),
 965    sort_bnodes(BNVs, SortedBNVs, State),
 966    tw_ungrouped_values(SortedBNVs, P, URI, State, Out),
 967    tw_ungrouped_triples(Groups, URI, State, Out).
 968
 969tw_ungrouped_values([], _, _, _, _).
 970tw_ungrouped_values([V|T], P, URI, State, Out) :-
 971    tw_resource(URI, State, Out),
 972    put_char(Out, ' '),
 973    tw_predicate(P, State, Out),
 974    put_char(Out, ' '),
 975    tw_object(V, State, Out),
 976    format(Out, ' .~n', []),
 977    tw_ungrouped_values(T, P, URI, State, Out).
 978
 979
 980%!  tw_triples(+Groups, +Indent, +State, +Out) is det.
 981%
 982%   Triple writer that uses ; and ,- grouping
 983
 984tw_triples([P-Vs|MoreGroups], Indent, State, Out) :-
 985    tw_write_pvs(Vs, P, State, Out),
 986    (   MoreGroups == []
 987    ->  true
 988    ;   format(Out, ' ;', []),
 989        nl_indent(Out, State, Indent),
 990        tw_triples(MoreGroups, Indent, State, Out)
 991    ).
 992
 993tw_write_pvs(Values, P, State, Out) :-
 994    tw_predicate(P, State, Out),
 995    put_char(Out, ' '),
 996    line_position(Out, Indent),
 997    tw_write_vs(Values, Indent, State, Out).
 998
 999tw_predicate(P, State, Out) :-
1000    (   rdf_equal(P, rdf:type),
1001        tw_state_a(State, true)
1002    ->  format(Out, 'a', [])
1003    ;   tw_resource(P, State, Out)
1004    ).
1005
%   tw_write_vs(+Values, +Indent, +State, +Out)
%
%   Write the objects in Values, separated by ' ,' followed by
%   nl_indent/3 to Indent.  There is no clause for an empty Values.

tw_write_vs([Value|Values], Indent, State, Out) :-
    tw_object(Value, State, Out),
    (   Values \== []
    ->  format(Out, ' ,', []),
        nl_indent(Out, State, Indent),
        tw_write_vs(Values, Indent, State, Out)
    ;   true
    ).
1014
%!  tw_object(+Value, +State, +Out) is det.
%
%   Write the object of a triple.  Blank nodes are handled by
%   tw_bnode_object/3, other atoms are written as a resource and
%   anything else is handed to tw_literal/3.

tw_object(Value, State, Out) :-
    (   rdf_is_bnode(Value)
    ->  tw_bnode_object(Value, State, Out)
    ;   atom(Value)
    ->  tw_resource(Value, State, Out)
    ;   tw_literal(Value, State, Out)
    ).
1029
%!  tw_bnode_object(+BNode, +State, +Out) is det.
%
%   Write a blank node that appears as object.  The cases are tried
%   in this order:
%
%       * BNode is in the node-id map of State and already has a
%         reference: write that reference.
%       * BNode is in the map without a reference: if bnodes are
%         inlined and tw_unshared_bnode/3 can write it in place, mark
%         it as =written=.  Otherwise generate a node-id, which is
%         thereby stored in the map, and write it.
%       * BNode is not in the map and is the object of more than one
%         triple: add it to the map together with a freshly generated
%         node-id and write that id.
%       * Otherwise: generate a node-id and write it.

tw_bnode_object(BNode, State, Out) :-
    tw_state_nodeid_map(State, NodeIds),
    rb_lookup(BNode, Ref, NodeIds),
    !,
    (   nonvar(Ref)
    ->  tw_bnode_ref(Ref, Out)
    ;   tw_state_inline_bnodes(State, true),
        tw_unshared_bnode(BNode, State, Out)
    ->  Ref = written
    ;   next_bnode_id(State, BNode, Ref),
        tw_bnode_ref(Ref, Out)
    ).
tw_bnode_object(BNode, State, Out) :-
    object_link_count(BNode, State, LinkCount),
    LinkCount > 1,
    !,
    tw_state_nodeid_map(State, NodeIds0),
    rb_insert(NodeIds0, BNode, Ref, NodeIds),
    set_nodeid_map_of_tw_state(NodeIds, State),
    next_bnode_id(State, BNode, Ref),   % binds the Ref stored above
    tw_bnode_ref(Ref, Out).
tw_bnode_object(BNode, State, Out) :-
    next_bnode_id(State, BNode, Ref),
    tw_bnode_ref(Ref, Out).
1066
%   tw_bnode_ref(+Ref, +Out)
%
%   Write the blank node reference Ref = bnode(Id).  An integer Id is
%   written as `_:bn<Id>`, any other Id as `_:<Id>`.  Fails if Ref is
%   not a bnode(_) term.

tw_bnode_ref(bnode(Id), Out) :-
    (   integer(Id)
    ->  Template = '_:bn~w'
    ;   Template = '_:~w'
    ),
    format(Out, Template, [Id]).
1072
%!  tw_unshared_bnode(+BNode, +State, +Out) is semidet.
%
%   Write BNode in place if it is the object of exactly one triple.
%   A node without triples is written as `[]`, a node recognised by
%   pairs_unshared_collection/3 as `( ... )` and any other node using
%   tw_bnode_triples/3.

tw_unshared_bnode(BNode, State, Out) :-
    object_link_count(BNode, State, 1),
    subject_triples(BNode, State, POs),
    (   POs == []
    ->  format(Out, '[]', [])
    ;   pairs_unshared_collection(POs, State, Members)
    ->  tw_inline_collection(Members, BNode, State, Out)
    ;   tw_bnode_triples(POs, State, Out)
    ).

%   tw_inline_collection(+Members, +BNode, +State, +Out)
%
%   Write the collection Members that starts at BNode.  BNode is
%   marked as =written= in the node-id map, which fails if it is not
%   in the map or already has another reference.  The triple count is
%   incremented by two for each member.  The layout follows the
%   single_line_bnodes option in the same way as tw_bnode_triples/3.

tw_inline_collection(Members, _BNode, _State, Out) :-
    Members == [],
    !,
    format(Out, '()', []).
tw_inline_collection(Members, BNode, State, Out) :-
    tw_state_nodeid_map(State, NodeIds),
    rb_lookup(BNode, written, NodeIds),
    length(Members, NMembers),
    NTriples is 2*NMembers,
    inc_triple_count(State, NTriples),
    (   tw_state_single_line_bnodes(State, true)
    ->  format(Out, '( ', []),
        tw_collection(Members, -1, State, Out),
        format(Out, ' )', [])
    ;   line_position(Out, OpenColumn),         % column of '('
        format(Out, '( ', []),
        line_position(Out, BodyColumn),         % column after '( '
        tw_collection(Members, BodyColumn, State, Out),
        nl_indent(Out, State, OpenColumn),
        format(Out, ')', [])
    ).
1104
%   tw_collection(+Members, +Indent, +State, +Out)
%
%   Write the members of a collection as objects, separated using
%   nl_indent/3 to Indent.  There is no clause for an empty Members.

tw_collection([Member|Members], Indent, State, Out) :-
    tw_object(Member, State, Out),
    (   Members == []
    ->  true
    ;   nl_indent(Out, State, Indent),
        tw_collection(Members, Indent, State, Out)
    ).
1112
%!  unshared_collection(+URI, +State, -Members) is semidet.
%
%   True if URI denotes an RDF list that is made up from bnodes, is
%   linked exactly once to its context and contains no extra triples.
%   rdf:nil denotes the empty list.  As a side effect, each list cell
%   is marked as =written= in the node-id map of State; the predicate
%   fails if a cell is not in the map or already carries another
%   reference.

unshared_collection(Cell, _State, []) :-
    rdf_equal(Cell, rdf:nil),
    !.
unshared_collection(Cell, State, Members) :-
    rdf_is_bnode(Cell),
    object_link_count(Cell, State, 1),
    tw_state_nodeid_map(State, NodeIds),
    rb_lookup(Cell, written, NodeIds),
    subject_triples(Cell, State, POs),
    pairs_unshared_collection(POs, State, Members).
1129
%   pairs_unshared_collection(+Pairs, +State, -Members)
%
%   True if the sorted P-O pairs of a node describe a list cell: an
%   rdf:first pair followed by an rdf:rest pair, optionally followed
%   by a single rdf:type rdf:List pair and nothing else, where the
%   rdf:rest value is itself an unshared collection.  Members is the
%   rdf:first value followed by the members of the rest.

pairs_unshared_collection(Pairs, State, [First|Members]) :-
    rdf_equal(rdf:first, FirstP),
    rdf_equal(rdf:rest, RestP),
    Pairs = [ FirstP-First,
              RestP-RestCell
            | Extra
            ],
    (   Extra == []
    ;   rdf_equal(rdf:type, TypeP),
        rdf_equal(rdf:'List', ListClass),
        Extra == [TypeP-ListClass]
    ),
    unshared_collection(RestCell, State, Members).
1143
1144
%!  object_link_count(+BNode, +State, -Count) is det.
%
%   Count is the number of distinct Subject-Predicate combinations
%   that have BNode as object, as enumerated by the expand hook of
%   State on the graph of State.

object_link_count(BNode, State, Count) :-
    tw_state_graph(State, Graph),
    tw_state_expand(State, Expand),
    findall(Subject-Pred, call(Expand, Subject, Pred, BNode, Graph), Links0),
    sort(Links0, Links),                % remove duplicates
    length(Links, Count).
1155
%!  nl_indent(+Out, +State, +Indent) is det.
%
%   Write a newline and indent to column Indent.  An Indent of -1
%   writes a single space instead of starting a new line.  If the
%   tab_distance of State is 0 the indentation is emitted using
%   tab/2; otherwise it consists of Indent//TD tab characters
%   followed by Indent mod TD spaces.

nl_indent(Out, _State, -1) :-
    !,
    put_char(Out, ' ').
nl_indent(Out, State, Indent) :-
    nl(Out),
    tw_state_tab_distance(State, TabDistance),
    (   TabDistance \== 0
    ->  NTabs is Indent//TabDistance,
        NSpaces is Indent mod TabDistance,
        put_n(NTabs, '\t', Out),
        put_n(NSpaces, ' ', Out)
    ;   tab(Out, Indent)
    ).
1173
%   put_n(+N, +Char, +Out)
%
%   Write Char to Out as long as the counter, starting at N and
%   decremented by one per character, is greater than 0.

put_n(N, Char, Out) :-
    (   N > 0
    ->  put_char(Out, Char),
        Left is N - 1,
        put_n(Left, Char, Out)
    ;   true
    ).
1181
1182
%!  subject_triples(+URI, +State, -Pairs) is det.
%
%   Pairs is a sorted, duplicate-free list of P-O pairs representing
%   all triples on the subject URI, as enumerated by the expand hook
%   of State on the graph of State.

subject_triples(URI, State, Pairs) :-
    tw_state_graph(State, Graph),
    tw_state_expand(State, Expand),
    findall(Pred-Obj, call(Expand, URI, Pred, Obj, Graph), Unsorted),
    sort(Unsorted, Pairs).
1193
1194
1195                 /*******************************
1196                 *          GRAPH-LOGIC         *
1197                 *******************************/
1198
%!  subjects(+State, -Subjects:ord_set) is det.
%
%   Subjects is the ordered set of all subjects in the graph
%   requested in State.  One of three strategies is used:
%
%       * With the default `lookup` hook and a named graph holding
%         less than one tenth of all triples: enumerate the triples
%         of that graph.
%       * With a user supplied expand hook: enumerate through the
%         hook.
%       * Otherwise: enumerate candidates using subject/2.

subjects(State, Subjects) :-
    tw_state_expand(State, Expand),
    tw_state_graph(State, Graph),
    (   Expand == lookup,
        atom(Graph),
        (   rdf_graph_property(Graph, triples(InGraph))
        ->  true
        ;   InGraph = 0                     % non-existing graph
        ),
        rdf_statistics(triples(InStore)),
        InGraph * 10 < InStore
    ->  findall(S, rdf(S,_,_,Graph), Found),
        sort(Found, Subjects)
    ;   Expand \== lookup
    ->  findall(S, call(Expand, S,_,_,Graph), Found),
        sort(Found, Subjects)
    ;   findall(S, subject(State, S), Found),
        sort(Found, Subjects)
    ).
1223
1224
%   subject(+State, -Subject) is nondet.
%
%   Enumerate subject candidates.  If the graph of State is an atom,
%   Subject is a resource from rdf_resource/1 that has at least one
%   triple in that graph; otherwise Subject comes from rdf_subject/1.

subject(State, Subject) :-
    tw_state_graph(State, Graph),
    (   atom(Graph)
    ->  rdf_resource(Subject),
        once(rdf(Subject, _, _, Graph))
    ;   rdf_subject(Subject)
    ).
1234
1235
:- public lookup/4.                     % called from expand hook.

%   lookup(?S, ?P, ?O, ?G)
%
%   Enumerate triples: in any graph if G is unbound, using rdf/3, and
%   restricted to graph G otherwise, using rdf/4.

lookup(S, P, O, G) :-
    var(G),
    !,
    rdf(S, P, O).
lookup(S, P, O, G) :-
    rdf(S, P, O, G).
1243
1244
1245                 /*******************************
1246                 *        CANONICAL ORDERING    *
1247                 *******************************/
1248
1249/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1250This section deals with the two problems of canonical graphs:
1251
1252    * Keep blank nodes in the same order
1253    * Assign stable names to blank nodes that we need to
      give a name.  There are two cases: (1) a blank node is
1255      used in more than one place and (2) a blank node series
1256      is cyclic.
1257- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1258
%!  sort_bnodes(+BNodes, -Sorted, +State) is det.
%
%   Sort a list of blank nodes into the standard order of terms,
%   removing duplicates.  State is currently not used.

sort_bnodes(Unsorted, Sorted, _State) :-
    sort(Unsorted, Sorted).
1265
%!  sort_bnode_pairs(+Pairs, -Sorted, +State) is det.
%
%   Sort a list of BNode-Ref pairs into the standard order of terms,
%   removing duplicates.  State is currently not used.

sort_bnode_pairs(Unsorted, Sorted, _State) :-
    sort(Unsorted, Sorted).
1272
1273%!  bnode_to_term(+BNode, -Term, +State)
1274%
1275%   Term is a canonical representation of the graph formed by BNode.
1276%   The transformation of a bnode is
1277%
1278%           bnode(p-[o1,o2,..], ..)
1279%
%   The arguments are sorted alphabetically on predicate (can't we
%   leave the predicates out?) and the objects of each predicate are
%   sorted alphabetically.  Open issue: how to sort multiple bnode
%   values?
1283
1284
%!  next_bnode_id(+State, +BNode, -Ref) is det.
%
%   Generate a node-id term bnode(Id) for BNode.  When not writing
%   canonically, Id is the next value of the integer counter kept in
%   State.  Otherwise we want a more stable numbering: Id is the atom
%   bn_<hash>_<n>, where <hash> comes from bnode_hash/2 and <n>
%   counts, from 0, how often this hash has been handed out.  The
%   per-hash counters are kept in a red-black tree in State, which is
%   created on first use.

next_bnode_id(State, _BNode, bnode(Id)) :-
    tw_state_canonical(State, false),
    !,
    tw_state_bnode_id(State, Previous),
    Id is Previous+1,
    nb_set_bnode_id_of_tw_state(Id, State).
next_bnode_id(State, BNode, bnode(Id)) :-
    bnode_hash(BNode, Hash),
    tw_state_bnode_hash(State, Counters),
    (   var(Counters)
    ->  rb_empty(Counters)              % first canonical id
    ;   true
    ),
    (   rb_update(Counters, Hash, Seen, Seq, Counters1)
    ->  Seq is Seen+1
    ;   Seq = 0,
        rb_insert(Counters, Hash, Seq, Counters1)
    ),
    set_bnode_hash_of_tw_state(Counters1, State),
    format(atom(Id), 'bn_~w_~d', [Hash, Seq]).
1313
%!  bnode_hash(+BNode, -Hash) is det.
%
%   Hash is the hash-value for a bnode.  At present this is the
%   term_hash/2 of the blank node identifier itself.
%
%   @tbd: Hash on content.

bnode_hash(BNode, HashValue) :-
    term_hash(BNode, HashValue).
1322
1323
1324                 /*******************************
1325                 *           PRIMITIVES         *
1326                 *******************************/
1327
%!  tw_resource(+Resource, +State, +Out) is det.
%
%   Write a resource.  The alternatives are tried in this order:
%
%       * A bnode(_) reference is written using tw_bnode_ref/2.
%       * If Resource starts with the IRI of an entry in the prefix
%         map of State and the remainder is either a valid Turtle
%         local name or empty, it is written as Prefix:Local.  The
%         first matching entry of the map is used.
%       * Otherwise it is written as an IRI by tw_relative_uri/3.

tw_resource(BNodeRef, _State, Out) :-
    BNodeRef = bnode(_),
    !,
    tw_bnode_ref(BNodeRef, Out).
tw_resource(Resource, State, Out) :-
    tw_state_prefix_map(State, Prefixes),
    member(Prefix-IRI, Prefixes),
    atom_concat(IRI, Local, Resource),
    (   turtle:turtle_pn_local(Local)
    ->  true
    ;   Local == ''
    ),
    !,
    format(Out, '~w:', [Prefix]),
    turtle:turtle_write_pn_local(Out, Local).
tw_resource(Resource, State, Out) :-
    tw_relative_uri(Resource, State, Out).
1349
1350
%   tw_relative_uri(+Resource, +State, +Out)
%
%   Write Resource as an IRI.  If State holds an atomic base root
%   that is a prefix of Resource and the remainder starts with a `/`,
%   the IRI is written relative to the base path of State.  In all
%   other cases Resource is written unchanged.

tw_relative_uri(Resource, State, Out) :-
    tw_state_base_root(State, BaseRoot),
    atom(BaseRoot),
    atom_concat(BaseRoot, Path, Resource),
    sub_atom(Path, 0, _, _, /),
    tw_state_base_path(State, BasePath),
    relative_path(Path, BasePath, Relative),
    !,
    turtle:turtle_write_uri(Out, Relative).
tw_relative_uri(Resource, _State, Out) :-
    turtle:turtle_write_uri(Out, Resource).
1362
%   relative_path(+Path, +RelTo, -RelPath)
%
%   RelPath refers to Path relative to RelTo.  Both paths are split
%   on `/`, their common leading segments are removed and a `..` is
%   prepended for every remaining segment of RelTo but the last.

relative_path(Path, RelTo, RelPath) :-
    atomic_list_concat(PathSegments, /, Path),
    atomic_list_concat(BaseSegments, /, RelTo),
    delete_common_prefix(PathSegments, BaseSegments, PathRest, BaseRest),
    to_dot_dot(BaseRest, RelSegments, PathRest),
    atomic_list_concat(RelSegments, /, RelPath).
1369
%   delete_common_prefix(+List1, +List2, -Rest1, -Rest2)
%
%   Rest1 and Rest2 are what remains of List1 and List2 after
%   removing their longest common prefix.  Elements are compared by
%   unification.

delete_common_prefix(List1, List2, Rest1, Rest2) :-
    (   List1 = [Same|Tail1],
        List2 = [Same|Tail2]
    ->  delete_common_prefix(Tail1, Tail2, Rest1, Rest2)
    ;   Rest1 = List1,
        Rest2 = List2
    ).
1374
%   to_dot_dot(+Segments, -DotDot, +Tail)
%
%   DotDot is Tail preceded by one `..` for every element of Segments
%   except the last one.  An empty or one-element Segments yields
%   Tail itself.

to_dot_dot([], Tail, Tail).
to_dot_dot([_Last], Tail, Tail) :-
    !.
to_dot_dot([_Segment|Segments], ['..'|DotDot], Tail) :-
    to_dot_dot(Segments, DotDot, Tail).
1379
1380
%!  tw_literal(+Literal, +State, +Out) is det.
%
%   Write a literal value to the stream Out.  Typed literals are
%   handed to tw_typed_literal/4.  Language tagged literals are
%   written as a quoted string followed by `@` and the lowercased
%   language tag.  For plain literal(Value) terms the type is derived
%   from the Prolog type of Value: xsd:string for an atom,
%   xsd:integer for an integer, xsd:double for a float and
%   rdf:XMLLiteral for an XML DOM.
%
%   @error type_error(rdf_literal, Literal) if none of the above
%          applies.

tw_literal(literal(type(Type, Lexical)), State, Out) :-
    !,
    tw_typed_literal(Type, Lexical, State, Out).
tw_literal(literal(lang(Lang, Text)), State, Out) :-
    !,
    tw_quoted_string(Text, State, Out),
    downcase_atom(Lang, LangTag),           % Turtle lang = [a-z]+('-'[a-z0-9]+)*
    format(Out, '@~w', [LangTag]).
tw_literal(literal(Plain), State, Out) :-
    atom(Plain),
    !,
    rdf_equal(xsd:string, StringType),
    tw_typed_literal(StringType, Plain, State, Out).
                                                % Add types automatically
tw_literal(literal(Plain), State, Out) :-
    integer(Plain),
    !,
    rdf_equal(IntegerType, xsd:integer),
    tw_typed_literal(IntegerType, Plain, State, Out).
tw_literal(literal(Plain), State, Out) :-
    float(Plain),
    !,
    rdf_equal(DoubleType, xsd:double),
    tw_typed_literal(DoubleType, Plain, State, Out).
tw_literal(literal(Plain), State, Out) :-
    xml_is_dom(Plain),
    !,
    rdf_equal(XMLType, rdf:'XMLLiteral'),
    tw_typed_literal(XMLType, Plain, State, Out).
tw_literal(Other, _State, _Out) :-
    type_error(rdf_literal, Other).
1416
1417
%   tw_typed_literal(+Type, +Value, +State, +Out)
%
%   Write a typed literal.  If abbreviation is enabled in State and
%   tw_abbreviated_literal/4 can write the value, that form is used.
%   Otherwise the literal is written as "lexical"^^Type, where the
%   lexical form is Value itself for an atom or string, the
%   serialized XML for an rdf:XMLLiteral, and the ~q rendering of
%   Value for anything else.

tw_typed_literal(Type, Value, State, Out) :-
    tw_state_abbreviate_literals(State, true),
    tw_abbreviated_literal(Type, Value, State, Out),
    !.
tw_typed_literal(Type, Value, State, Out) :-
    (   atom(Value)
    ;   string(Value)
    ),
    !,
    tw_lexical_with_type(Value, Type, State, Out).
tw_typed_literal(Type, Value, State, Out) :-
    rdf_equal(Type, rdf:'XMLLiteral'),
    !,
    with_output_to(string(Markup),
                   xml_write(Value, [header(false)])),
    tw_lexical_with_type(Markup, Type, State, Out).
tw_typed_literal(Type, Value, State, Out) :-
    format(string(Rendered), '~q', [Value]),
    tw_lexical_with_type(Rendered, Type, State, Out).

%   tw_lexical_with_type(+Text, +Type, +State, +Out)
%
%   Write Text as a quoted string followed by `^^` and Type.

tw_lexical_with_type(Text, Type, State, Out) :-
    tw_quoted_string(Text, State, Out),
    write(Out, '^^'),
    tw_resource(Type, State, Out).
1441
1442
%!  tw_abbreviated_literal(+Type, +Value, +State, +Out) is semidet.
%
%   Write Value using Turtle's abbreviated syntax for Type.  Fails
%   without writing anything if there is no abbreviated form for
%   Type, or if Value cannot be written in that form without changing
%   its meaning.  The caller, tw_typed_literal/4, then falls back to
%   the explicit "lexical"^^Type notation.
%
%   An unquoted token is typed by the Turtle parser from its shape
%   alone, so the clauses below only write a bare value when it is
%   read back with the intended type:
%
%       * xsd:boolean: only `true` and `false`.  E.g. the lexical
%         form `1` would be read back as an xsd:integer.
%       * xsd:decimal: only text matching Turtle's DECIMAL production
%         `[+-]?[0-9]*'.'[0-9]+`.  E.g. `5` would be read back as an
%         xsd:integer.
%       * xsd:integer with canonize_numbers enabled: only values that
%         convert to an integer, as the `~d` directive of format/3
%         raises an error on any other argument.
%
%   The clauses are written with Prefix:Local types.  The
%   term_expansion/2 rule replaces these by the global resource at
%   load time.
%
%   @tbd:   Deal with canonical forms (or is this a task of the
%           RDF parser?
%   @tbd:   Validate the lexical form of xsd:integer and xsd:double if
%           canonize_numbers is =false=.

term_expansion((tw_abbreviated_literal(NS:Local, Value, State, Out) :- Body),
               (tw_abbreviated_literal(Type, Value, State, Out) :- Body)) :-
    atom(NS),
    rdf_global_id(NS:Local, Type).

tw_abbreviated_literal(xsd:integer, Value, State, Out) :-
    (   tw_state_canonize_numbers(State, false)
    ->  write(Out, Value)
    ;   atom_number(Value, Int),
        integer(Int),                   % ~d raises on e.g. 1.5
        format(Out, '~d', [Int])
    ).
tw_abbreviated_literal(xsd:double, Value, State, Out) :-
    (   tw_state_canonize_numbers(State, false)
    ->  write(Out, Value)
    ;   ValueF is float(Value),
        xsd_number_string(ValueF, FloatS),
        format(Out, '~s', [FloatS])
    ).
tw_abbreviated_literal(xsd:string, Value, State, Out) :-
    tw_quoted_string(Value, State, Out).
tw_abbreviated_literal(xsd:decimal, Value, _, Out) :-
    tw_turtle_decimal_text(Value),
    format(Out, '~w', [Value]).
tw_abbreviated_literal(xsd:boolean, Value, _, Out) :-
    tw_turtle_boolean_text(Value),
    format(Out, '~w', [Value]).

%   tw_turtle_boolean_text(+Value) is semidet.
%
%   True if Value is atomic and is written by `~w` as `true` or
%   `false`.

tw_turtle_boolean_text(Value) :-
    atomic(Value),
    format(atom(Text), '~w', [Value]),
    memberchk(Text, [true, false]).

%   tw_turtle_decimal_text(+Value) is semidet.
%
%   True if Value is atomic and the text written for it by `~w`
%   matches Turtle's DECIMAL production.

tw_turtle_decimal_text(Value) :-
    atomic(Value),
    format(codes(Codes), '~w', [Value]),
    phrase(tw_turtle_decimal, Codes),
    !.

tw_turtle_decimal -->
    tw_turtle_opt_sign,
    tw_turtle_digits0,
    [0'.],
    tw_turtle_digit,
    tw_turtle_digits0.

tw_turtle_opt_sign -->
    [C],
    { C == 0'+ ; C == 0'- },
    !.
tw_turtle_opt_sign -->
    [].

tw_turtle_digits0 -->
    tw_turtle_digit,
    !,
    tw_turtle_digits0.
tw_turtle_digits0 -->
    [].

tw_turtle_digit -->
    [C],
    { integer(C), C >= 0'0, C =< 0'9 }.
1475
1476
%!  tw_quoted_string(+Atom, +State, +Out) is det.
%
%   Write Atom to Out as a quoted string.  The work is delegated to
%   turtle:turtle_write_quoted_string/2; State is not used.

tw_quoted_string(Text, _State, Out) :-
    turtle:turtle_write_quoted_string(Out, Text).
1484
1485
1486                 /*******************************
1487                 *             COMMENT          *
1488                 *******************************/
1489
%   comment(+State, +Format, +Args, +Out)
%
%   If the comment option of State is =true=, write an empty line
%   followed by a `# ` comment line holding Format/Args.  Otherwise
%   succeed without writing anything.

comment(State, Format, Args, Out) :-
    (   tw_state_comment(State, true)
    ->  format(Out, '~n# ', []),
        format(Out, Format, Args),
        format(Out, '~n', [])
    ;   true
    ).
1497
1498
1499
1500                 /*******************************
1501                 *           STATISTICS         *
1502                 *******************************/
1503
%   inc_triple_count(+State, +Count)
%
%   Add Count to the triple counter held in State.

inc_triple_count(State, Count) :-
    tw_state_triple_count(State, Old),
    New is Old+Count,
    nb_set_triple_count_of_tw_state(New, State).
1508
%   inc_subject_count(+State, +Count)
%
%   Add Count to the subject counter held in State.

inc_subject_count(State, Count) :-
    tw_state_subject_count(State, Old),
    New is Old+Count,
    nb_set_subject_count_of_tw_state(New, State).
1513
:- multifile
    prolog:message//1.

%   Messages reporting a completed save.  The saved/4 term reports
%   triples and subjects; the saved/5 term additionally reports the
%   number of graphs.  Both end with the destination and the elapsed
%   time in seconds.

prolog:message(rdf(saved(Dest, Seconds, NSubjects, NTriples))) -->
    [ 'Saved ~D triples about ~D subjects into '-[NTriples, NSubjects] ],
    rdf_output(Dest),
    [ ' (~3f sec)'-[Seconds] ].
prolog:message(rdf(saved(Dest, Seconds, NSubjects, NTriples,
                         NGraphs))) -->
    [ 'Saved ~D graphs, ~D triples about ~D subjects into '-
      [NGraphs, NTriples, NSubjects] ],
    rdf_output(Dest),
    [ ' (~3f sec)'-[Seconds] ].
1527
%   rdf_output(+Dest)//
%
%   Describe the destination of a save.  For a stream that has a
%   file_name property the file name is printed; anything else is
%   printed as is.

rdf_output(Dest) -->
    { is_stream(Dest),
      stream_property(Dest, file_name(FileName))
    },
    !,
    [ '~p'-[FileName] ].
rdf_output(Dest) -->
    [ '~p'-[Dest] ].
1535    [ '~p'-[File] ].