View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2010-2013, University of Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
:- module(rdf_triple,
          [ rdf_triples/2,              % +Parsed, -Triples
            rdf_triples/3,              % +Parsed, -Triples, +Tail
            rdf_reset_ids/0,            % Reset gensym ids
            rdf_start_file/2,           % +Options, -Cleanup
            rdf_end_file/1,             % +Cleanup
            anon_prefix/1               % Prefix for anonymous resources
          ]).
  43:- use_module(library(gensym)).
  44:- use_module(library(option)).
  45:- use_module(library(uri)).
  46:- use_module(rdf_parser).
  47
% Declare the options that may appear in argument 1 (the option list)
% of rdf_start_file/2, together with the type of each option value.
:- predicate_options(rdf_start_file/2, 1,
                     [ base_uri(atom),
                       blank_nodes(oneof([share,noshare]))
                     ]).
  52
  53/** <module> Create triples from intermediate representation
  54
Convert the output of xml_to_rdf/3 from library(rdf) into a list of
triples of the format described below. The intermediate representation
should be regarded as a proprietary representation.
  58
  59        rdf(Subject, Predicate, Object).
  60
  61Where `Subject' is
  62
  63        * Atom
  64        The subject is a resource
  65
  66        * each(URI)
  67        URI is the URI of an RDF Bag
  68
  69        * prefix(Pattern)
  70        Pattern is the prefix of a fully qualified Subject URI
  71
  72And `Predicate' is
  73
  74        * Atom
  75        The predicate is always a resource
  76
  77And `Object' is
  78
  79        * Atom
  80        URI of Object resource
  81
  82        * literal(Value)
  83        Literal value (Either a single atom or parsed XML data)
  84*/
  85
  86%!  rdf_triples(+Term, -Triples) is det.
  87%!  rdf_triples(+Term, -Tridpples, +Tail) is det.
  88%
  89%   Convert an object as parsed by rdf.pl into a list of rdf/3
  90%   triples.  The identifier of the main object created is returned
  91%   by rdf_triples/3.
  92%
  93%   Input is the `content' of the RDF element in the format as
  94%   generated by load_structure(File, Term, [dialect(xmlns)]).
  95%   rdf_triples/3 can process both individual descriptions as
  96%   well as the entire content-list of an RDF element.  The first
  97%   mode is suitable when using library(sgml) in `call-back' mode.
  98
  99rdf_triples(RDF, Tripples) :-
 100    rdf_triples(RDF, Tripples, []).
 101
 102rdf_triples([]) -->
 103    !,
 104    [].
 105rdf_triples([H|T]) -->
 106    !,
 107    rdf_triples(H),
 108    rdf_triples(T).
 109rdf_triples(Term) -->
 110    triples(Term, _).
 111
 112%!  triples(-Triples, -Id, +In, -Tail)
 113%
 114%   DGC set processing the output of  xml_to_rdf/3. Id is unified to
 115%   the identifier of the main description.
 116
 117triples(description(Type, About, Props), Subject) -->
 118    { var(About),
 119      share_blank_nodes(true)
 120    },
 121    !,
 122    (   { shared_description(description(Type, Props), Subject)
 123        }
 124    ->  []
 125    ;   { make_id('_:Description', Id)
 126        },
 127        triples(description(Type, about(Id), Props), Subject),
 128        { assert_shared_description(description(Type, Props), Subject)
 129        }
 130    ).
 131triples(description(description, IdAbout, Props), Subject) -->
 132    !,
 133    { description_id(IdAbout, Subject)
 134    },
 135    properties(Props, Subject).
 136triples(description(TypeURI, IdAbout, Props), Subject) -->
 137    { description_id(IdAbout, Subject)
 138    },
 139    properties([ rdf:type = TypeURI
 140               | Props
 141               ], Subject).
 142triples(unparsed(Data), Id) -->
 143    { make_id('_:Error', Id),
 144      print_message(error, rdf(unparsed(Data)))
 145    },
 146    [].
 147
 148
 149                 /*******************************
 150                 *          DESCRIPTIONS        *
 151                 *******************************/
 152
 153:- thread_local
 154    node_id/2,                      % nodeID --> ID
 155    unique_id/1.                    % known rdf:ID
 156
 157rdf_reset_node_ids :-
 158    retractall(node_id(_,_)),
 159    retractall(unique_id(_)).
 160
 161description_id(Id, Id) :-
 162    var(Id),
 163    !,
 164    make_id('_:Description', Id).
 165description_id(about(Id), Id).
 166description_id(id(Id), Id) :-
 167    (   unique_id(Id)
 168    ->  print_message(error, rdf(redefined_id(Id)))
 169    ;   assert(unique_id(Id))
 170    ).
 171description_id(each(Id), each(Id)).
 172description_id(prefix(Id), prefix(Id)).
 173description_id(node(NodeID), Id) :-
 174    (   node_id(NodeID, Id)
 175    ->  true
 176    ;   make_id('_:Node', Id),
 177        assert(node_id(NodeID, Id))
 178    ).
 179
 180properties(PlRDF, Subject) -->
 181    properties(PlRDF, 1, [], [], Subject).
 182
 183properties([], _, Bag, Bag, _) -->
 184    [].
 185properties([H0|T0], N, Bag0, Bag, Subject) -->
 186    property(H0, N, NN, Bag0, Bag1, Subject),
 187    properties(T0, NN, Bag1, Bag, Subject).
 188
 189%!  property(Property, N, NN, Subject)// is det.
 190%
 191%   Generate triples for {Subject,  Pred,   Object}.  Also generates
 192%   triples for Object if necessary.
 193%
 194%   @param Property One of
 195%
 196%           * Pred = Object
 197%           Used for normal statements
 198%           * id(Id, Pred = Object)
 199%           Used for reified statements
 200
 201property(Pred0 = Object, N, NN, BagH, BagT, Subject) --> % inlined object
 202    triples(Object, Id),
 203    !,
 204    { li_pred(Pred0, Pred, N, NN)
 205    },
 206    statement(Subject, Pred, Id, _, BagH, BagT).
 207property(Pred0 = collection(Elems), N, NN, BagH, BagT, Subject) -->
 208    !,
 209    { li_pred(Pred0, Pred, N, NN)
 210    },
 211    statement(Subject, Pred, Object, _Id, BagH, BagT),
 212    collection(Elems, Object).
 213property(Pred0 = Object, N, NN, BagH, BagT, Subject) -->
 214    !,
 215    { li_pred(Pred0, Pred, N, NN)
 216    },
 217    statement(Subject, Pred, Object, _Id, BagH, BagT).
 218property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
 219    triples(Object, ObjectId),
 220    !,
 221    { li_pred(Pred0, Pred, N, NN)
 222    },
 223    statement(Subject, Pred, ObjectId, Id, BagH, BagT).
 224property(id(Id, Pred0 = collection(Elems)), N, NN, BagH, BagT, Subject) -->
 225    !,
 226    { li_pred(Pred0, Pred, N, NN)
 227    },
 228    statement(Subject, Pred, Object, Id, BagH, BagT),
 229    collection(Elems, Object).
 230property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
 231    { li_pred(Pred0, Pred, N, NN)
 232    },
 233    statement(Subject, Pred, Object, Id, BagH, BagT).
 234
 235%!  statement(+Subject, +Pred, +Object, +Id, +BagH, -BagT)
 236%
 237%   Add a statement to the model. If nonvar(Id), we reinify the
 238%   statement using the given Id.
 239
 240statement(Subject, Pred, Object, Id, BagH, BagT) -->
 241    rdf(Subject, Pred, Object),
 242    {   BagH = [Id|BagT]
 243    ->  statement_id(Id)
 244    ;   BagT = BagH
 245    },
 246    (   { nonvar(Id)
 247        }
 248    ->  rdf(Id, rdf:type, rdf:'Statement'),
 249        rdf(Id, rdf:subject, Subject),
 250        rdf(Id, rdf:predicate, Pred),
 251        rdf(Id, rdf:object, Object)
 252    ;   []
 253    ).
 254
 255
 256statement_id(Id) :-
 257    nonvar(Id),
 258    !.
 259statement_id(Id) :-
 260    make_id('_:Statement', Id).
 261
 262%!  li_pred(+Pred, -Pred, +Nth, -NextNth)
 263%
 264%   Transform rdf:li predicates into _1, _2, etc.
 265
 266li_pred(rdf:li, rdf:Pred, N, NN) :-
 267    !,
 268    NN is N + 1,
 269    atom_concat('_', N, Pred).
 270li_pred(Pred, Pred, N, N).
 271
 272%!  collection(+Elems, -Id)
 273%
 274%   Handle the elements of a collection and return the identifier
 275%   for the whole collection in Id.
 276
 277collection([], Nil) -->
 278    { global_ref(rdf:nil, Nil)
 279    }.
 280collection([H|T], Id) -->
 281    triples(H, HId),
 282    { make_id('_:List', Id)
 283    },
 284    rdf(Id, rdf:type, rdf:'List'),
 285    rdf(Id, rdf:first, HId),
 286    rdf(Id, rdf:rest, TId),
 287    collection(T, TId).
 288
 289
 290rdf(S0, P0, O0) -->
 291    { global_ref(S0, S),
 292      global_ref(P0, P),
 293      global_obj(O0, O)
 294    },
 295    [ rdf(S, P, O) ].
 296
 297
 298global_ref(In, Out) :-
 299    (   nonvar(In),
 300        In = NS:Local
 301    ->  (   NS == rdf,
 302            rdf_name_space(RDF)
 303        ->  atom_concat(RDF, Local, Out)
 304        ;   atom_concat(NS, Local, Out0),
 305            iri_normalized(Out0, Out)
 306        )
 307    ;   Out = In
 308    ).
 309
 310global_obj(V, V) :-
 311    var(V),
 312    !.
 313global_obj(literal(type(Local, X)), literal(type(Global, X))) :-
 314    !,
 315    global_ref(Local, Global).
 316global_obj(literal(X), literal(X)) :- !.
 317global_obj(Local, Global) :-
 318    global_ref(Local, Global).
 319
 320
 321                 /*******************************
 322                 *             SHARING          *
 323                 *******************************/
 324
 325:- thread_local
 326    shared_description/3,           % +Hash, +Term, -Subject
 327    share_blank_nodes/1,            % Boolean
 328    shared_nodes/1.                 % counter
 329
 330reset_shared_descriptions :-
 331    retractall(shared_description(_,_,_)),
 332    retractall(shared_nodes(_)).
 333
 334shared_description(Term, Subject) :-
 335    term_hash(Term, Hash),
 336    shared_description(Hash, Term, Subject),
 337    (   retract(shared_nodes(N))
 338    ->  N1 is N + 1
 339    ;   N1 = 1
 340    ),
 341    assert(shared_nodes(N1)).
 342
 343
 344assert_shared_description(Term, Subject) :-
 345    term_hash(Term, Hash),
 346    assert(shared_description(Hash, Term, Subject)).
 347
 348
 349                 /*******************************
 350                 *            START/END         *
 351                 *******************************/
 352
 353%!  rdf_start_file(+Options, -Cleanup) is det.
 354%
 355%   Initialise for the translation of a file.
 356
 357rdf_start_file(Options, Cleanup) :-
 358    rdf_reset_node_ids,             % play safe
 359    reset_shared_descriptions,
 360    set_bnode_sharing(Options, C1),
 361    set_anon_prefix(Options, C2),
 362    add_cleanup(C1, C2, Cleanup).
 363
 364%!  rdf_end_file(:Cleanup) is det.
 365%
 366%   Cleanup reaching the end of an RDF file.
 367
 368rdf_end_file(Cleanup) :-
 369    rdf_reset_node_ids,
 370    (   shared_nodes(N)
 371    ->  print_message(informational, rdf(shared_blank_nodes(N)))
 372    ;   true
 373    ),
 374    reset_shared_descriptions,
 375    Cleanup.
 376
 377set_bnode_sharing(Options, erase(Ref)) :-
 378    option(blank_nodes(Share), Options, noshare),
 379    (   Share == share
 380    ->  assert(share_blank_nodes(true), Ref), !
 381    ;   Share == noshare
 382    ->  fail                        % next clause
 383    ;   throw(error(domain_error(share, Share), _))
 384    ).
 385set_bnode_sharing(_, true).
 386
 387set_anon_prefix(Options, erase(Ref)) :-
 388    option(base_uri(BaseURI), Options),
 389    nonvar(BaseURI),
 390    !,
 391    (   BaseURI == []
 392    ->  AnonBase = '_:'
 393    ;   atomic_list_concat(['_:', BaseURI, '#'], AnonBase)
 394    ),
 395    asserta(anon_prefix(AnonBase), Ref).
 396set_anon_prefix(_, true).
 397
 398add_cleanup(true, X, X) :- !.
 399add_cleanup(X, true, X) :- !.
 400add_cleanup(X, Y, (X, Y)).
 401
 402
 403                 /*******************************
 404                 *             UTIL             *
 405                 *******************************/
 406
 407%!  anon_prefix(-Prefix) is semidet.
 408%
 409%   If defined, it is the prefix used to generate a blank node.
 410
 411:- thread_local
 412    anon_prefix/1.
 413
 414make_id(For, ID) :-
 415    anon_prefix(Prefix),
 416    !,
 417    atom_concat(Prefix, For, Base),
 418    gensym(Base, ID).
 419make_id(For, ID) :-
 420    gensym(For, ID).
 421
 422anon_base('_:Description').
 423anon_base('_:Statement').
 424anon_base('_:List').
 425anon_base('_:Node').
 426
 427%!  rdf_reset_ids is det.
 428%
 429%   Utility predicate to reset the gensym counters for the various
 430%   generated identifiers.  This simplifies debugging and matching
 431%   output with the stored desired output (see rdf_test.pl).
 432
 433rdf_reset_ids :-
 434    anon_prefix(Prefix),
 435    !,
 436    (   anon_base(Base),
 437        atom_concat(Prefix, Base, X),
 438        reset_gensym(X),
 439        fail
 440    ;   true
 441    ).
 442rdf_reset_ids :-
 443    (   anon_base(Base),
 444        reset_gensym(Base),
 445        fail
 446    ;   true
 447    ).