View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2010-2013, University of Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(rdf_parser,
  36          [ xml_to_plrdf/3,             % +XMLTerm, -RDFTerm, +State
  37            element_to_plrdf/3,         % +ContentList, -RDFTerm, +State
  38            make_rdf_state/3,           % +Options, -State, -RestOptions
  39            rdf_modify_state/3,         % +XMLAttrs, +State0, -State
  40            rdf_name_space/1
  41          ]).
  42:- use_module(rewrite).
  43:- use_module(library(sgml)).           % xml_name/1
  44:- use_module(library(lists)).
  45:- use_module(library(uri)).
  46:- use_module(library(record)).
  47
  48:- op(500, fx, \?).                     % Optional (attrs)
  49
  50term_expansion(F, T) :- rew_term_expansion(F, T).
  51goal_expansion(F, T) :- rew_goal_expansion(F, T).
  52
  53goal_expansion(attrs(Attrs, List), Goal) :-
  54    translate_attrs(List, Attrs, Goal).
  55
  56translate_attrs(Var, Attrs, rewrite(Var, Attrs)) :-
  57    var(Var),
  58    !.
  59translate_attrs([], _, true) :- !.
  60translate_attrs([H], Attrs, Goal) :-
  61    !,
  62    (   var(H)
  63    ->  Goal = rewrite(H, Attrs)
  64    ;   H = \?Optional
  65    ->  Goal = (   member(A, Attrs),
  66                   OptRewrite
  67               ->  true
  68               ;   true
  69               ),
  70        expand_goal(rewrite(\Optional, A), OptRewrite)
  71    ;   Goal = (   member(A, Attrs),
  72                   Rewrite
  73               ->  true
  74               ),
  75        expand_goal(rewrite(H, A), Rewrite)
  76    ).
  77translate_attrs([H|T], Attrs0, (G0, G1)) :-
  78    !,
  79    (   var(H)
  80    ->  G0 = rewrite(H, Attrs0),
  81        Attrs1 = Attrs0
  82    ;   H = \?Optional
  83    ->  G0 = (   select(A, Attrs0, Attrs1),
  84                 OptRewrite
  85             ->  true
  86             ;   Attrs1 = Attrs0
  87             ),
  88        expand_goal(rewrite(\Optional, A), OptRewrite)
  89    ;   G0 = (   select(A, Attrs0, Attrs1),
  90                 Rewrite
  91             ),
  92        expand_goal(rewrite(H, A), Rewrite)
  93    ),
  94    translate_attrs(T, Attrs1, G1).
  95translate_attrs(Rule, Attrs, Goal) :-
  96    expand_goal(rewrite(Rule, Attrs), Goal).
  97
  98
  99:- multifile rdf_name_space/1.
 100:- dynamic   rdf_name_space/1.
 101
 102%!  rdf_name_space(?URL) is nondet.
 103%
 104%   True if URL must be handled  as rdf: Determines special handling
 105%   of rdf:about, rdf:resource, etc.
 106
 107
 108rdf_name_space('http://www.w3.org/1999/02/22-rdf-syntax-ns#').
 109rdf_name_space('http://www.w3.org/TR/REC-rdf-syntax').
 110
 111
 112:- record
 113    rdf_state(base_uri='',
 114              lang='',
 115              ignore_lang=false,
 116              convert_typed_literal).
 117
 118
 119%!  xml_to_plrdf(+RDFElementOrObject, -RDFTerm, +State)
 120%
 121%   Translate an XML (using namespaces)  term   into  an Prolog term
 122%   representing the RDF data.  This  term   can  then  be  fed into
 123%   rdf_triples/[2,3] to create a list of   RDF triples. State is an
 124%   instance of an rdf_state record.
 125
 126xml_to_plrdf(Element, RDF, State) :-
 127    (   is_list(Element)
 128    ->  rewrite(\xml_content_objects(RDF, State), Element)
 129    ;   rewrite(\xml_objects(RDF, State), Element)
 130    ).
 131
 132%!  element_to_plrdf(+DOM, -RDFTerm, +State)
 133%
 134%   Rewrite a single XML element.
 135
 136element_to_plrdf(Element, RDF, State) :-
 137    rewrite(\nodeElementList(RDF, State), [Element]).
 138
 139xml_objects(Objects, Options0) ::=
 140        E0,
 141        { modify_state(E0, Options0, E, Options), !,
 142          rewrite(\xml_objects(Objects, Options), E)
 143        }.
 144xml_objects(Objects, Options) ::=
 145        element((\rdf('RDF'), !),
 146                _,
 147                \nodeElementList(Objects, Options)),
 148        !.
 149xml_objects(Objects, Options) ::=
 150        element(_, _, \xml_content_objects(Objects, Options)).
 151
 152xml_content_objects([], _) ::=
 153        [].
 154xml_content_objects([H|T], Options) ::=
 155        [ \xml_objects(H, Options)
 156        | \xml_content_objects(T, Options)
 157        ].
 158
 159
 160nodeElementList([], _Options) ::=
 161        [], !.
 162nodeElementList(L, Options) ::=
 163        [ (\ws, !)
 164        | \nodeElementList(L, Options)
 165        ].
 166nodeElementList([H|T], Options) ::=
 167        [ \nodeElementOrError(H, Options)
 168        | \nodeElementList(T, Options)
 169        ].
 170
 171nodeElementOrError(H, Options) ::=
 172        \nodeElement(H, Options), !.
 173nodeElementOrError(unparsed(Data), _Options) ::=
 174        Data.
 175
 176nodeElement(description(Type, About, Properties), Options) ::=
 177        \description(Type, About, Properties, Options).
 178
 179
 180                 /*******************************
 181                 *          DESCRIPTION         *
 182                 *******************************/
 183
 184description(Type, About, Properties, Options0) ::=
 185        E0,
 186        { modify_state(E0, Options0, E, Options), !,
 187          rewrite(\description(Type, About, Properties, Options), E)
 188        }.
 189description(description, About, Properties, Options) ::=
 190        element(\rdf('Description'),
 191                \attrs([ \?idAboutAttr(About, Options)
 192                       | \propAttrs(PropAttrs, Options)
 193                       ]),
 194                \propertyElts(PropElts, Options)),
 195        { !, append(PropAttrs, PropElts, Properties)
 196        }.
 197description(Type, About, Properties, Options) ::=
 198        element(\name_uri(Type, Options),
 199                \attrs([ \?idAboutAttr(About, Options)
 200                       | \propAttrs(PropAttrs, Options)
 201                       ]),
 202                \propertyElts(PropElts, Options)),
 203        { append(PropAttrs, PropElts, Properties)
 204        }.
 205
 206propAttrs([], _) ::=
 207        [], !.
 208propAttrs([H|T], Options) ::=
 209        [ \propAttr(H, Options)
 210        | \propAttrs(T, Options)
 211        ].
 212
 213propAttr(rdf:type = URI, Options) ::=
 214        \rdf_or_unqualified(type) = \value_uri(URI, Options), !.
 215propAttr(Name = Literal, Options) ::=
 216        Name = Value,
 217        { mkliteral(Value, Literal, Options)
 218        }.
 219
 220propertyElts([], _) ::=
 221        [], !.
 222propertyElts(Elts, Options) ::=
 223        [ (\ws, !)
 224        | \propertyElts(Elts, Options)
 225        ].
 226propertyElts([H|T], Options) ::=
 227        [ \propertyElt(H, Options)
 228        | \propertyElts(T, Options)
 229        ].
 230
 231propertyElt(E, Options) ::=
 232        \propertyElt(Id, Name, Value, Options),
 233        { mkprop(Name, Value, Prop),
 234          (   var(Id)
 235          ->  E = Prop
 236          ;   E = id(Id, Prop)
 237          )
 238        }.
 239
 240mkprop(NS:Local, Value, rdf:Local = Value) :-
 241    rdf_name_space(NS),
 242    !.
 243mkprop(Name, Value, Name = Value).
 244
 245
 246propertyElt(Id, Name, Value, Options0) ::=
 247        E0,
 248        { modify_state(E0, Options0, E, Options), !,
 249          rewrite(\propertyElt(Id, Name, Value, Options), E)
 250        }.
 251propertyElt(Id, Name, Value, Options) ::=
 252        \literalPropertyElt(Id, Name, Value, Options), !.
 253propertyElt(_, Name, Literal, Options) ::=
 254        element(Name,
 255                \attrs([ \parseLiteral
 256                       ]),
 257                Content),
 258        { !,
 259          literal_value(Content, Literal, Options)
 260        }.
 261propertyElt(Id, Name, collection(Elements), Options) ::=
 262        element(Name,
 263                \attrs([ \parseCollection,
 264                         \?idAttr(Id, Options)
 265                       ]),
 266                \nodeElementList(Elements, Options)).
 267                                        % 5.14 emptyPropertyElt
 268propertyElt(Id, Name, Value, Options) ::=
 269        element(Name, A, \all_ws),
 270        { !,
 271          rewrite(\emptyPropertyElt(Id, Value, Options), A)
 272        }.
 273
 274propertyElt(_, Name, description(description, Id, Properties), Options) ::=
 275        element(Name,
 276                \attrs([ \parseResource,
 277                         \?idAboutAttr(Id, Options)
 278                       ]),
 279                \propertyElts(Properties, Options)),
 280        !.
 281propertyElt(Id, Name, Literal, Options) ::=
 282        element(Name,
 283                \attrs([ \?idAttr(Id, Options)
 284                       ]),
 285                [ Value ]),
 286        { atom(Value), !,
 287          mkliteral(Value, Literal, Options)
 288        }.
 289propertyElt(Id, Name, Value, Options) ::=
 290        element(Name,
 291                \attrs([ \?idAttr(Id, Options)
 292                       ]),
 293                \an_rdf_object(Value, Options)), !.
 294propertyElt(Id, Name, unparsed(Value), Options) ::=
 295        element(Name,
 296                \attrs([ \?idAttr(Id, Options)
 297                       ]),
 298                Value).
 299
 300literalPropertyElt(Id, Name, Literal, Options) ::=
 301        element(Name,
 302                \attrs([ \typeAttr(Type, Options),
 303                         \?idAttr(Id, Options)
 304                       ]),
 305                Content),
 306        { typed_literal(Type, Content, Literal, Options)
 307        }.
 308
 309emptyPropertyElt(Id, Literal, Options) ::=
 310        \attrs([ \?idAttr(Id, Options),
 311                 \?parseLiteral
 312               | \noMoreAttrs
 313               ]),
 314        { !,
 315          mkliteral('', Literal, Options)
 316        }.
 317emptyPropertyElt(Id,
 318                 description(description, About, Properties),
 319                 Options) ::=
 320        \attrs([ \?idAttr(Id, Options),
 321                 \?aboutResourceEmptyElt(About, Options),
 322                 \?parseResource
 323               | \propAttrs(Properties, Options)
 324               ]), !.
 325
 326aboutResourceEmptyElt(about(URI), Options) ::=
 327        \resourceAttr(URI, Options), !.
 328aboutResourceEmptyElt(node(URI), _Options) ::=
 329        \nodeIDAttr(URI).
 330
 331%!  literal_value(+In, -Value, +Options)
 332%
 333%   Create the literal value for rdf:parseType="Literal" attributes.
 334%   The content is the Prolog XML DOM tree for the literal.
 335%
 336%   @tbd    Note that the specs demand a canonical textual representation
 337%           of the XML data as a Unicode string.  For now the user can
 338%           achieve this using the convert_typed_literal hook.
 339
 340literal_value(Value, literal(type(rdf:'XMLLiteral', Value)), _).
 341
 342%!  mkliteral(+Atom, -Object, +Options)
 343%
 344%   Translate attribute value Atom into an RDF object using the
 345%   lang(Lang) option from Options.
 346
 347mkliteral(Text, literal(Val), Options) :-
 348    atom(Text),
 349    (   rdf_state_lang(Options, Lang),
 350        Lang \== ''
 351    ->  Val = lang(Lang, Text)
 352    ;   Val = Text
 353    ).
 354
 355%!  typed_literal(+Type, +Content, -Literal, +Options)
 356%
 357%   Handle a literal attribute with rdf:datatype=Type qualifier. NB:
 358%   possibly  it  is  faster  to  use  a  global  variable  for  the
 359%   conversion hook.
 360
 361typed_literal(Type, Content, literal(Object), Options) :-
 362    rdf_state_convert_typed_literal(Options, Convert),
 363    nonvar(Convert),
 364    !,
 365    (   catch(call(Convert, Type, Content, Object), E, true)
 366    ->  (   var(E)
 367        ->  true
 368        ;   Object = E
 369        )
 370    ;   Object = error(cannot_convert(Type, Content), _)
 371    ).
 372typed_literal(Type, [], literal(type(Type, '')), _Options) :- !.
 373typed_literal(Type, [Text], literal(type(Type, Text)), _Options) :- !.
 374typed_literal(Type, Content, literal(type(Type, Content)), _Options).
 375
 376
 377idAboutAttr(id(Id), Options) ::=
 378        \idAttr(Id, Options), !.
 379idAboutAttr(about(About), Options) ::=
 380        \aboutAttr(About, Options), !.
 381idAboutAttr(node(About), _Options) ::=
 382        \nodeIDAttr(About), !.
 383
 384%!  an_rdf_object(-Object, +OptionsURI)
 385%
 386%   Deals with an object, but there may be spaces around.  I'm still
 387%   not sure where to deal with these.  Best is to ask the XML parser
 388%   to get rid of them, So most likely this code will change if this
 389%   happens.
 390
 391an_rdf_object(Object, Options) ::=
 392        [ \nodeElement(Object, Options)
 393        ], !.
 394an_rdf_object(Object, Options) ::=
 395        [ (\ws, !)
 396        | \an_rdf_object(Object, Options)
 397        ].
 398an_rdf_object(Object, Options) ::=
 399        [ \nodeElement(Object, Options),
 400          \ws
 401        ], !.
 402
 403ws ::=
 404        A,
 405        { atom(A),
 406          atom_chars(A, Chars),
 407          all_blank(Chars), !
 408        }.
 409ws ::=
 410        pi(_).
 411
 412all_ws ::=
 413        [], !.
 414all_ws ::=
 415        [\ws | \all_ws].
 416
 417all_blank([]).
 418all_blank([H|T]) :-
 419    char_type(H, space),            % SWI-Prolog specific
 420    all_blank(T).
 421
 422
 423                 /*******************************
 424                 *         RDF ATTRIBUTES       *
 425                 *******************************/
 426
 427idAttr(Id, Options) ::=
 428        \rdf_or_unqualified('ID') = \uniqueid(Id, Options).
 429
 430aboutAttr(About, Options) ::=
 431        \rdf_or_unqualified(about) = \value_uri(About, Options).
 432
 433nodeIDAttr(About) ::=
 434        \rdf_or_unqualified(nodeID) = About.
 435
 436resourceAttr(URI, Options) ::=
 437        \rdf_or_unqualified(resource) = \value_uri(URI, Options).
 438
 439typeAttr(Type, Options) ::=
 440        \rdf_or_unqualified(datatype) = \value_uri(Type, Options).
 441
 442name_uri(URI, Options) ::=
 443        NS:Local,
 444        {   !, atom_concat(NS, Local, A),
 445            rewrite(\value_uri(URI, Options), A)
 446        }.
 447name_uri(URI, Options) ::=
 448        \value_uri(URI, Options).
 449
 450value_uri(URI, Options) ::=
 451        A,
 452        {   rdf_state_base_uri(Options, Base),
 453            (   Base \== []
 454            ->  iri_normalized(A, Base, URI)
 455            ;   URI = A
 456            )
 457        }.
 458
 459uniqueid(Id, Options) ::=
 460        A,
 461        {   unique_xml_name(A, HashID),
 462            make_globalid(HashID, Options, Id)
 463        }.
 464
 465unique_xml_name(Name, HashID) :-
 466    atom_concat(#, Name, HashID),
 467    (   xml_name(Name)
 468    ->  true
 469    ;   print_message(warning, rdf(not_a_name(Name)))
 470    ).
 471
 472make_globalid(In, Options, Id) :-
 473    rdf_state_base_uri(Options, Base),
 474    iri_normalized(In, Base, Id).
 475
 476parseLiteral    ::= \rdf_or_unqualified(parseType) = 'Literal'.
 477parseResource   ::= \rdf_or_unqualified(parseType) = 'Resource'.
 478parseCollection ::= \rdf_or_unqualified(parseType) = 'Collection'.
 479
 480
 481                 /*******************************
 482                 *           PRIMITIVES         *
 483                 *******************************/
 484
 485rdf(Tag) ::=
 486        NS:Tag,
 487        { rdf_name_space(NS), !
 488        }.
 489
 490rdf_or_unqualified(Tag) ::=
 491        Tag.
 492rdf_or_unqualified(Tag) ::=
 493        NS:Tag,
 494        { rdf_name_space(NS), !
 495        }.
 496
 497
 498                 /*******************************
 499                 *             BASICS           *
 500                 *******************************/
 501
 502/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 503This code is translated by the  goal_expansion/2   rule  at the start of
 504this file. We leave the original code for reference.
 505
 506attrs(Bag) ::=
 507        L0,
 508        { do_attrs(Bag, L0)
 509        }.
 510
 511do_attrs([], _) :- !.
 512do_attrs([\?H|T], L0) :- !,             % optional
 513        (   select(X, L0, L),
 514            rewrite(\H, X)
 515        ->  true
 516        ;   L = L0
 517        ),
 518        do_attrs(T, L).
 519do_attrs([H|T], L0) :-
 520        select(X, L0, L),
 521        rewrite(H, X), !,
 522        do_attrs(T, L).
 523do_attrs(C, L) :-
 524        rewrite(C, L).
 525- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 526
 527%       \noMoreAttrs
 528%
 529%       Check attribute-list is empty.  Reserved xml: attributes are
 530%       excluded from this test.
 531
 532noMoreAttrs ::=
 533        [], !.
 534noMoreAttrs ::=
 535        [ xml:_=_
 536        | \noMoreAttrs
 537        ].
 538
 539%!  modify_state(+Element0, +Options0, -Element, -Options) is semidet.
 540%
 541%   If Element0 contains xml:base = Base, strip it from the
 542%   attributes list and update base_uri(_) in the Options
 543%
 544%   It Element0 contains xml:lang = Lang, strip it from the
 545%   attributes list and update lang(_) in the Options
 546%
 547%   Remove all xmlns=_, xmlns:_=_ and xml:_=_.  Only succeed
 548%   if something changed.
 549
 550modify_state(element(Name, Attrs0, Content), Options0,
 551             element(Name, Attrs,  Content), Options) :-
 552    modify_a_state(Attrs0, Options0, Attrs, Options),
 553    Attrs0 \== Attrs.
 554
 555rdf_modify_state(Attributes, State0, State) :-
 556    modify_a_state(Attributes, State0, _, State).
 557
 558
 559modify_a_state([], Options, [], Options).
 560modify_a_state([Name=Value|T0], Options0, T, Options) :-
 561    modify_a(Name, Value, Options0, Options1),
 562    !,
 563    modify_a_state(T0, Options1, T, Options).
 564modify_a_state([H|T0], Options0, [H|T], Options) :-
 565    modify_a_state(T0, Options0, T, Options).
 566
 567
 568modify_a(xml:base, Base1, Options0, Options) :-
 569    !,
 570    rdf_state_base_uri(Options0, Base0),
 571    remove_fragment(Base1, Base2),
 572    iri_normalized(Base2, Base0, Base),
 573    set_base_uri_of_rdf_state(Base, Options0, Options).
 574modify_a(xml:lang, Lang, Options0, Options) :-
 575    !,
 576    rdf_state_ignore_lang(Options0, false),
 577    !,
 578    set_lang_of_rdf_state(Lang, Options0, Options).
 579modify_a(xmlns, _, Options, Options).
 580modify_a(xmlns:_, _, Options, Options).
 581modify_a(xml:_, _, Options, Options).
 582
 583
 584%!  remove_fragment(+URI, -WithoutFragment)
 585%
 586%   When handling xml:base, we must delete the possible fragment.
 587
 588remove_fragment(URI, Plain) :-
 589    sub_atom(URI, B, _, _, #),
 590    !,
 591    sub_atom(URI, 0, B, _, Plain).
 592remove_fragment(URI, URI).
 593
 594
 595                 /*******************************
 596                 *     HELP PCE-EMACS A BIT     *
 597                 *******************************/
 598
 599:- multifile
 600    emacs_prolog_colours:term_colours/2,
 601    emacs_prolog_colours:goal_classification/2.
 602
 603expand(c(X), _, X) :- !.
 604expand(In,   Pattern, Colours) :-
 605    compound(In),
 606    !,
 607    In =.. [F|Args],
 608    expand_list(Args, PatternArgs, ColourArgs),
 609    Pattern =.. [F|PatternArgs],
 610    Colours = functor(F) - ColourArgs.
 611expand(X, X, classify).
 612
 613expand_list([], [], []).
 614expand_list([H|T], [PH|PT], [CH|CT]) :-
 615    expand(H, PH, CH),
 616    expand_list(T, PT, CT).
 617
 618:- discontiguous
 619    term_expansion/2.
 620
 621term_expansion(term_colours(C),
 622               emacs_prolog_colours:term_colours(Pattern, Colours)) :-
 623    expand(C, Pattern, Colours).
 624
 625term_colours((c(head(+(1))) ::= c(match), {c(body)})).
 626term_colours((c(head(+(1))) ::= c(match))).
 627
 628emacs_prolog_colours:goal_classification(\_, expanded).
 629
 630:- dynamic
 631    prolog:meta_goal/2.
 632:- multifile
 633    prolog:meta_goal/2,
 634    prolog:called_by/2.
 635
 636prolog:meta_goal(rewrite(A, _), [A]).
 637prolog:meta_goal(\A,            [A+1]).
 638
 639prolog:called_by(attrs(Attrs, _Term), Called) :-
 640    findall(G+1, sub_term(\?G, Attrs), Called, Tail),
 641    findall(G+1, sub_term(\G, Attrs), Tail).