View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker & Richard O'Keefe
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2004-2016, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(sgml_write,
  37          [ html_write/2,               %          +Data, +Options
  38            html_write/3,               % +Stream, +Data, +Options
  39            sgml_write/2,               %          +Data, +Options
  40            sgml_write/3,               % +Stream, +Data, +Options
  41            xml_write/2,                %          +Data, +Options
  42            xml_write/3                 % +Stream, +Data, +Options
  43          ]).
  44:- use_module(library(lists)).
  45:- use_module(library(sgml)).
  46:- use_module(library(assoc)).
  47:- use_module(library(option)).
  48:- use_module(library(error)).
  49
  50:- predicate_options(xml_write/2, 2, [pass_to(xml_write/3, 3)]).
  51:- predicate_options(xml_write/3, 3,
  52                     [ dtd(any),
  53                       doctype(atom),
  54                       public(atom),
  55                       system(atom),
  56                       header(boolean),
  57                       nsmap(list),
  58                       indent(nonneg),
  59                       layout(boolean),
  60                       net(boolean),
  61                       cleanns(boolean)
  62                     ]).
  63
  64:- multifile
  65    xmlns/2.                        % NS, URI
  66
  67/** <module> XML/SGML writer module
  68
  69This library provides the inverse functionality   of  the sgml.pl parser
  70library, writing XML, SGML and HTML documents from the parsed output. It
  71is intended to allow rewriting in a  different dialect or encoding or to
  72perform document transformation in Prolog on the parsed representation.
  73
  74The current implementation is  particularly   keen  on getting character
  75encoding and the use of character  entities   right.  Some work has been
  76done providing layout, but space handling in   XML  and SGML make this a
  77very hazardous area.
  78
  79The Prolog-based low-level character and  escape   handling  is the real
  80bottleneck in this library and will probably be   moved  to C in a later
  81stage.
  82
  83@see    library(http/html_write) provides a high-level library for
  84        emitting HTML and XHTML.
  85*/
  86
  87%!  xml_write(+Data, +Options) is det.
  88%!  sgml_write(+Data, +Options) is det.
  89%!  html_write(+Data, +Options) is det.
  90%!  xml_write(+Stream, +Data, +Options) is det.
  91%!  sgml_write(+Stream, +Data, +Options) is det.
  92%!  html_write(+Stream, +Data, +Options) is det.
  93%
  94%   Write a term as created by the SGML/XML parser to a stream in
  95%   SGML or XML format.  Options:
  96%
  97%           * cleanns(Bool)
  98%           If `true` (default), remove duplicate `xmlns`
  99%           attributes.
 100%           * dtd(DTD)
 101%           The DTD.  This is needed for SGML documents that contain
 102%           elements with content model EMPTY.  Characters which may
 103%           not be written directly in the Stream's encoding will be
 104%           written using character data entities from the DTD if at
 105%           all possible, otherwise as numeric character references.
 106%           Note that the DTD will NOT be written out at all; as yet
 107%           there is no way to write out an internal subset,  though
 108%           it would not be hard to add one.
 109%
 110%           * doctype(DocType)
 111%           Document type for the SGML document type declaration.
 112%           If omitted it is taken from the root element.  There is
 113%           never any point in having this be disagree with the
 114%           root element.  A <!DOCTYPE> declaration will be written
 115%           if and only if at least one of doctype(_), public(_), or
 116%           system(_) is provided in Options.
 117%
 118%           * public(PubId)
 119%           The public identifier to be written in the <!DOCTYPE> line.
 120%
 121%           * system(SysId)
 122%           The system identifier to be written in the <!DOCTYPE> line.
 123%
 124%           * header(Bool)
 125%           If Bool is 'false', do not emit the <xml ...> header
 126%           line.  (xml_write/3 only)
 127%
 128%           * nsmap(Map:list(Id=URI))
 129%           When emitting embedded XML, assume these namespaces
 130%           are already defined from the environment.  (xml_write/3
 131%           only).
 132%
 133%           * indent(Indent)
 134%           Indentation of the document (for embedding)
 135%
 136%           * layout(Bool)
 137%           Emit/do not emit layout characters to make output
 138%           readable.
 139%
 140%           * net(Bool)
 141%           Use/do not use Null End Tags.
 142%           For XML, this applies only to empty elements, so you get
 143%
 144%           ==
 145%               <foo/>      (default, net(true))
 146%               <foo></foo> (net(false))
 147%           ==
 148%
 149%           For SGML, this applies to empty elements, so you get
 150%
 151%           ==
 152%               <foo>       (if foo is declared to be EMPTY in the DTD)
 153%               <foo></foo> (default, net(false))
 154%               <foo//      (net(true))
 155%           ==
 156%
 157%           and also to elements with character content not containing /
 158%
 159%           ==
 160%               <b>xxx</b>  (default, net(false))
 161%               <b/xxx/     (net(true)).
 162%           ==
 163%
 164%   Note that if the stream is UTF-8, the system will write special
 165%   characters as UTF-8 sequences, while if it is ISO Latin-1 it
 166%   will use (character) entities if there is a DTD that provides
 167%   them, otherwise it will use numeric character references.
 168
 169xml_write(Data, Options) :-
 170    current_output(Stream),
 171    xml_write(Stream, Data, Options).
 172
 173xml_write(Stream0, Data, Options) :-
 174    fix_user_stream(Stream0, Stream),
 175    (   stream_property(Stream, encoding(text))
 176    ->  set_stream(Stream, encoding(utf8)),
 177        call_cleanup(xml_write(Stream, Data, Options),
 178                     set_stream(Stream, encoding(text)))
 179    ;   new_state(xml, State),
 180        init_state(Options, State),
 181        get_state(State, nsmap, NSMap),
 182        add_missing_namespaces(Data, NSMap, Data1),
 183        emit_xml_encoding(Stream, Options),
 184        emit_doctype(Options, Data, Stream),
 185        write_initial_indent(State, Stream),
 186        emit(Data1, Stream, State)
 187    ).
 188
 189
 190sgml_write(Data, Options) :-
 191    current_output(Stream),
 192    sgml_write(Stream, Data, Options).
 193
 194sgml_write(Stream0, Data, Options) :-
 195    fix_user_stream(Stream0, Stream),
 196    (   stream_property(Stream, encoding(text))
 197    ->  set_stream(Stream, encoding(utf8)),
 198        call_cleanup(sgml_write(Stream, Data, Options),
 199                     set_stream(Stream, encoding(text)))
 200    ;   new_state(sgml, State),
 201        init_state(Options, State),
 202        write_initial_indent(State, Stream),
 203        emit_doctype(Options, Data, Stream),
 204        emit(Data, Stream, State)
 205    ).
 206
 207
 208html_write(Data, Options) :-
 209    current_output(Stream),
 210    html_write(Stream, Data, Options).
 211
 212html_write(Stream, Data, Options) :-
 213    sgml_write(Stream, Data,
 214               [ dtd(html)
 215               | Options
 216               ]).
 217
 218fix_user_stream(user, user_output) :- !.
 219fix_user_stream(Stream, Stream).
 220
 221
 222init_state([], _).
 223init_state([H|T], State) :-
 224    update_state(H, State),
 225    init_state(T, State).
 226
 227update_state(dtd(DTD), State) :-
 228    !,
 229    (   atom(DTD)
 230    ->  dtd(DTD, DTDObj)
 231    ;   DTDObj = DTD
 232    ),
 233    set_state(State, dtd, DTDObj),
 234    dtd_character_entities(DTDObj, EntityMap),
 235    set_state(State, entity_map, EntityMap).
 236update_state(nsmap(Map), State) :-
 237    !,
 238    set_state(State, nsmap, Map).
 239update_state(cleanns(Bool), State) :-
 240    !,
 241    must_be(boolean, Bool),
 242    set_state(State, cleanns, Bool).
 243update_state(indent(Indent), State) :-
 244    !,
 245    must_be(integer, Indent),
 246    set_state(State, indent, Indent).
 247update_state(layout(Bool), State) :-
 248    !,
 249    must_be(boolean, Bool),
 250    set_state(State, layout, Bool).
 251update_state(doctype(_), _) :- !.
 252update_state(public(_),  _) :- !.
 253update_state(system(_),  _) :- !.
 254update_state(net(Bool), State) :-
 255    !,
 256    must_be(boolean, Bool),
 257    set_state(State, net, Bool).
 258update_state(header(Bool), _) :-
 259    !,
 260    must_be(boolean, Bool).
 261update_state(Option, _) :-
 262    domain_error(xml_write_option, Option).
 263
 264%       emit_xml_encoding(+Stream, +Options)
 265%
 266%       Emit the XML fileheader with   encoding information. Setting the
 267%       right encoding on the output stream  must be done before calling
 268%       xml_write/3.
 269
 270emit_xml_encoding(Out, Options) :-
 271    option(header(Hdr), Options, true),
 272    Hdr == true,
 273    !,
 274    stream_property(Out, encoding(Encoding)),
 275    (   (   Encoding == utf8
 276        ;   Encoding == wchar_t
 277        )
 278    ->  format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', [])
 279    ;   Encoding == iso_latin_1
 280    ->  format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', [])
 281    ;   domain_error(xml_encoding, Encoding)
 282    ).
 283emit_xml_encoding(_, _).
 284
 285
 286%!  emit_doctype(+Options, +Data, +Stream)
 287%
 288%   Emit the document-type declaration.
 289%   There is a problem with the first clause if we are emitting SGML:
 290%   the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version'
 291%   attribute; so the only time this is useful is when it is illegal!
 292
 293emit_doctype(_Options, Data, Out) :-
 294    (   Data = [_|_], memberchk(element(html,Att,_), Data)
 295    ;   Data = element(html,Att,_)
 296    ),
 297    memberchk(version=Version, Att),
 298    !,
 299    format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]).
 300emit_doctype(Options, Data, Out) :-
 301    (   memberchk(public(PubId), Options) -> true
 302    ;   PubId = (-)
 303    ),
 304    (   memberchk(system(SysId), Options) -> true
 305    ;   SysId = (-)
 306    ),
 307    \+ (PubId == (-),
 308        SysId == (-),
 309        \+ memberchk(doctype(_), Options)
 310    ),
 311    (   Data = element(DocType,_,_)
 312    ;   Data = [_|_], memberchk(element(DocType,_,_), Data)
 313    ;   memberchk(doctype(DocType), Options)
 314    ),
 315    !,
 316    write_doctype(Out, DocType, PubId, SysId).
 317emit_doctype(_, _, _).
 318
 319write_doctype(Out, DocType, -, -) :-
 320    !,
 321    format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]).
 322write_doctype(Out, DocType, -, SysId) :-
 323    !,
 324    format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]).
 325write_doctype(Out, DocType, PubId, -) :-
 326    !,
 327    format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]).
 328write_doctype(Out, DocType, PubId, SysId) :-
 329    format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
 330
 331
 332%!  emit(+Element, +Out, +State, +Options)
 333%
 334%   Emit a single element
 335
 336emit(Var, _, _) :-
 337    var(Var),
 338    !,
 339    instantiation_error(Var).
 340emit([], _, _) :- !.
 341emit([H|T], Out, State) :-
 342    !,
 343    emit(H, Out, State),
 344    emit(T, Out, State).
 345emit(CDATA, Out, State) :-
 346    atomic(CDATA),
 347    !,
 348    sgml_write_content(Out, CDATA, State).
 349emit(Element, Out, State) :-
 350    \+ \+ emit_element(Element, Out, State).
 351
 352emit_element(pi(PI), Out, State) :-
 353    !,
 354    get_state(State, entity_map, EntityMap),
 355    write(Out, <?),
 356    write_quoted(Out, PI, '', EntityMap),
 357    (   get_state(State, dialect, xml) ->
 358        write(Out, ?>)
 359    ;   write(Out, >)
 360    ).
 361emit_element(element(Name, Attributes, Content), Out, State) :-
 362    !,
 363    must_be(list, Attributes),
 364    must_be(list, Content),
 365    (   get_state(State, dialect, xml)
 366    ->  update_nsmap(Attributes, CleanAttrs, State),
 367        (   get_state(State, cleanns, true)
 368        ->  WriteAttrs = CleanAttrs
 369        ;   WriteAttrs = Attributes
 370        )
 371    ;   WriteAttrs = Attributes
 372    ),
 373    att_length(WriteAttrs, State, Alen),
 374    (   Alen > 60,
 375        get_state(State, layout, true)
 376    ->  Sep = nl,
 377        AttIndent = 4
 378    ;   Sep = sp,
 379        AttIndent = 0
 380    ),
 381    put_char(Out, '<'),
 382    emit_name(Name, Out, State),
 383    (   AttIndent > 0
 384    ->  \+ \+ ( inc_indent(State, AttIndent),
 385                attributes(WriteAttrs, Sep, Out, State)
 386              )
 387    ;   attributes(WriteAttrs, Sep, Out, State)
 388    ),
 389    content(Content, Out, Name, State).
 390emit_element(E, _, _) :-
 391    type_error(xml_dom, E).
 392
 393attributes([], _, _, _).
 394attributes([H|T], Sep, Out, State) :-
 395    (   Sep == nl
 396    ->  write_indent(State, Out)
 397    ;   put_char(Out, ' ')
 398    ),
 399    attribute(H, Out, State),
 400    attributes(T, Sep, Out, State).
 401
 402attribute(Name=Value, Out, State) :-
 403    emit_name(Name, Out, State),
 404    put_char(Out, =),
 405    sgml_write_attribute(Out, Value, State).
 406
 407att_length(Atts, State, Len) :-
 408    att_length(Atts, State, 0, Len).
 409
 410att_length([], _, Len, Len).
 411att_length([A0|T], State, Len0, Len) :-
 412    alen(A0, State, AL),
 413    Len1 is Len0 + 1 + AL,
 414    att_length(T, State, Len1, Len).
 415
 416alen(ns(NS, _URI):Name=Value, _State, Len) :-
 417    !,
 418    atom_length(Value, AL),
 419    vlen(Name, NL),
 420    atom_length(NS, NsL),
 421    Len is AL+NL+NsL+3.
 422alen(URI:Name=Value, State, Len) :-
 423    !,
 424    atom_length(Value, AL),
 425    vlen(Name, NL),
 426    get_state(State, nsmap, Nsmap),
 427    (   memberchk(NS=URI, Nsmap)
 428    ->  atom_length(NS, NsL)
 429    ;   atom_length(URI, NsL)
 430    ),
 431    Len is AL+NL+NsL+3.
 432alen(Name=Value, _, Len) :-
 433    atom_length(Name, NL),
 434    vlen(Value, AL),
 435    Len is AL+NL+3.
 436
 437vlen(Value, Len) :-
 438    is_list(Value),
 439    !,
 440    vlen_list(Value, 0, Len).
 441vlen(Value, Len) :-
 442    atom_length(Value, Len).
 443
 444vlen_list([], L, L).
 445vlen_list([H|T], L0, L) :-
 446    atom_length(H, HL),
 447    (   L0 == 0
 448    ->  L1 is L0 + HL
 449    ;   L1 is L0 + HL + 1
 450    ),
 451    vlen_list(T, L1, L).
 452
 453
 454emit_name(Name, Out, _) :-
 455    atom(Name),
 456    !,
 457    write(Out, Name).
 458emit_name(ns(NS,_URI):Name, Out, _State) :-
 459    !,
 460    (  NS == ''
 461    -> write(Out, Name)
 462    ;  format(Out, '~w:~w', [NS, Name])
 463    ).
 464emit_name(URI:Name, Out, State) :-
 465    get_state(State, nsmap, NSMap),
 466    memberchk(NS=URI, NSMap),
 467    !,
 468    (   NS == []
 469    ->  write(Out, Name)
 470    ;   format(Out, '~w:~w', [NS, Name])
 471    ).
 472emit_name(Term, Out, _) :-              % error?
 473    write(Out, Term).
 474
 475%!  update_nsmap(+Attributes, -Attributes1, !State)
 476%
 477%   Modify the nsmap of State to reflect modifications due to xmlns
 478%   arguments.
 479%
 480%   @arg    Attributes1 is a copy of Attributes with all redundant
 481%           namespace attributes deleted.
 482
 483update_nsmap(Attributes, Attributes1, State) :-
 484    get_state(State, nsmap, Map0),
 485    update_nsmap(Attributes, Attributes1, Map0, Map),
 486    set_state(State, nsmap, Map).
 487
 488update_nsmap([], [], Map, Map).
 489update_nsmap([xmlns:NS=URI|T], Attrs, Map0, Map) :-
 490    !,
 491    (   memberchk(NS=URI, Map0)
 492    ->  update_nsmap(T, Attrs, Map0, Map)
 493    ;   set_nsmap(NS, URI, Map0, Map1),
 494        Attrs = [xmlns:NS=URI|Attrs1],
 495        update_nsmap(T, Attrs1, Map1, Map)
 496    ).
 497update_nsmap([xmlns=URI|T], Attrs, Map0, Map) :-
 498    !,
 499    (   memberchk([]=URI, Map0)
 500    ->  update_nsmap(T, Attrs, Map0, Map)
 501    ;   set_nsmap([], URI, Map0, Map1),
 502        Attrs = [xmlns=URI|Attrs1],
 503        update_nsmap(T, Attrs1, Map1, Map)
 504    ).
 505update_nsmap([H|T0], [H|T], Map0, Map) :-
 506    !,
 507    update_nsmap(T0, T, Map0, Map).
 508
 509set_nsmap(NS, URI, Map0, Map) :-
 510    select(NS=_, Map0, Map1),
 511    !,
 512    Map = [NS=URI|Map1].
 513set_nsmap(NS, URI, Map, [NS=URI|Map]).
 514
 515
 516%!  content(+Content, +Out, +Element, +State, +Options)
 517%
 518%   Emit the content part of a structure  as well as the termination
 519%   for the content. For empty content   we have three versions: XML
 520%   style '/>', SGML declared EMPTY element (nothing) or normal SGML
 521%   element (we must close with the same element name).
 522
 523content([], Out, Element, State) :-    % empty element
 524    !,
 525    (   get_state(State, net, true)
 526    ->  (   get_state(State, dialect, xml) ->
 527            write(Out, />)
 528        ;   empty_element(State, Element) ->
 529            write(Out, >)
 530        ;   write(Out, //)
 531        )
 532    ;/* get_state(State, net, false) */
 533        write(Out, >),
 534        (   get_state(State, dialect, sgml),
 535            empty_element(State, Element)
 536        ->  true
 537        ;   emit_close(Element, Out, State)
 538        )
 539    ).
 540content([CDATA], Out, Element, State) :-
 541    atomic(CDATA),
 542    !,
 543    (   get_state(State, dialect, sgml),
 544        get_state(State, net, true),
 545        \+ sub_atom(CDATA, _, _, _, /),
 546        write_length(CDATA, Len, []),
 547        Len < 20
 548    ->  write(Out, /),
 549        sgml_write_content(Out, CDATA, State),
 550        write(Out, /)
 551    ;   verbatim_element(Element, State)
 552    ->  write(Out, >),
 553        write(Out, CDATA),
 554        emit_close(Element, Out, State)
 555    ;/* XML or not NET */
 556            write(Out, >),
 557        sgml_write_content(Out, CDATA, State),
 558        emit_close(Element, Out, State)
 559    ).
 560content(Content, Out, Element, State) :-
 561    get_state(State, layout, true),
 562    /* If xml:space='preserve' is present, */
 563        /* we MUST NOT tamper with white space at all. */
 564        \+ (Element = element(_,Atts,_),
 565        memberchk('xml:space'=preserve, Atts)
 566    ),
 567    element_content(Content, Elements),
 568    !,
 569    format(Out, >, []),
 570    \+ \+ (
 571        inc_indent(State),
 572        write_element_content(Elements, Out, State)
 573    ),
 574    write_indent(State, Out),
 575    emit_close(Element, Out, State).
 576content(Content, Out, Element, State) :-
 577    format(Out, >, []),
 578    write_mixed_content(Content, Out, Element, State),
 579    emit_close(Element, Out, State).
 580
 581verbatim_element(Element, State) :-
 582    verbatim_element(Element),
 583    get_state(State, dtd, DTD),
 584    DTD \== (-),
 585    dtd_property(DTD, doctype(html)).
 586
 587verbatim_element(script).
 588verbatim_element(style).
 589
 590emit_close(Element, Out, State) :-
 591    write(Out, '</'),
 592    emit_name(Element, Out, State),
 593    write(Out, '>').
 594
 595
 596write_mixed_content([], _, _, _).
 597write_mixed_content([H|T], Out, Element, State) :-
 598    write_mixed_content_element(H, Out, State),
 599    write_mixed_content(T, Out, Element, State).
 600
 601write_mixed_content_element(H, Out, State) :-
 602    (   atom(H)
 603    ->  sgml_write_content(Out, H, State)
 604    ;   string(H)
 605    ->  sgml_write_content(Out, H, State)
 606    ;   functor(H, element, 3)
 607    ->  emit(H, Out, State)
 608    ;   functor(H, pi, 1)
 609    ->  emit(H, Out, State)
 610    ;   var(H)
 611    ->  instantiation_error(H)
 612    ;   H = sdata(Data)             % cannot be written without entity!
 613    ->  print_message(warning, sgml_write(sdata_as_cdata(Data))),
 614        sgml_write_content(Out, Data, State)
 615    ;   type_error(sgml_content, H)
 616    ).
 617
 618
 619element_content([], []).
 620element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :-
 621    !,
 622    element_content(T0, T).
 623element_content([Blank|T0], T) :-
 624    atom(Blank),
 625    atom_codes(Blank, Codes),
 626    all_blanks(Codes),
 627    element_content(T0, T).
 628
 629all_blanks([]).
 630all_blanks([H|T]) :-
 631    code_type(H, space),
 632    all_blanks(T).
 633
 634write_element_content([], _, _).
 635write_element_content([H|T], Out, State) :-
 636    write_indent(State, Out),
 637    emit(H, Out, State),
 638    write_element_content(T, Out, State).
 639
 640
 641                 /*******************************
 642                 *           NAMESPACES         *
 643                 *******************************/
 644
 645%!  add_missing_namespaces(+DOM0, +NsMap, -DOM)
 646%
 647%   Add xmlns:NS=URI definitions to the toplevel element(s) to
 648%   deal with missing namespaces.
 649
 650add_missing_namespaces([], _, []) :- !.
 651add_missing_namespaces([H0|T0], Def, [H|T]) :-
 652    !,
 653    add_missing_namespaces(H0, Def, H),
 654    add_missing_namespaces(T0, Def, T).
 655add_missing_namespaces(Elem0, Def, Elem) :-
 656    Elem0 = element(Name, Atts0, Content),
 657    !,
 658    missing_namespaces(Elem0, Def, Missing),
 659    (   Missing == []
 660    ->  Elem = Elem0
 661    ;   add_missing_ns(Missing, Atts0, Atts),
 662        Elem = element(Name, Atts, Content)
 663    ).
 664add_missing_namespaces(DOM, _, DOM).    % CDATA, etc.
 665
 666add_missing_ns([], Atts, Atts).
 667add_missing_ns([H|T], Atts0, Atts) :-
 668    generate_ns(H, NS),
 669    add_missing_ns(T, [xmlns:NS=H|Atts0], Atts).
 670
 671%!  generate_ns(+URI, -NS) is det.
 672%
 673%   Generate a namespace (NS) identifier for URI.
 674
 675generate_ns(URI, NS) :-
 676    xmlns(NS, URI),
 677    !.
 678generate_ns(URI, NS) :-
 679    default_ns(URI, NS),
 680    !.
 681generate_ns(_, NS) :-
 682    gensym(xns, NS).
 683
 684%!  xmlns(?NS, ?URI) is nondet.
 685%
 686%   Hook to define human readable  abbreviations for XML namespaces.
 687%   xml_write/3 tries these locations:
 688%
 689%     1. This hook
 690%     2. Defaults (see below)
 691%     3. rdf_db:ns/2 for RDF-DB integration
 692%
 693%   Default XML namespaces are:
 694%
 695%     | xsi    | http://www.w3.org/2001/XMLSchema-instance |
 696%     | xs     | http://www.w3.org/2001/XMLSchema          |
 697%     | xhtml  | http://www.w3.org/1999/xhtml              |
 698%     | soap11 | http://schemas.xmlsoap.org/soap/envelope/ |
 699%     | soap12 | http://www.w3.org/2003/05/soap-envelope   |
 700%
 701%   @see xml_write/2, rdf_register_ns/2.
 702
 703:- multifile
 704    rdf_db:ns/2.
 705
 706default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi).
 707default_ns('http://www.w3.org/2001/XMLSchema',          xs).
 708default_ns('http://www.w3.org/1999/xhtml',              xhtml).
 709default_ns('http://schemas.xmlsoap.org/soap/envelope/', soap11).
 710default_ns('http://www.w3.org/2003/05/soap-envelope',   soap12).
 711default_ns(URI, NS) :-
 712    rdf_db:ns(NS, URI).
 713
 714%!  missing_namespaces(+DOM, +NSMap, -Missing)
 715%
 716%   Return a list of URIs appearing in DOM that are not covered
 717%   by xmlns definitions.
 718
 719missing_namespaces(DOM, Defined, Missing) :-
 720    missing_namespaces(DOM, Defined, [], Missing).
 721
 722missing_namespaces([], _, L, L) :- !.
 723missing_namespaces([H|T], Def, L0, L) :-
 724    !,
 725    missing_namespaces(H, Def, L0, L1),
 726    missing_namespaces(T, Def, L1, L).
 727missing_namespaces(element(Name, Atts, Content), Def, L0, L) :-
 728    !,
 729    update_nsmap(Atts, _, Def, Def1),
 730    missing_ns(Name, Def1, L0, L1),
 731    missing_att_ns(Atts, Def1, L1, L2),
 732    missing_namespaces(Content, Def1, L2, L).
 733missing_namespaces(_, _, L, L).
 734
 735missing_att_ns([], _, M, M).
 736missing_att_ns([Name=_|T], Def, M0, M) :-
 737    missing_ns(Name, Def, M0, M1),
 738    missing_att_ns(T, Def, M1, M).
 739
 740missing_ns(ns(NS, URI):_, Def, M0, M) :-
 741    !,
 742    (  memberchk(NS=URI, Def)
 743    -> M = M0
 744    ;  NS == ''
 745    -> M = M0
 746    ;  M = [URI|M0]
 747    ).
 748missing_ns(URI:_, Def, M0, M) :-
 749    !,
 750    (   (   memberchk(_=URI, Def)
 751        ;   memberchk(URI, M0)
 752        ;   URI = xml               % predefined ones
 753        ;   URI = xmlns
 754        )
 755    ->  M = M0
 756    ;   M = [URI|M0]
 757    ).
 758missing_ns(_, _, M, M).
 759
 760                 /*******************************
 761                 *         QUOTED WRITE         *
 762                 *******************************/
 763
 764sgml_write_attribute(Out, Values, State) :-
 765    is_list(Values),
 766    !,
 767    get_state(State, entity_map, EntityMap),
 768    put_char(Out, '"'),
 769    write_quoted_list(Values, Out, '"<&\r\n\t', EntityMap),
 770    put_char(Out, '"').
 771sgml_write_attribute(Out, Value, State) :-
 772    is_text(Value),
 773    !,
 774    get_state(State, entity_map, EntityMap),
 775    put_char(Out, '"'),
 776    write_quoted(Out, Value, '"<&\r\n\t', EntityMap),
 777    put_char(Out, '"').
 778sgml_write_attribute(Out, Value, _State) :-
 779    number(Value),
 780    !,
 781    format(Out, '"~w"', [Value]).
 782sgml_write_attribute(_, Value, _) :-
 783    type_error(sgml_attribute_value, Value).
 784
 785write_quoted_list([], _, _, _).
 786write_quoted_list([H|T], Out, Escape, EntityMap) :-
 787    write_quoted(Out, H, Escape, EntityMap),
 788    (   T == []
 789    ->  true
 790    ;   put_char(Out, ' '),
 791        write_quoted_list(T, Out, Escape, EntityMap)
 792    ).
 793
 794
 795sgml_write_content(Out, Value, State) :-
 796    is_text(Value),
 797    !,
 798    get_state(State, entity_map, EntityMap),
 799    write_quoted(Out, Value, '<&>\r', EntityMap).
 800sgml_write_content(Out, Value, _) :-
 801    write(Out, Value).
 802
 803is_text(Value) :- atom(Value), !.
 804is_text(Value) :- string(Value), !.
 805
 806write_quoted(Out, Atom, Escape, EntityMap) :-
 807    atom(Atom),
 808    !,
 809    atom_codes(Atom, Codes),
 810    writeq(Codes, Out, Escape, EntityMap).
 811write_quoted(Out, String, Escape, EntityMap) :-
 812    string(String),
 813    !,
 814    string_codes(String, Codes),
 815    writeq(Codes, Out, Escape, EntityMap).
 816write_quoted(_, String, _, _) :-
 817    type_error(atom_or_string, String).
 818
 819
 820%!  writeq(+Text:codes, +Out:stream, +Escape:atom, +Escape:assoc) is det.
 821
 822writeq([], _, _, _).
 823writeq([H|T], Out, Escape, EntityMap) :-
 824    (   char_code(HC, H),
 825        sub_atom(Escape, _, _, _, HC)
 826    ->  write_entity(H, Out, EntityMap)
 827    ;   H >= 256
 828    ->  (   stream_property(Out, encoding(Enc)),
 829            unicode_encoding(Enc)
 830        ->  put_code(Out, H)
 831        ;   write_entity(H, Out, EntityMap)
 832        )
 833    ;   put_code(Out, H)
 834    ),
 835    writeq(T, Out, Escape, EntityMap).
 836
 837unicode_encoding(utf8).
 838unicode_encoding(wchar_t).
 839unicode_encoding(unicode_le).
 840unicode_encoding(unicode_be).
 841
 842write_entity(Code, Out, EntityMap) :-
 843    (   get_assoc(Code, EntityMap, EntityName)
 844    ->  format(Out, '&~w;', [EntityName])
 845    ;   format(Out, '&#x~16R;', [Code])
 846    ).
 847
 848
 849                 /*******************************
 850                 *          INDENTATION         *
 851                 *******************************/
 852
 853write_initial_indent(State, Out) :-
 854    (   get_state(State, indent, Indent),
 855        Indent > 0
 856    ->  emit_indent(Indent, Out)
 857    ;   true
 858    ).
 859
 860write_indent(State, _) :-
 861    get_state(State, layout, false),
 862    !.
 863write_indent(State, Out) :-
 864    get_state(State, indent, Indent),
 865    emit_indent(Indent, Out).
 866
 867emit_indent(Indent, Out) :-
 868    Tabs is Indent // 8,
 869    Spaces is Indent mod 8,
 870    format(Out, '~N', []),
 871    write_n(Tabs, '\t', Out),
 872    write_n(Spaces, ' ', Out).
 873
 874write_n(N, Char, Out) :-
 875    (   N > 0
 876    ->  put_char(Out, Char),
 877        N2 is N - 1,
 878        write_n(N2, Char, Out)
 879    ;   true
 880    ).
 881
 882inc_indent(State) :-
 883    inc_indent(State, 2).
 884
 885inc_indent(State, Inc) :-
 886    state(indent, Arg),
 887    arg(Arg, State, I0),
 888    I is I0 + Inc,
 889    setarg(Arg, State, I).
 890
 891
 892                 /*******************************
 893                 *         DTD HANDLING         *
 894                 *******************************/
 895
 896%!  empty_element(+State, +Element)
 897%
 898%   True if Element is declared  with   EMPTY  content in the (SGML)
 899%   DTD.
 900
 901empty_element(State, Element) :-
 902    get_state(State, dtd, DTD),
 903    DTD \== (-),
 904    dtd_property(DTD, element(Element, _, empty)).
 905
 906%!  dtd_character_entities(+DTD, -Map)
 907%
 908%   Return an assoc mapping character entities   to their name. Note
 909%   that the entity representation is a bit dubious. Entities should
 910%   allow for a wide-character version and avoid the &#..; trick.
 911
 912dtd_character_entities(DTD, Map) :-
 913    empty_assoc(Empty),
 914    dtd_property(DTD, entities(Entities)),
 915    fill_entity_map(Entities, DTD, Empty, Map).
 916
 917fill_entity_map([], _, Map, Map).
 918fill_entity_map([H|T], DTD, Map0, Map) :-
 919    (   dtd_property(DTD, entity(H, CharEntity)),
 920        atom(CharEntity),
 921        (   sub_atom(CharEntity, 0, _, _, '&#'),
 922            sub_atom(CharEntity, _, _, 0, ';')
 923        ->  sub_atom(CharEntity, 2, _, 1, Name),
 924            atom_number(Name, Code)
 925        ;   atom_length(CharEntity, 1),
 926            char_code(CharEntity, Code)
 927        )
 928    ->  put_assoc(Code, Map0, H, Map1),
 929        fill_entity_map(T, DTD, Map1, Map)
 930    ;   fill_entity_map(T, DTD, Map0, Map)
 931    ).
 932
 933
 934
 935                 /*******************************
 936                 *            FIELDS            *
 937                 *******************************/
 938
 939state(indent,     1).                   % current indentation
 940state(layout,     2).                   % use layout (true/false)
 941state(dtd,        3).                   % DTD for entity names
 942state(entity_map, 4).                   % compiled entity-map
 943state(dialect,    5).                   % xml/sgml
 944state(nsmap,      6).                   % defined namespaces
 945state(net,        7).                   % Should null end-tags be used?
 946state(cleanns,    8).                   % Remove duplicate xmlns declarations
 947
 948new_state(Dialect,
 949    state(
 950        0,              % indent
 951        true,           % layout
 952        -,              % DTD
 953        EntityMap,      % entity_map
 954        Dialect,        % dialect
 955        [],             % NS=Full map
 956        Net,            % Null End-Tags?
 957        true            % Remove duplicate xmlns declarations
 958    )) :-
 959    (   Dialect == sgml
 960    ->  Net = false,
 961        empty_assoc(EntityMap)
 962    ;   Net = true,
 963        xml_entities(EntityMap)
 964    ).
 965
 966get_state(State, Field, Value) :-
 967    state(Field, Arg),
 968    arg(Arg, State, Value).
 969
 970set_state(State, Field, Value) :-
 971    state(Field, Arg),
 972    setarg(Arg, State, Value).
 973
 974term_expansion(xml_entities(map),
 975               xml_entities(Map)) :-
 976    list_to_assoc([ 0'< - lt,
 977                    0'& - amp,
 978                    0'> - gt,
 979                    0'\' - apos,
 980                    0'\" - quot
 981                  ], Map).
 982xml_entities(map).
 983
 984                 /*******************************
 985                 *            MESSAGES          *
 986                 *******************************/
 987
 988:- multifile
 989    prolog:message/3.
 990
 991prolog:message(sgml_write(sdata_as_cdata(Data))) -->
 992    [ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ].