View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker and Matt Lilley
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2016, CWI Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(c14n2,
  36          [ xml_write_canonical/3       % +Stream, +Term, +Options
  37          ]).
  38:- use_module(library(error)).
  39:- use_module(library(option)).
  40:- use_module(library(sgml_write)).
  41:- use_module(library(dicts)).
  42:- use_module(library(ordsets)).
  43:- use_module(library(apply)).
  44:- use_module(library(lists)).
  45
  46/** <module> C14n2 canonical XML documents
  47
  48C14n2 specifies a _canonical_ XML document.   This library writes such a
  49document from an XML DOM as returned by   the  XML (or SGML) parser. The
  50process takes two steps:
  51
  52  - Normalise the DOM
  53  - Call xml_write/3 with appropriate flags
  54*/
  55
  56%!  xml_write_canonical(+Stream, +DOM, +Options) is det.
  57%
  58%   Write an XML DOM using the   canonical conventions as defined by
  59%   C14n2. Namespace declarations in the   canonical document depend
  60%   on the original namespace  declarations.   For  this  reason the
  61%   input document must be parsed   (see load_structure/3) using the
  62%   dialect `xmlns` and the option keep_prefix(true).
  63
  64xml_write_canonical(Stream, DOM, Options) :-
  65    option(method(Method), Options, 'http://www.w3.org/TR/2001/REC-xml-c14n-20010315'),
  66    xml_canonical_dom(DOM, CDOM, xml{in_ns:ns{}, out_ns:ns{}, is_root:true, method:Method}),
  67    xml_write(Stream, CDOM,
  68              [ header(false),
  69                layout(false),
  70                net(false)
  71              ]).
  72
  73xml_canonical_dom(Var, _, _) :-
  74    var(Var),
  75    !,
  76    instantiation_error(Var).
  77xml_canonical_dom(DOM, CDOM, Options) :-
  78    is_list(DOM),
  79    !,
  80    xml_canonical_list(DOM, CDOM, Options).
  81xml_canonical_dom(element( Name,  Attrs,  Content),
  82                  element(CName, CAttrs, CContent),
  83                  Options) :-
  84    !,
  85    InNS0  = Options.in_ns,
  86    OutNS0 = Options.out_ns,
  87    Method = Options.method,
  88    take_ns(Attrs, Method, Name, Attrs1, InNS0, InNS),
  89    partition(has_ns, Attrs1, AttrsWithNS0, AttrsSans0),
  90    sort(1, @<, AttrsWithNS0, AttrsWithNS1),
  91    sort(1, @<, AttrsSans0, AttrsSans),
  92    put_elemns(Name, CName, InNS, OutNS0, OutNS1, KillDefault),
  93    put_ns_attrs(AttrsWithNS1, AttrsWithNS, InNS, OutNS1, OutNS),
  94    ns_attrs(OutNS0, OutNS, NSAttrs),
  95    (  Options.is_root == true ->
  96           (  select(xmlns=DefaultNamespace, NSAttrs, NSAttrs0)
  97              % If there is a default namespace, it must come first, and I dont think sort/4 can sort on two keys at once
  98           -> findall(xmlns:NS=URI, member(xmlns:NS=URI, Attrs), RootNSAttrs, NSAttrs0),
  99              sort(2, @=<, RootNSAttrs, RootNSAttrs0),
 100              RootNSAttrs1 = [xmlns=DefaultNamespace|RootNSAttrs0]
 101           ;  Method == 'http://www.w3.org/2001/10/xml-exc-c14n#'
 102           -> RootNSAttrs1 = NSAttrs
 103           ;  findall(xmlns:NS=URI, member(xmlns:NS=URI, Attrs), RootNSAttrs, NSAttrs),
 104              sort(2, @<, RootNSAttrs, RootNSAttrs1)
 105           ),
 106           append([KillDefault, RootNSAttrs1, AttrsSans, AttrsWithNS], CAttrs)
 107    ;  append([KillDefault, NSAttrs, AttrsSans, AttrsWithNS], CAttrs)
 108    ),
 109    must_be(list, Content),
 110    xml_canonical_list(Content, CContent,
 111                       Options.put(_{in_ns:InNS, out_ns:OutNS, is_root:false})).
 112xml_canonical_dom(CDATA, CDATA, _) :-
 113    atomic(CDATA).
 114
 115has_ns(_NS:_Name=_Value).
 116
 117xml_canonical_list([], [], _).
 118xml_canonical_list([H0|T0], [H|T], Options) :-
 119    xml_canonical_dom(H0, H, Options),
 120    xml_canonical_list(T0, T, Options).
 121
 122take_ns([], _, _, [], NSList, NSList).
 123take_ns([H|T0], Method, Name, T, NSList0, NSList) :-
 124    xml_ns(H, NS, URL),
 125    !,
 126    (  include_ns(Name, Method, NS, URL)
 127    -> take_ns(T0, Method, Name, T, NSList0.put(NS, URL), NSList)
 128    ;  take_ns(T0, Method, Name, T, NSList0, NSList)
 129    ).
 130take_ns([H|T0], Method, Name, [H|T], NSList0, NSList) :-
 131    take_ns(T0, Method, Name, T, NSList0, NSList).
 132
 133include_ns(ns(Prefix, URI):_, 'http://www.w3.org/2001/10/xml-exc-c14n#', Prefix, URI):- !.
 134include_ns(_, 'http://www.w3.org/TR/2001/REC-xml-c14n-20010315', _, _):- !.
 135
 136
 137put_ns_attrs([], [], _, OutNS, OutNS).
 138put_ns_attrs([Name=Value|T0], [CName=Value|T], InNS, OutNS0, OutNS) :-
 139    put_ns(Name, CName, InNS, OutNS0, OutNS1),
 140    put_ns_attrs(T0, T, InNS, OutNS1, OutNS).
 141
 142put_elemns(Name, Name, _InNS, OutNS0, OutNS1, [xmlns='']) :-
 143    atom(Name),
 144    dict_pairs(OutNS0, _, Pairs),
 145    memberchk(URL-'', Pairs),
 146    !,
 147    del_dict(URL, OutNS0, '', OutNS1).
 148put_elemns(Name, CName, InNS, OutNS0, OutNS, []) :-
 149    put_ns(Name, CName, InNS, OutNS0, OutNS).
 150
 151put_ns(ns(NS, URL):Name, CName, _InNS, OutNS, OutNS) :-
 152    get_dict(URL, OutNS, NS),
 153    !,
 154    make_cname(NS:Name, CName).
 155put_ns(ns(NS, URL):Name, CName, _InNS, OutNS0, OutNS) :-
 156    !,
 157    make_cname(NS:Name, CName),
 158    OutNS = OutNS0.put(URL, NS).
 159put_ns(URL:Name, CName, _InNS, OutNS, OutNS) :-
 160    get_dict(URL, OutNS, NS),
 161    !,
 162    make_cname(NS:Name, CName).
 163put_ns(URL:Name, CName, InNS, OutNS0, OutNS) :-
 164    dict_pairs(InNS, _, Pairs),
 165    memberchk(NS-URL, Pairs),
 166    !,
 167    make_cname(NS:Name, CName),
 168    OutNS = OutNS0.put(URL, NS).
 169put_ns(Name, Name, _, OutNS, OutNS).
 170
 171ns_attrs(OutNS, OutNS, []) :- !.
 172ns_attrs(OutNS0, OutNS, NSAttrs) :-
 173    !,
 174    dict_keys(OutNS, URLs),
 175    dict_keys(OutNS0, URLs0),
 176    ord_subtract(URLs, URLs0, NewURLs),
 177    maplist(ns_attr(OutNS), NewURLs, NSAttrs0),
 178    sort(NSAttrs0, NSAttrs).
 179
 180ns_attr(Dict, URL, NSAttr) :-
 181    ns_simplify(xmlns:Dict.URL=URL, NSAttr).
 182
 183ns_simplify(xmlns:''=URL, xmlns=URL) :- !.
 184ns_simplify(xmlns:NS=URL, XMLNS=URL) :-
 185    make_cname(xmlns:NS, XMLNS).
 186
 187xml_ns(xmlns=URL, '', URL) :- !.
 188xml_ns(xmlns:NS=URL, NS, URL) :- !.
 189xml_ns(Name=URL, NS, URL) :-
 190    atom(Name),
 191    atom_concat('xmlns:', NS, Name).
 192
 193make_cname('':Name, Name) :- !.
 194make_cname(NS:Name, CName) :-
 195    atomic_list_concat([NS,Name], :, CName).