View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2013, VU University Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(yadis,
  36          [ xrds_dom/2,                 % +URI, -XRDS_DOM
  37            xrds_location/2             % +Xid, -URL
  38          ]).
  39:- use_module(library(http/http_open)).
  40:- use_module(library(xpath)).
  41:- use_module(library(uri)).
  42:- use_module(library(sgml)).
  43
  44/** <module> Yadis discovery
  45
  46@see http://en.wikipedia.org/wiki/Yadis
  47*/
  48
  49:- multifile
  50    xrds_specified_location/2.
  51
  52%!  xrds_dom(+Id, -XRDS_DOM) is det.
  53%
  54%   True when XRDS_DOM is  a  parsed   XML  document  for  the given
  55%   resource.
  56
  57xrds_dom(Xid, XRDS_DOM) :-
  58    xrds_location(Xid, XRDSLocation),
  59    xrds_load(XRDSLocation, XRDS_DOM).
  60
  61%!  xid_normalize(+OpenID, -URL) is det.
  62%
  63%   Translate the user-specified  OpenID  agent   into  a  URL. This
  64%   follows appendix A.1. (Normalization), RFC3986).
  65%
  66%   @tbd This does not implement XRI identifiers.
  67
  68xid_normalize(Xid, URL) :-
  69    add_component(scheme, Xid, URL0, http),
  70    add_component(path,   URL0, URL, /).
  71
  72add_component(Field, URL0, URL, Default) :-
  73    uri_components(URL0, Comp),
  74    uri_data(Field, Comp, Value),
  75    (   var(Value)
  76    ->  (   Field == scheme
  77        ->  atomic_list_concat([Default, '://', URL0], URL)
  78        ;   Value = Default,
  79            uri_components(URL, Comp)
  80        )
  81    ;   Field == path,
  82        Value = ''
  83    ->  uri_data(path, Comp, Default, Comp2),
  84        uri_components(URL, Comp2)
  85    ;   URL = URL0
  86    ).
  87
  88
  89%!  xrds_location(+Id, -XRDSLocation) is semidet.
  90%
  91%   Discover the location of the XRDS document from the given Id.
  92
  93xrds_location(Xid, XRDSLocation) :-
  94    xid_normalize(Xid, URL),
  95    (   xrds_specified_location(URL, XRDSLocation)
  96    ->  XRDSLocation \== (-)
  97    ;   catch(xrds_location_direct(URL, XRDSLocation),
  98              E, yadis_failed(E))
  99    ->  true
 100    ;   catch(xrds_location_html(URL, XRDSLocation),
 101              E, yadis_failed(E))
 102    ).
 103
 104yadis_failed(E) :-
 105    (   debugging(yadis)
 106    ->  print_message(warning, E)
 107    ;   true
 108    ),
 109    fail.
 110
 111xrds_location_direct(URL, XRDSLocation) :-
 112    setup_call_cleanup(
 113        http_open(URL, In,
 114                  [ method(head),
 115                    request_header(accept='application/xrds+xml'),
 116                    header(x_xrds_location, Reply),
 117                    cert_verify_hook(ssl_verify)
 118                  ]),
 119        true,
 120        close(In)),
 121    Reply \== '',
 122    !,
 123    XRDSLocation = Reply.
 124
 125xrds_location_html(URL, XRDSLocation) :-
 126    setup_call_cleanup(
 127        http_open(URL, In,
 128                  [ cert_verify_hook(ssl_verify)
 129                  ]),
 130        html_head_dom(In, DOM),
 131        close(In)),
 132    xpath(DOM, meta(@'http-equiv'=Equiv, @content), Content),
 133    downcase_atom(Equiv, 'x-xrds-location'),
 134    !,
 135    XRDSLocation = Content.
 136
 137%!  xrds_load(+XRDSLocation, -XRDS_DOM) is det.
 138%
 139%   Parse the XRDS document at XRDSLocation.
 140
 141xrds_load(XRDSLocation, XRDS_DOM) :-
 142    setup_call_cleanup(
 143        http_open(XRDSLocation, In,
 144                  [ request_header(accept='application/xrds+xml'),
 145                    cert_verify_hook(ssl_verify)
 146                  ]),
 147        load_structure(In, XRDS_DOM,
 148                       [ dialect(xmlns),
 149                         space(remove)
 150                       ]),
 151        close(In)).
 152
 153:- public ssl_verify/5.
 154
 155%!  ssl_verify(+SSL, +ProblemCert, +AllCerts, +FirstCert, +Error)
 156%
 157%   Accept all certificates.
 158
 159ssl_verify(_SSL,
 160           _ProblemCertificate, _AllCertificates, _FirstCertificate,
 161           _Error).
 162
 163
 164%!  html_head_dom(+Stream, -HeadDOM) is semidet.
 165%
 166%   Extract the HTML head content from   the  given stream. Does not
 167%   parse the remainder of the document.
 168
 169:- thread_local
 170    html_head_dom/1.
 171
 172html_head_dom(Stream, HeadDOM) :-
 173    dtd(html, DTD),
 174    new_sgml_parser(Parser, [dtd(DTD)]),
 175    call_cleanup(
 176        sgml_parse(Parser,
 177                   [ source(Stream),
 178                     syntax_errors(quiet),
 179                     call(begin, on_begin)
 180                   ]),
 181        free_sgml_parser(Parser)),
 182    retract(html_head_dom(HeadDOM)).
 183
 184on_begin(head, Attrs, Parser) :-
 185    sgml_parse(Parser,
 186               [ document(DOM),
 187                 parse(content)
 188               ]),
 189    asserta(html_head_dom(element(head, Attrs, DOM))).
 190
 191%!  xrds_specified_location(+URL, -XRDSLocation) is nondet.
 192%
 193%   Hook that allows for specifying locations of XRDS documents. For
 194%   example, Google does not reply to   Yadis discovery messages. We
 195%   can fake it does using:
 196%
 197%     ==
 198%     yadis:xrds_specified_location('http://google.com/',
 199%                                   'https://www.google.com/accounts/o8/id').
 200%     ==
 201%
 202%   If this hook succeeds with XRDSLocation bound to `-` (minus), we
 203%   assume there is no XRDS document associated to URL.  This can be
 204%   used to avoid retrieving misleading or broken XRDS documents.