View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2002-2016, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(http_open,
  37          [ http_open/3,                % +URL, -Stream, +Options
  38            http_set_authorization/2,   % +URL, +Authorization
  39            http_close_keep_alive/1     % +Address
  40          ]).
  41:- use_module(library(uri)).
  42:- use_module(library(readutil)).
  43:- use_module(library(socket)).
  44:- use_module(library(lists)).
  45:- use_module(library(option)).
  46:- use_module(library(error)).
  47:- use_module(library(base64)).
  48:- use_module(library(debug)).
  49:- use_module(library(aggregate)).
  50:- use_module(library(apply)).
  51:- use_module(library(http/http_header), [http_parse_header/2]).
  52:- use_module(library(http/http_stream)).
  53
  54/** <module> HTTP client library
  55
  56This library defines http_open/3, which opens a  URL as a Prolog stream.
  57The functionality of the  library  can   be  extended  by  loading two
  58additional modules that act as plugins:
  59
  60    * library(http/http_ssl_plugin)
  61    Loading this library causes http_open/3 to handle HTTPS connections.
  62    Relevant options for SSL certificate handling are handed to
  63    ssl_context/3. This plugin is loaded automatically if the scheme
  64    `https` is requested using a default SSL context. See the plugin for
  65    additional information regarding security.
  66
  67    * library(http/http_cookie)
  68    Loading this library adds tracking cookies to http_open/3. Returned
  69    cookies are collected in the Prolog database and supplied for
  70    subsequent requests.
  71
  72Here is a simple example to fetch a web-page:
  73
  74  ==
  75  ?- http_open('http://www.google.com/search?q=prolog', In, []),
  76     copy_stream_data(In, user_output),
  77     close(In).
  78  <!doctype html><head><title>prolog - Google Search</title><script>
  79  ...
  80  ==
  81
  82The example below fetches the modification time of a web-page. Note that
  83Modified is '' (the empty atom)  if   the  web-server does not provide a
  84time-stamp for the resource. See also parse_time/2.
  85
  86  ==
  87  modified(URL, Stamp) :-
  88          http_open(URL, In,
  89                    [ method(head),
  90                      header(last_modified, Modified)
  91                    ]),
  92          close(In),
  93          Modified \== '',
  94          parse_time(Modified, Stamp).
  95  ==
  96
  97Then next example uses Google search. It exploits library(uri) to manage
  98URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  99navigate the parsed HTML. Note that  you   may  need to adjust the XPath
 100queries if the data returned by Google changes.
 101
 102  ==
 103  :- use_module(library(http/http_open)).
 104  :- use_module(library(xpath)).
 105  :- use_module(library(sgml)).
 106  :- use_module(library(uri)).
 107
 108  google(For, Title, HREF) :-
 109          uri_encoded(query_value, For, Encoded),
 110          atom_concat('http://www.google.com/search?q=', Encoded, URL),
 111          http_open(URL, In, []),
 112          call_cleanup(
 113              load_html(In, DOM, []),
 114              close(In)),
 115          xpath(DOM, //h3(@class=r), Result),
 116          xpath(Result, //a(@href=HREF0, text), Title),
 117          uri_components(HREF0, Components),
 118          uri_data(search, Components, Query),
 119          uri_query_components(Query, Parts),
 120          memberchk(q=HREF, Parts).
 121  ==
 122
 123An example query is below:
 124
 125==
 126?- google(prolog, Title, HREF).
 127Title = 'SWI-Prolog',
 128HREF = 'http://www.swi-prolog.org/' ;
 129Title = 'Prolog - Wikipedia',
 130HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
 131Title = 'Prolog - Wikipedia, the free encyclopedia',
 132HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
 133Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
 134HREF = 'http://www.pro-log.nl/' ;
 135Title = 'Learn Prolog Now!',
 136HREF = 'http://www.learnprolognow.org/' ;
 137Title = 'Free Online Version - Learn Prolog
 138...
 139==
 140
 141@see load_html/3 and xpath/3 can be used to parse and navigate HTML
 142     documents.
 143@see http_get/3 and http_post/4 provide an alternative interface that
 144     convert the reply depending on the =|Content-Type|= header.
 145*/
 146
 147:- multifile
 148    http:encoding_filter/3,           % +Encoding, +In0, -In
 149    http:current_transfer_encoding/1, % ?Encoding
 150    http:disable_encoding_filter/1,   % +ContentType
 151    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
 152                                      % -NewStreamPair, +Options
 153    http:open_options/2,              % +Parts, -Options
 154    http:write_cookies/3,             % +Out, +Parts, +Options
 155    http:update_cookies/3,            % +CookieLine, +Parts, +Options
 156    http:authenticate_client/2,       % +URL, +Action
 157    http:http_connection_over_proxy/6.
 158
 159:- meta_predicate
 160    http_open(+,-,:).
 161
 162:- predicate_options(http_open/3, 3,
 163                     [ authorization(compound),
 164                       final_url(-atom),
 165                       header(+atom, -atom),
 166                       headers(-list),
 167                       connection(+atom),
 168                       method(oneof([delete,get,put,head,post,patch,options])),
 169                       size(-integer),
 170                       status_code(-integer),
 171                       output(-stream),
 172                       timeout(number),
 173                       proxy(atom, integer),
 174                       proxy_authorization(compound),
 175                       bypass_proxy(boolean),
 176                       request_header(any),
 177                       user_agent(atom),
 178                       version(-compound),
 179        % The option below applies if library(http/http_header) is loaded
 180                       post(any),
 181        % The options below apply if library(http/http_ssl_plugin)) is loaded
 182                       pem_password_hook(callable),
 183                       cacert_file(atom),
 184                       cert_verify_hook(callable)
 185                     ]).
 186
 187%!  user_agent(-Agent) is det.
 188%
 189%   Default value for =|User-Agent|=,  can   be  overruled using the
 190%   option user_agent(Agent) of http_open/3.
 191
 192user_agent('SWI-Prolog').
 193
 194%!  http_open(+URL, -Stream, +Options) is det.
 195%
 196%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
 197%   either an atom  specifying  a  URL   or  a  list  representing a
 198%   broken-down  URL  as  specified  below.   After  this  predicate
 199%   succeeds the data can be read from Stream. After completion this
 200%   stream must be  closed  using   the  built-in  Prolog  predicate
 201%   close/1. Options provides additional options:
 202%
 203%     * authenticate(+Boolean)
 204%     If `false` (default `true`), do _not_ try to automatically
 205%     authenticate the client if a 401 (Unauthorized) status code
 206%     is received.
 207%
 208%     * authorization(+Term)
 209%     Send authorization. See also http_set_authorization/2. Supported
 210%     schemes:
 211%
 212%       - basic(+User, +Password)
 213%       HTTP Basic authentication.
 214%       - bearer(+Token)
 215%       HTTP Bearer authentication.
 216%       - digest(+User, +Password)
 217%       HTTP Digest authentication.  This option is only provided
 218%       if the plugin library(http/http_digest) is also loaded.
 219%
 220%     * connection(+Connection)
 221%     Specify the =Connection= header.  Default is =close=.  The
 222%     alternative is =|Keep-alive|=.  This maintains a pool of
 223%     available connections as determined by keep_connection/1.
 224%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
 225%     Keep-alive connections can be closed explicitly using
 226%     http_close_keep_alive/1. Keep-alive connections may
 227%     significantly improve repetitive requests on the same server,
 228%     especially if the IP route is long, HTTPS is used or the
 229%     connection uses a proxy.
 230%
 231%     * final_url(-FinalURL)
 232%     Unify FinalURL with the final   destination. This differs from
 233%     the  original  URL  if  the  returned  head  of  the  original
 234%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
 235%     redirect, FinalURL is the same as URL if  URL is an atom, or a
 236%     URL constructed from the parts.
 237%
 238%     * header(Name, -AtomValue)
 239%     If provided, AtomValue is  unified  with   the  value  of  the
 240%     indicated  field  in  the  reply    header.  Name  is  matched
 241%     case-insensitive and the underscore  (_)   matches  the hyphen
 242%     (-). Multiple of these options  may   be  provided  to extract
 243%     multiple  header  fields.  If  the  header  is  not  available
 244%     AtomValue is unified to the empty atom ('').
 245%
 246%     * headers(-List)
 247%     If provided, List is unified with  a list of Name(Value) pairs
 248%     corresponding to fields in the reply   header.  Name and Value
 249%     follow the same conventions  used   by  the header(Name,Value)
 250%     option.
 251%
 252%     * method(+Method)
 253%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
 254%     =patch=.
 255%     The  =head= message can be
 256%     used in combination with  the   header(Name,  Value) option to
 257%     access information on the resource   without actually fetching
 258%     the resource itself.  The  returned   stream  must  be  closed
 259%     immediately.
 260%
 261%     If post(Data) is provided, the default is =post=.
 262%
 263%     * size(-Size)
 264%     Size is unified with the   integer value of =|Content-Length|=
 265%     in the reply header.
 266%
 267%     * version(-Version)
 268%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
 269%     are integers representing the HTTP version in the reply header.
 270%
 271%     * range(+Range)
 272%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
 273%     where `From` is an integer and `To`   is  either an integer or
 274%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
 275%     to   ask   for    bytes    1000-1999,     use    the    option
 276%     range(bytes(1000,1999))
 277%
 278%     * redirect(+Boolean)
 279%     If `false` (default `true`), do _not_ automatically redirect
 280%     if a 3XX code is received.  Must be combined with
 281%     status_code(Code) and one of the header options to read the
 282%     redirect reply. In particular, without status_code(Code) a
 283%     redirect is mapped to an exception.
 284%
 285%     * status_code(-Code)
 286%     If this option is  present  and   Code  unifies  with the HTTP
 287%     status code, do *not* translate errors (4xx, 5xx) into an
 288%     exception. Instead, http_open/3 behaves as if 200 (success) is
 289%     returned, providing the application to read the error document
 290%     from the returned stream.
 291%
 292%     * output(-Out)
 293%     Unify the output stream with Out and do not close it. This can
 294%     be used to upgrade a connection.
 295%
 296%     * timeout(+Timeout)
 297%     If provided, set a timeout on   the stream using set_stream/2.
 298%     With this option if no new data arrives within Timeout seconds
 299%     the stream raises an exception.  Default   is  to wait forever
 300%     (=infinite=).
 301%
 302%     * post(+Data)
 303%     Issue a =POST= request on the HTTP server.  Data is
 304%     handed to http_post_data/3.
 305%
 306%     * proxy(+Host:Port)
 307%     Use an HTTP proxy to connect to the outside world.  See also
 308%     socket:proxy_for_url/3.  This option overrules the proxy
 309%     specification defined by socket:proxy_for_url/3.
 310%
 311%     * proxy(+Host, +Port)
 312%     Synonym for proxy(+Host:Port).  Deprecated.
 313%
 314%     * proxy_authorization(+Authorization)
 315%     Send authorization to the proxy.  Otherwise   the  same as the
 316%     =authorization= option.
 317%
 318%     * bypass_proxy(+Boolean)
 319%     If =true=, bypass proxy hooks.  Default is =false=.
 320%
 321%     * request_header(Name = Value)
 322%     Additional  name-value  parts  are  added   in  the  order  of
 323%     appearance to the HTTP request   header.  No interpretation is
 324%     done.
 325%
 326%     * max_redirect(+Max)
 327%     Sets the maximum length of a redirection chain.  This is needed
 328%     for some IRIs that redirect indefinitely to other IRIs without
 329%     looping (e.g., redirecting to IRIs with a random element in them).
 330%     Max must be either a non-negative integer or the atom `infinite`.
 331%     The default value is `10`.
 332%
 333%     * user_agent(+Agent)
 334%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
 335%     header. Default is =SWI-Prolog=.
 336%
 337%   The hook http:open_options/2 can  be   used  to  provide default
 338%   options   based   on   the   broken-down     URL.   The   option
 339%   status_code(-Code)  is  particularly  useful   to  query  *REST*
 340%   interfaces that commonly return status   codes  other than `200`
 341%   that need to be be processed by the client code.
 342%
 343%   @param URL is either an atom or string (url) or a list of _parts_.
 344
 345%               When provided, this list may contain the fields
 346%               =scheme=, =user=, =password=, =host=, =port=, =path=
 347%               and either =query_string= (whose argument is an atom)
 348%               or =search= (whose argument is a list of
 349%               =|Name(Value)|= or =|Name=Value|= compound terms).
 350%               Only =host= is mandatory.  The example below opens the
 351%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
 352%               Note that values must *not* be quoted because the
 353%               library inserts the required quotes.
 354%
 355%               ==
 356%               http_open([ host('www.example.com'),
 357%                           path('/my/path'),
 358%                           search([ q='Hello world',
 359%                                    lang=en
 360%                                  ])
 361%                         ])
 362%               ==
 363%
 364%       @error existence_error(url, Id)
 365%       @see ssl_context/3 for SSL related options if
 366%       library(http/http_ssl_plugin) is loaded.
 367
 368:- multifile
 369    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
 370
 371http_open(URL, Stream, QOptions) :-
 372    meta_options(is_meta, QOptions, Options),
 373    (   atomic(URL)
 374    ->  parse_url_ex(URL, Parts)
 375    ;   Parts = URL
 376    ),
 377    autoload_https(Parts),
 378    add_authorization(Parts, Options, Options1),
 379    findall(HostOptions,
 380            http:open_options(Parts, HostOptions),
 381            AllHostOptions),
 382    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
 383    (   option(bypass_proxy(true), Options)
 384    ->  try_http_proxy(direct, Parts, Stream, Options2)
 385    ;   term_variables(Options2, Vars2),
 386        findall(Result-Vars2,
 387                try_a_proxy(Parts, Result, Options2),
 388                ResultList),
 389        last(ResultList, Status-Vars2)
 390    ->  (   Status = true(_Proxy, Stream)
 391        ->  true
 392        ;   throw(error(proxy_error(tried(ResultList)), _))
 393        )
 394    ;   try_http_proxy(direct, Parts, Stream, Options2)
 395    ).
 396
 397try_a_proxy(Parts, Result, Options) :-
 398    parts_uri(Parts, AtomicURL),
 399    option(host(Host), Parts),
 400    (   (   option(proxy(ProxyHost:ProxyPort), Options)
 401        ;   is_list(Options),
 402            memberchk(proxy(ProxyHost,ProxyPort), Options)
 403        )
 404    ->  Proxy = proxy(ProxyHost, ProxyPort)
 405    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
 406    ),
 407    debug(http(proxy),
 408          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
 409    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
 410    ->  (   var(E)
 411        ->  !, Result = true(Proxy, Stream)
 412        ;   Result = error(Proxy, E)
 413        )
 414    ;   Result = false(Proxy)
 415    ),
 416    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
 417
 418try_http_proxy(Method, Parts, Stream, Options0) :-
 419    option(host(Host), Parts),
 420    (   Method == direct
 421    ->  parts_request_uri(Parts, RequestURI)
 422    ;   parts_uri(Parts, RequestURI)
 423    ),
 424    select_option(visited(Visited0), Options0, OptionsV, []),
 425    Options = [visited([Parts|Visited0])|OptionsV],
 426    parts_scheme(Parts, Scheme),
 427    default_port(Scheme, DefPort),
 428    url_part(port(Port), Parts, DefPort),
 429    host_and_port(Host, DefPort, Port, HostPort),
 430    (   option(connection(Connection), Options0),
 431        keep_alive(Connection),
 432        get_from_pool(Host:Port, StreamPair),
 433        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
 434              [ Host:Port, StreamPair ]),
 435        catch(send_rec_header(StreamPair, Stream, HostPort,
 436                              RequestURI, Parts, Options),
 437              error(E,_),
 438              keep_alive_error(E))
 439    ->  true
 440    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
 441                                        SocketStreamPair, Options, Options1),
 442        (   catch(http:http_protocol_hook(Scheme, Parts,
 443                                          SocketStreamPair,
 444                                          StreamPair, Options),
 445                  Error,
 446                  ( close(SocketStreamPair, [force(true)]),
 447                    throw(Error)))
 448        ->  true
 449        ;   StreamPair = SocketStreamPair
 450        ),
 451        send_rec_header(StreamPair, Stream, HostPort,
 452                        RequestURI, Parts, Options1)
 453    ),
 454    return_final_url(Options).
 455
 456http:http_connection_over_proxy(direct, _, Host:Port,
 457                                StreamPair, Options, Options) :-
 458    !,
 459    open_socket(Host:Port, StreamPair, Options).
 460http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
 461                                StreamPair, Options, Options) :-
 462    \+ ( memberchk(scheme(Scheme), Parts),
 463         secure_scheme(Scheme)
 464       ),
 465    !,
 466    % We do not want any /more/ proxy after this
 467    open_socket(ProxyHost:ProxyPort, StreamPair,
 468                [bypass_proxy(true)|Options]).
 469http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
 470                                StreamPair, Options, Options) :-
 471    !,
 472    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
 473    catch(negotiate_socks_connection(Host:Port, StreamPair),
 474          Error,
 475          ( close(StreamPair, [force(true)]),
 476            throw(Error)
 477          )).
 478
 479
 480merge_options_rev(Old, New, Merged) :-
 481    merge_options(New, Old, Merged).
 482
 483is_meta(pem_password_hook).             % SSL plugin callbacks
 484is_meta(cert_verify_hook).
 485
 486
 487http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
 488
 489default_port(https, 443) :- !.
 490default_port(wss,   443) :- !.
 491default_port(_,     80).
 492
 493host_and_port(Host, DefPort, DefPort, Host) :- !.
 494host_and_port(Host, _,       Port,    Host:Port).
 495
 496%!  autoload_https(+Parts) is det.
 497%
 498%   If the requested scheme is https or wss, load the HTTPS plugin.
 499
 500autoload_https(Parts) :-
 501    memberchk(scheme(S), Parts),
 502    secure_scheme(S),
 503    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
 504    exists_source(library(http/http_ssl_plugin)),
 505    !,
 506    use_module(library(http/http_ssl_plugin)).
 507autoload_https(_).
 508
 509secure_scheme(https).
 510secure_scheme(wss).
 511
 512%!  send_rec_header(+StreamPair, -Stream,
 513%!                  +Host, +RequestURI, +Parts, +Options) is det.
 514%
 515%   Send header to Out and process reply.  If there is an error or
 516%   failure, close In and Out and return the error or failure.
 517
 518send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
 519    (   catch(guarded_send_rec_header(StreamPair, Stream,
 520                                      Host, RequestURI, Parts, Options),
 521              E, true)
 522    ->  (   var(E)
 523        ->  (   option(output(StreamPair), Options)
 524            ->  true
 525            ;   true
 526            )
 527        ;   close(StreamPair, [force(true)]),
 528            throw(E)
 529        )
 530    ;   close(StreamPair, [force(true)]),
 531        fail
 532    ).
 533
 534guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
 535    user_agent(Agent, Options),
 536    method(Options, MNAME),
 537    http_version(Version),
 538    option(connection(Connection), Options, close),
 539    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
 540    debug(http(send_request), "> Host: ~w", [Host]),
 541    debug(http(send_request), "> User-Agent: ~w", [Agent]),
 542    debug(http(send_request), "> Connection: ~w", [Connection]),
 543    format(StreamPair,
 544           '~w ~w HTTP/~w\r\n\c
 545               Host: ~w\r\n\c
 546               User-Agent: ~w\r\n\c
 547               Connection: ~w\r\n',
 548           [MNAME, RequestURI, Version, Host, Agent, Connection]),
 549    parts_uri(Parts, URI),
 550    x_headers(Options, URI, StreamPair),
 551    write_cookies(StreamPair, Parts, Options),
 552    (   option(post(PostData), Options)
 553    ->  http_header:http_post_data(PostData, StreamPair, [])
 554    ;   format(StreamPair, '\r\n', [])
 555    ),
 556    flush_output(StreamPair),
 557                                    % read the reply header
 558    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
 559    update_cookies(Lines, Parts, Options),
 560    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
 561            StreamPair, Stream).
 562
 563
 564%!  http_version(-Version:atom) is det.
 565%
 566%   HTTP version we publish. We  can  only   use  1.1  if we support
 567%   chunked encoding.
 568
 569http_version('1.1') :-
 570    http:current_transfer_encoding(chunked),
 571    !.
 572http_version('1.0').
 573
 574method(Options, MNAME) :-
 575    option(post(_), Options),
 576    !,
 577    option(method(M), Options, post),
 578    (   map_method(M, MNAME0)
 579    ->  MNAME = MNAME0
 580    ;   domain_error(method, M)
 581    ).
 582method(Options, MNAME) :-
 583    option(method(M), Options, get),
 584    (   map_method(M, MNAME0)
 585    ->  MNAME = MNAME0
 586    ;   map_method(_, M)
 587    ->  MNAME = M
 588    ;   domain_error(method, M)
 589    ).
 590
 591map_method(delete,  'DELETE').
 592map_method(get,     'GET').
 593map_method(head,    'HEAD').
 594map_method(post,    'POST').
 595map_method(put,     'PUT').
 596map_method(patch,   'PATCH').
 597map_method(options, 'OPTIONS').
 598
 599%!  x_headers(+Options, +URI, +Out) is det.
 600%
 601%   Emit extra headers from   request_header(Name=Value)  options in
 602%   Options.
 603%
 604%   @tbd Use user/password fields
 605
 606x_headers(Options, URI, Out) :-
 607    x_headers_(Options, [url(URI)|Options], Out).
 608
 609x_headers_([], _, _).
 610x_headers_([H|T], Options, Out) :-
 611    x_header(H, Options, Out),
 612    x_headers_(T, Options, Out).
 613
 614x_header(request_header(Name=Value), _, Out) :-
 615    !,
 616    debug(http(send_request), "> ~w: ~w", [Name, Value]),
 617    format(Out, '~w: ~w\r\n', [Name, Value]).
 618x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
 619    !,
 620    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
 621x_header(authorization(Authorization), Options, Out) :-
 622    !,
 623    auth_header(Authorization, Options, 'Authorization', Out).
 624x_header(range(Spec), _, Out) :-
 625    !,
 626    Spec =.. [Unit, From, To],
 627    (   To == end
 628    ->  ToT = ''
 629    ;   must_be(integer, To),
 630        ToT = To
 631    ),
 632    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
 633    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
 634x_header(_, _, _).
 635
 636%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
 637
 638auth_header(basic(User, Password), _, Header, Out) :-
 639    !,
 640    format(codes(Codes), '~w:~w', [User, Password]),
 641    phrase(base64(Codes), Base64Codes),
 642    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
 643    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
 644auth_header(bearer(Token), _, Header, Out) :-
 645    !,
 646    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
 647    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
 648auth_header(Auth, Options, _, Out) :-
 649    option(url(URL), Options),
 650    add_method(Options, Options1),
 651    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
 652    !.
 653auth_header(Auth, _, _, _) :-
 654    domain_error(authorization, Auth).
 655
 656user_agent(Agent, Options) :-
 657    (   option(user_agent(Agent), Options)
 658    ->  true
 659    ;   user_agent(Agent)
 660    ).
 661
 662add_method(Options0, Options) :-
 663    option(method(_), Options0),
 664    !,
 665    Options = Options0.
 666add_method(Options0, Options) :-
 667    option(post(_), Options0),
 668    !,
 669    Options = [method(post)|Options0].
 670add_method(Options0, [method(get)|Options0]).
 671
 672
 673%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
 674%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
 675%
 676%   Handle the HTTP status. If 200, we   are ok. If a redirect, redo
 677%   the open, returning a new stream. Else issue an error.
 678%
 679%   @error  existence_error(url, URL)
 680
 681                                        % Redirections
 682do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
 683    redirect_code(Code),
 684    option(redirect(true), Options0, true),
 685    location(Lines, RequestURI),
 686    !,
 687    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
 688    close(In),
 689    parts_uri(Parts, Base),
 690    uri_resolve(RequestURI, Base, Redirected),
 691    parse_url_ex(Redirected, RedirectedParts),
 692    (   redirect_limit_exceeded(Options0, Max)
 693    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
 694        throw(error(permission_error(redirect, http, Redirected),
 695                    context(_, Comment)))
 696    ;   redirect_loop(RedirectedParts, Options0)
 697    ->  throw(error(permission_error(redirect, http, Redirected),
 698                    context(_, 'Redirection loop')))
 699    ;   true
 700    ),
 701    redirect_options(Options0, Options),
 702    http_open(RedirectedParts, Stream, Options).
 703                                        % Need authentication
 704do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
 705    authenticate_code(Code),
 706    option(authenticate(true), Options0, true),
 707    parts_uri(Parts, URI),
 708    parse_headers(Lines, Headers),
 709    http:authenticate_client(
 710             URI,
 711             auth_reponse(Headers, Options0, Options)),
 712    !,
 713    close(In0),
 714    http_open(Parts, Stream, Options).
 715                                        % Accepted codes
 716do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
 717    (   option(status_code(Code), Options),
 718        Lines \== []
 719    ->  true
 720    ;   Code == 200
 721    ),
 722    !,
 723    parts_uri(Parts, URI),
 724    parse_headers(Lines, Headers),
 725    return_version(Options, Version),
 726    return_size(Options, Headers),
 727    return_fields(Options, Headers),
 728    return_headers(Options, Headers),
 729    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
 730    transfer_encoding_filter(Lines, In1, In),
 731                                    % properly re-initialise the stream
 732    set_stream(In, file_name(URI)),
 733    set_stream(In, record_position(true)).
 734do_open(_, _, _, [], Options, _, _, _, _) :-
 735    option(connection(Connection), Options),
 736    keep_alive(Connection),
 737    !,
 738    throw(error(keep_alive(closed),_)).
 739                                        % report anything else as error
 740do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
 741    parts_uri(Parts, URI),
 742    (   map_error_code(Code, Error)
 743    ->  Formal =.. [Error, url, URI]
 744    ;   Formal = existence_error(url, URI)
 745    ),
 746    throw(error(Formal, context(_, status(Code, Comment)))).
 747
 748
 749%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
 750%
 751%   True if we have exceeded the maximum redirection length (default 10).
 752
 753redirect_limit_exceeded(Options, Max) :-
 754    option(visited(Visited), Options, []),
 755    length(Visited, N),
 756    option(max_redirect(Max), Options, 10),
 757    (Max == infinite -> fail ; N > Max).
 758
 759
 760%!  redirect_loop(+Parts, +Options) is semidet.
 761%
 762%   True if we are in  a  redirection   loop.  Note  that some sites
 763%   redirect once to the same place using  cookies or similar, so we
 764%   allow for two tries. In fact,   we  should probably test whether
 765%   authorization or cookie headers have changed.
 766
 767redirect_loop(Parts, Options) :-
 768    option(visited(Visited), Options, []),
 769    include(==(Parts), Visited, Same),
 770    length(Same, Count),
 771    Count > 2.
 772
 773
 774%!  redirect_options(+Options0, -Options) is det.
 775%
 776%   A redirect from a POST should do a GET on the returned URI. This
 777%   means we must remove  the   method(post)  and post(Data) options
 778%   from the original option-list.
 779
 780redirect_options(Options0, Options) :-
 781    (   select_option(post(_), Options0, Options1)
 782    ->  true
 783    ;   Options1 = Options0
 784    ),
 785    (   select_option(method(Method), Options1, Options),
 786        \+ redirect_method(Method)
 787    ->  true
 788    ;   Options = Options1
 789    ).
 790
 791redirect_method(delete).
 792redirect_method(get).
 793redirect_method(head).
 794
 795
 796%!  map_error_code(+HTTPCode, -PrologError) is semidet.
 797%
 798%   Map HTTP error codes to Prolog errors.
 799%
 800%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
 801%           counterpart.
 802
 803map_error_code(401, permission_error).
 804map_error_code(403, permission_error).
 805map_error_code(404, existence_error).
 806map_error_code(405, permission_error).
 807map_error_code(407, permission_error).
 808map_error_code(410, existence_error).
 809
 810redirect_code(301).                     % moved permanently
 811redirect_code(302).                     % moved temporary
 812redirect_code(303).                     % see also
 813
 814authenticate_code(401).
 815
 816%!  open_socket(+Address, -StreamPair, +Options) is det.
 817%
 818%   Create and connect a client socket to Address.  Options
 819%
 820%       * timeout(+Timeout)
 821%       Sets timeout on the stream, *after* connecting the
 822%       socket.
 823%
 824%   @tbd    Make timeout also work on tcp_connect/4.
 825%   @tbd    This is the same as do_connect/4 in http_client.pl
 826
 827open_socket(Address, StreamPair, Options) :-
 828    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
 829    tcp_connect(Address, StreamPair, Options),
 830    stream_pair(StreamPair, In, Out),
 831    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
 832    set_stream(In, record_position(false)),
 833    (   option(timeout(Timeout), Options)
 834    ->  set_stream(In, timeout(Timeout))
 835    ;   true
 836    ).
 837
 838
 839return_version(Options, Major-Minor) :-
 840    option(version(Major-Minor), Options, _).
 841
 842return_size(Options, Headers) :-
 843    (   memberchk(content_length(Size), Headers)
 844    ->  option(size(Size), Options, _)
 845    ;   true
 846    ).
 847
 848return_fields([], _).
 849return_fields([header(Name, Value)|T], Headers) :-
 850    !,
 851    (   Term =.. [Name,Value],
 852        memberchk(Term, Headers)
 853    ->  true
 854    ;   Value = ''
 855    ),
 856    return_fields(T, Headers).
 857return_fields([_|T], Lines) :-
 858    return_fields(T, Lines).
 859
 860return_headers(Options, Headers) :-
 861    option(headers(Headers), Options, _).
 862
 863%!  parse_headers(+Lines, -Headers:list(compound)) is det.
 864%
 865%   Parse the header lines for   the  headers(-List) option. Invalid
 866%   header   lines   are   skipped,   printing   a   warning   using
 867%   pring_message/2.
 868
 869parse_headers([], []) :- !.
 870parse_headers([Line|Lines], Headers) :-
 871    catch(http_parse_header(Line, [Header]), Error, true),
 872    (   var(Error)
 873    ->  Headers = [Header|More]
 874    ;   print_message(warning, Error),
 875        Headers = More
 876    ),
 877    parse_headers(Lines, More).
 878
 879
 880%!  return_final_url(+Options) is semidet.
 881%
 882%   If Options contains final_url(URL), unify URL with the final
 883%   URL after redirections.
 884
 885return_final_url(Options) :-
 886    option(final_url(URL), Options),
 887    var(URL),
 888    !,
 889    option(visited([Parts|_]), Options),
 890    parts_uri(Parts, URL).
 891return_final_url(_).
 892
 893
 894%!  transfer_encoding_filter(+Lines, +In0, -In) is det.
 895%
 896%   Install filters depending on the transfer  encoding. If In0 is a
 897%   stream-pair, we close the output   side. If transfer-encoding is
 898%   not specified, the content-encoding is  interpreted as a synonym
 899%   for transfer-encoding, because many   servers incorrectly depend
 900%   on  this.  Exceptions  to  this   are  content-types  for  which
 901%   disable_encoding_filter/1 holds.
 902
 903transfer_encoding_filter(Lines, In0, In) :-
 904    transfer_encoding(Lines, Encoding),
 905    !,
 906    transfer_encoding_filter_(Encoding, In0, In).
 907transfer_encoding_filter(Lines, In0, In) :-
 908    content_encoding(Lines, Encoding),
 909    content_type(Lines, Type),
 910    \+ http:disable_encoding_filter(Type),
 911    !,
 912    transfer_encoding_filter_(Encoding, In0, In).
 913transfer_encoding_filter(_, In, In).
 914
 915transfer_encoding_filter_(Encoding, In0, In) :-
 916    stream_pair(In0, In1, Out),
 917    (   nonvar(Out)
 918    ->  close(Out)
 919    ;   true
 920    ),
 921    (   http:encoding_filter(Encoding, In1, In)
 922    ->  true
 923    ;   domain_error(http_encoding, Encoding)
 924    ).
 925
 926content_type(Lines, Type) :-
 927    member(Line, Lines),
 928    phrase(field('content-type'), Line, Rest),
 929    !,
 930    atom_codes(Type, Rest).
 931
 932%!  http:disable_encoding_filter(+ContentType) is semidet.
 933%
 934%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
 935%   encoding for specific values of   ContentType. This predicate is
 936%   multifile and can thus be extended by the user.
 937
 938http:disable_encoding_filter('application/x-gzip').
 939http:disable_encoding_filter('application/x-tar').
 940http:disable_encoding_filter('x-world/x-vrml').
 941http:disable_encoding_filter('application/zip').
 942http:disable_encoding_filter('application/x-gzip').
 943http:disable_encoding_filter('application/x-zip-compressed').
 944http:disable_encoding_filter('application/x-compress').
 945http:disable_encoding_filter('application/x-compressed').
 946http:disable_encoding_filter('application/x-spoon').
 947
 948%!  transfer_encoding(+Lines, -Encoding) is semidet.
 949%
 950%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 951%   header.
 952
 953transfer_encoding(Lines, Encoding) :-
 954    what_encoding(transfer_encoding, Lines, Encoding).
 955
 956what_encoding(What, Lines, Encoding) :-
 957    member(Line, Lines),
 958    phrase(encoding_(What, Debug), Line, Rest),
 959    !,
 960    atom_codes(Encoding, Rest),
 961    debug(http(What), '~w: ~p', [Debug, Rest]).
 962
 963encoding_(content_encoding, 'Content-encoding') -->
 964    field('content-encoding').
 965encoding_(transfer_encoding, 'Transfer-encoding') -->
 966    field('transfer-encoding').
 967
 968%!  content_encoding(+Lines, -Encoding) is semidet.
 969%
 970%   True if Encoding is the value of the =|Content-encoding|=
 971%   header.
 972
 973content_encoding(Lines, Encoding) :-
 974    what_encoding(content_encoding, Lines, Encoding).
 975
 976%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 977%!  -Comment:atom, -Lines:list) is det.
 978%
 979%   Read the HTTP reply-header.  If the reply is completely empty
 980%   an existence error is thrown.  If the replied header is
 981%   otherwise invalid a 500 HTTP error is simulated, having the
 982%   comment =|Invalid reply header|=.
 983%
 984%   @param Parts    A list of compound terms that describe the
 985%                   parsed request URI.
 986%   @param Version  HTTP reply version as Major-Minor pair
 987%   @param Code     Numeric HTTP reply-code
 988%   @param Comment  Comment of reply-code as atom
 989%   @param Lines    Remaining header lines as code-lists.
 990%
 991%   @error existence_error(http_reply, Uri)
 992
 993read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 994    read_line_to_codes(In, Line),
 995    (   Line == end_of_file
 996    ->  parts_uri(Parts, Uri),
 997        existence_error(http_reply,Uri)
 998    ;   true
 999    ),
1000    Line \== end_of_file,
1001    phrase(first_line(Major-Minor, Code, Comment), Line),
1002    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
1003    read_line_to_codes(In, Line2),
1004    rest_header(Line2, In, Lines),
1005    !,
1006    (   debugging(http(open))
1007    ->  forall(member(HL, Lines),
1008               debug(http(open), '~s', [HL]))
1009    ;   true
1010    ).
1011read_header(_, _, 1-1, 500, 'Invalid reply header', []).
1012
1013rest_header([], _, []) :- !.            % blank line: end of header
1014rest_header(L0, In, [L0|L]) :-
1015    read_line_to_codes(In, L1),
1016    rest_header(L1, In, L).
1017
1018%!  content_length(+Header, -Length:int) is semidet.
1019%
1020%   Find the Content-Length in an HTTP reply-header.
1021
1022content_length(Lines, Length) :-
1023    member(Line, Lines),
1024    phrase(content_length(Length0), Line),
1025    !,
1026    Length = Length0.
1027
1028location(Lines, RequestURI) :-
1029    member(Line, Lines),
1030    phrase(atom_field(location, RequestURI), Line),
1031    !.
1032
1033connection(Lines, Connection) :-
1034    member(Line, Lines),
1035    phrase(atom_field(connection, Connection0), Line),
1036    !,
1037    Connection = Connection0.
1038
1039first_line(Major-Minor, Code, Comment) -->
1040    "HTTP/", integer(Major), ".", integer(Minor),
1041    skip_blanks,
1042    integer(Code),
1043    skip_blanks,
1044    rest(Comment).
1045
1046atom_field(Name, Value) -->
1047    field(Name),
1048    rest(Value).
1049
1050content_length(Len) -->
1051    field('content-length'),
1052    integer(Len).
1053
1054field(Name) -->
1055    { atom_codes(Name, Codes) },
1056    field_codes(Codes).
1057
1058field_codes([]) -->
1059    ":",
1060    skip_blanks.
1061field_codes([H|T]) -->
1062    [C],
1063    { match_header_char(H, C)
1064    },
1065    field_codes(T).
1066
1067match_header_char(C, C) :- !.
1068match_header_char(C, U) :-
1069    code_type(C, to_lower(U)),
1070    !.
1071match_header_char(0'_, 0'-).
1072
1073
1074skip_blanks -->
1075    [C],
1076    { code_type(C, white)
1077    },
1078    !,
1079    skip_blanks.
1080skip_blanks -->
1081    [].
1082
1083%!  integer(-Int)//
1084%
1085%   Read 1 or more digits and return as integer.
1086
1087integer(Code) -->
1088    digit(D0),
1089    digits(D),
1090    { number_codes(Code, [D0|D])
1091    }.
1092
1093digit(C) -->
1094    [C],
1095    { code_type(C, digit)
1096    }.
1097
1098digits([D0|D]) -->
1099    digit(D0),
1100    !,
1101    digits(D).
1102digits([]) -->
1103    [].
1104
1105%!  rest(-Atom:atom)//
1106%
1107%   Get rest of input as an atom.
1108
1109rest(Atom) --> call(rest_(Atom)).
1110
1111rest_(Atom, L, []) :-
1112    atom_codes(Atom, L).
1113
1114
1115                 /*******************************
1116                 *   AUTHORIZATION MANAGEMENT   *
1117                 *******************************/
1118
1119%!  http_set_authorization(+URL, +Authorization) is det.
1120%
1121%   Set user/password to supply with URLs   that have URL as prefix.
1122%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
1123%   authorization is cleared.  For example:
1124%
1125%   ==
1126%   ?- http_set_authorization('http://www.example.com/private/',
1127%                             basic('John', 'Secret'))
1128%   ==
1129%
1130%   @tbd    Move to a separate module, so http_get/3, etc. can use this
1131%           too.
1132
1133:- dynamic
1134    stored_authorization/2,
1135    cached_authorization/2.
1136
1137http_set_authorization(URL, Authorization) :-
1138    must_be(atom, URL),
1139    retractall(stored_authorization(URL, _)),
1140    (   Authorization = (-)
1141    ->  true
1142    ;   check_authorization(Authorization),
1143        assert(stored_authorization(URL, Authorization))
1144    ),
1145    retractall(cached_authorization(_,_)).
1146
1147check_authorization(Var) :-
1148    var(Var),
1149    !,
1150    instantiation_error(Var).
1151check_authorization(basic(User, Password)) :-
1152    must_be(atom, User),
1153    must_be(text, Password).
1154check_authorization(digest(User, Password)) :-
1155    must_be(atom, User),
1156    must_be(text, Password).
1157
1158%!  authorization(+URL, -Authorization) is semidet.
1159%
1160%   True if Authorization must be supplied for URL.
1161%
1162%   @tbd    Cleanup cache if it gets too big.
1163
1164authorization(_, _) :-
1165    \+ stored_authorization(_, _),
1166    !,
1167    fail.
1168authorization(URL, Authorization) :-
1169    cached_authorization(URL, Authorization),
1170    !,
1171    Authorization \== (-).
1172authorization(URL, Authorization) :-
1173    (   stored_authorization(Prefix, Authorization),
1174        sub_atom(URL, 0, _, _, Prefix)
1175    ->  assert(cached_authorization(URL, Authorization))
1176    ;   assert(cached_authorization(URL, -)),
1177        fail
1178    ).
1179
1180add_authorization(_, Options, Options) :-
1181    option(authorization(_), Options),
1182    !.
1183add_authorization(Parts, Options0, Options) :-
1184    url_part(user(User), Parts),
1185    url_part(password(Passwd), Parts),
1186    Options = [authorization(basic(User,Passwd))|Options0].
1187add_authorization(Parts, Options0, Options) :-
1188    stored_authorization(_, _) ->   % quick test to avoid work
1189    parts_uri(Parts, URL),
1190    authorization(URL, Auth),
1191    !,
1192    Options = [authorization(Auth)|Options0].
1193add_authorization(_, Options, Options).
1194
1195
1196%!  parse_url_ex(+URL, -Parts)
1197%
1198%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
1199%   fragment).
1200
1201parse_url_ex(URL, [uri(URL)|Parts]) :-
1202    uri_components(URL, Components),
1203    phrase(components(Components), Parts),
1204    (   option(host(_), Parts)
1205    ->  true
1206    ;   domain_error(url, URL)
1207    ).
1208
1209components(Components) -->
1210    uri_scheme(Components),
1211    uri_authority(Components),
1212    uri_request_uri(Components).
1213
1214uri_scheme(Components) -->
1215    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
1216    !,
1217    [ scheme(Scheme)
1218    ].
1219uri_scheme(_) --> [].
1220
1221uri_authority(Components) -->
1222    { uri_data(authority, Components, Auth), nonvar(Auth),
1223      !,
1224      uri_authority_components(Auth, Data)
1225    },
1226    [ authority(Auth) ],
1227    auth_field(user, Data),
1228    auth_field(password, Data),
1229    auth_field(host, Data),
1230    auth_field(port, Data).
1231uri_authority(_) --> [].
1232
1233auth_field(Field, Data) -->
1234    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
1235      !,
1236      (   atom(EncValue)
1237      ->  uri_encoded(query_value, Value, EncValue)
1238      ;   Value = EncValue
1239      ),
1240      Part =.. [Field,Value]
1241    },
1242    [ Part ].
1243auth_field(_, _) --> [].
1244
1245uri_request_uri(Components) -->
1246    { uri_data(path, Components, Path0),
1247      uri_data(search, Components, Search),
1248      (   Path0 == ''
1249      ->  Path = (/)
1250      ;   Path = Path0
1251      ),
1252      uri_data(path, Components2, Path),
1253      uri_data(search, Components2, Search),
1254      uri_components(RequestURI, Components2)
1255    },
1256    [ request_uri(RequestURI)
1257    ].
1258
1259%!  parts_scheme(+Parts, -Scheme) is det.
1260%!  parts_uri(+Parts, -URI) is det.
1261%!  parts_request_uri(+Parts, -RequestURI) is det.
1262%!  parts_search(+Parts, -Search) is det.
1263%!  parts_authority(+Parts, -Authority) is semidet.
1264
1265parts_scheme(Parts, Scheme) :-
1266    url_part(scheme(Scheme), Parts),
1267    !.
1268parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
1269    url_part(protocol(Scheme), Parts),
1270    !.
1271parts_scheme(_, http).
1272
1273parts_authority(Parts, Auth) :-
1274    url_part(authority(Auth), Parts),
1275    !.
1276parts_authority(Parts, Auth) :-
1277    url_part(host(Host), Parts, _),
1278    url_part(port(Port), Parts, _),
1279    url_part(user(User), Parts, _),
1280    url_part(password(Password), Parts, _),
1281    uri_authority_components(Auth,
1282                             uri_authority(User, Password, Host, Port)).
1283
1284parts_request_uri(Parts, RequestURI) :-
1285    option(request_uri(RequestURI), Parts),
1286    !.
1287parts_request_uri(Parts, RequestURI) :-
1288    url_part(path(Path), Parts, /),
1289    ignore(parts_search(Parts, Search)),
1290    uri_data(path, Data, Path),
1291    uri_data(search, Data, Search),
1292    uri_components(RequestURI, Data).
1293
1294parts_search(Parts, Search) :-
1295    option(query_string(Search), Parts),
1296    !.
1297parts_search(Parts, Search) :-
1298    option(search(Fields), Parts),
1299    !,
1300    uri_query_components(Search, Fields).
1301
1302
1303parts_uri(Parts, URI) :-
1304    option(uri(URI), Parts),
1305    !.
1306parts_uri(Parts, URI) :-
1307    parts_scheme(Parts, Scheme),
1308    ignore(parts_authority(Parts, Auth)),
1309    parts_request_uri(Parts, RequestURI),
1310    uri_components(RequestURI, Data),
1311    uri_data(scheme, Data, Scheme),
1312    uri_data(authority, Data, Auth),
1313    uri_components(URI, Data).
1314
1315parts_port(Parts, Port) :-
1316    parts_scheme(Parts, Scheme),
1317    default_port(Scheme, DefPort),
1318    url_part(port(Port), Parts, DefPort).
1319
1320url_part(Part, Parts) :-
1321    Part =.. [Name,Value],
1322    Gen =.. [Name,RawValue],
1323    option(Gen, Parts),
1324    !,
1325    Value = RawValue.
1326
1327url_part(Part, Parts, Default) :-
1328    Part =.. [Name,Value],
1329    Gen =.. [Name,RawValue],
1330    (   option(Gen, Parts)
1331    ->  Value = RawValue
1332    ;   Value = Default
1333    ).
1334
1335
1336                 /*******************************
1337                 *            COOKIES           *
1338                 *******************************/
1339
1340write_cookies(Out, Parts, Options) :-
1341    http:write_cookies(Out, Parts, Options),
1342    !.
1343write_cookies(_, _, _).
1344
1345update_cookies(_, _, _) :-
1346    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
1347    !.
1348update_cookies(Lines, Parts, Options) :-
1349    (   member(Line, Lines),
1350        phrase(atom_field('set_cookie', CookieData), Line),
1351        http:update_cookies(CookieData, Parts, Options),
1352        fail
1353    ;   true
1354    ).
1355
1356
1357                 /*******************************
1358                 *           OPEN ANY           *
1359                 *******************************/
1360
1361:- multifile iostream:open_hook/6.
1362
1363%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
1364%!                     +Options0, -Options) is semidet.
1365%
1366%   Hook implementation that makes  open_any/5   support  =http= and
1367%   =https= URLs for `Mode == read`.
1368
1369iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
1370    (atom(URL) -> true ; string(URL)),
1371    uri_is_global(URL),
1372    uri_components(URL, Components),
1373    uri_data(scheme, Components, Scheme),
1374    http_scheme(Scheme),
1375    !,
1376    Options = Options0,
1377    Close = close(Stream),
1378    http_open(URL, Stream, Options0).
1379
1380http_scheme(http).
1381http_scheme(https).
1382
1383
1384                 /*******************************
1385                 *          KEEP-ALIVE          *
1386                 *******************************/
1387
1388%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
1389%!                      +Stream0, -Stream,
1390%!                      +Options) is det.
1391
1392consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
1393    option(connection(Asked), Options),
1394    keep_alive(Asked),
1395    connection(Lines, Given),
1396    keep_alive(Given),
1397    content_length(Lines, Bytes),
1398    !,
1399    stream_pair(StreamPair, In0, _),
1400    connection_address(Host, Parts, HostPort),
1401    debug(http(connection),
1402          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
1403    stream_range_open(In0, In,
1404                      [ size(Bytes),
1405                        onclose(keep_alive(StreamPair, HostPort))
1406                      ]).
1407consider_keep_alive(_, _, _, Stream, Stream, _).
1408
1409connection_address(Host, _, Host) :-
1410    Host = _:_,
1411    !.
1412connection_address(Host, Parts, Host:Port) :-
1413    parts_port(Parts, Port).
1414
1415keep_alive(keep_alive) :- !.
1416keep_alive(Connection) :-
1417    downcase_atom(Connection, 'keep-alive').
1418
1419:- public keep_alive/4.
1420
1421keep_alive(StreamPair, Host, In, Left) :-
1422    read_incomplete(In, Left),
1423    add_to_pool(Host, StreamPair),
1424    !.
1425keep_alive(StreamPair, _, _, _) :-
1426    close(StreamPair, [force(true)]).
1427
1428%!  read_incomplete(+In, +Left) is semidet.
1429%
1430%   If we have not all input from  a Keep-alive connection, read the
1431%   remainder if it is short. Else, we fail and close the stream.
1432
1433read_incomplete(_, 0) :- !.
1434read_incomplete(In, Left) :-
1435    Left < 100,
1436    !,
1437    catch(setup_call_cleanup(
1438              open_null_stream(Null),
1439              copy_stream_data(In, Null, Left),
1440              close(Null)),
1441          _,
1442          fail).
1443
1444:- dynamic
1445    connection_pool/4,              % Hash, Address, Stream, Time
1446    connection_gc_time/1.
1447
1448add_to_pool(Address, StreamPair) :-
1449    keep_connection(Address),
1450    get_time(Now),
1451    term_hash(Address, Hash),
1452    assertz(connection_pool(Hash, Address, StreamPair, Now)).
1453
1454get_from_pool(Address, StreamPair) :-
1455    term_hash(Address, Hash),
1456    retract(connection_pool(Hash, Address, StreamPair, _)).
1457
1458%!  keep_connection(+Address) is semidet.
1459%
1460%   Succeeds if we want to keep   the  connection open. We currently
1461%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
1462%   waiting for the same address. Connections   older than 2 seconds
1463%   are closed.
1464
1465keep_connection(Address) :-
1466    close_old_connections(2),
1467    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
1468    C =< 10,
1469    term_hash(Address, Hash),
1470    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
1471    Count =< 2.
1472
1473close_old_connections(Timeout) :-
1474    get_time(Now),
1475    Before is Now - Timeout,
1476    (   connection_gc_time(GC),
1477        GC > Before
1478    ->  true
1479    ;   (   retractall(connection_gc_time(_)),
1480            asserta(connection_gc_time(Now)),
1481            connection_pool(Hash, Address, StreamPair, Added),
1482            Added < Before,
1483            retract(connection_pool(Hash, Address, StreamPair, Added)),
1484            debug(http(connection),
1485                  'Closing inactive keep-alive to ~p', [Address]),
1486            close(StreamPair, [force(true)]),
1487            fail
1488        ;   true
1489        )
1490    ).
1491
1492
1493%!  http_close_keep_alive(+Address) is det.
1494%
1495%   Close all keep-alive connections matching Address. Address is of
1496%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
1497%   closes all currently known keep-alive connections.
1498
1499http_close_keep_alive(Address) :-
1500    forall(get_from_pool(Address, StreamPair),
1501           close(StreamPair, [force(true)])).
1502
1503%!  keep_alive_error(+Error)
1504%
1505%   Deal with an error from reusing  a keep-alive connection. If the
1506%   error is due to an I/O error   or end-of-file, fail to backtrack
1507%   over get_from_pool/2. Otherwise it is a   real error and we thus
1508%   re-raise it.
1509
1510keep_alive_error(keep_alive(closed)) :-
1511    !,
1512    debug(http(connection), 'Keep-alive connection was closed', []),
1513    fail.
1514keep_alive_error(io_error(_,_)) :-
1515    !,
1516    debug(http(connection), 'IO error on Keep-alive connection', []),
1517    fail.
1518keep_alive_error(Error) :-
1519    throw(Error).
1520
1521
1522                 /*******************************
1523                 *     HOOK DOCUMENTATION       *
1524                 *******************************/
1525
1526%!  http:open_options(+Parts, -Options) is nondet.
1527%
1528%   This hook is used by the HTTP   client library to define default
1529%   options based on the the broken-down request-URL.  The following
1530%   example redirects all trafic, except for localhost over a proxy:
1531%
1532%       ==
1533%       :- multifile
1534%           http:open_options/2.
1535%
1536%       http:open_options(Parts, Options) :-
1537%           option(host(Host), Parts),
1538%           Host \== localhost,
1539%           Options = [proxy('proxy.local', 3128)].
1540%       ==
1541%
1542%   This hook may return multiple   solutions.  The returned options
1543%   are  combined  using  merge_options/3  where  earlier  solutions
1544%   overrule later solutions.
1545
1546%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
1547%
1548%   Emit a =|Cookie:|= header for the  current connection. Out is an
1549%   open stream to the HTTP server, Parts is the broken-down request
1550%   (see uri_components/2) and Options is the list of options passed
1551%   to http_open.  The predicate is called as if using ignore/1.
1552%
1553%   @see complements http:update_cookies/3.
1554%   @see library(http/http_cookie) implements cookie handling on
1555%   top of these hooks.
1556
1557%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
1558%
1559%   Update the cookie database.  CookieData  is   the  value  of the
1560%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
1561%   uri_components/2) and Options is the list   of options passed to
1562%   http_open.
1563%
1564%   @see complements http:write_cookies
1565%   @see library(http/http_cookies) implements cookie handling on
1566%   top of these hooks.