View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2007-2016, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(http_stream,
  37          [ http_chunked_open/3,        % +Stream, -DataStream, +Options
  38            stream_range_open/3,        % +Stream, -DataStream, +Options
  39            multipart_open/3,           % +Stream, -DataStream, +Options
  40            multipart_open_next/1,      % +DataStream
  41
  42                                        % CGI Stream interaction
  43            cgi_open/4,                 % +Stream, -DataStream, :Hook, +Options
  44            cgi_property/2,             % +Stream, -Property
  45            cgi_set/2,                  % +Stream, +Property
  46            cgi_discard/1,              % +Stream
  47            is_cgi_stream/1,            % +Stream
  48            cgi_statistics/1            % ?Statistics
  49          ]).
  50
  51:- use_foreign_library(foreign(http_stream)).
  52:- public http_stream_debug/1.          % set debug level
  53
  54:- meta_predicate
  55    stream_range_open(+,-,:).       % onclose option is module sensitive
  56
  57/** <module> HTTP Streams
  58
  59This module realises  encoding  and   decoding  filters,  implemented as
  60Prolog streams that read/write to an  underlying stream. This allows for
  61sequences of streams acting as an in-process pipeline.
  62
  63The predicate http_chunked_open/3 realises encoding  and decoding of the
  64HTTP _Chunked_ encoding. This encoding is an obligatory part of the HTTP
  651.1 specification. Messages are split into chunks, each preceded by the
  66length of the chunk. Chunked  encoding   allows  sending messages over a
  67serial link (typically a TCP/IP stream) for  which the reader knows when
  68the message is ended. Unlike standard HTTP   though, the sender does not
  69need to know the message length  in   advance.  The  protocol allows for
  70sending short chunks. This is supported fully transparently by using a
  71flush on the output stream.
  72
  73The predicate stream_range_open/3 handles the Content-length on an input
  74stream for handlers that are designed  to   process  an entire file. The
  75filtering stream claims end-of-file after reading  a specified number of
  76bytes, despite the fact that the underlying stream may be longer.
  77
  78@see    The HTTP 1.1 protocol http://www.w3.org/Protocols/rfc2616/rfc2616.html
  79@author Jan Wielemaker
  80*/
  81
  82%!  http_chunked_open(+RawStream, -DataStream, +Options) is det.
  83%
  84%   Create a stream to realise HTTP   chunked  encoding or decoding.
  85%   The technique is similar to library(zlib), using a Prolog stream
  86%   as a filter on another stream.  Options:
  87%
  88%           * close_parent(+Bool)
  89%           If =true= (default =false=), the parent stream is closed
  90%           if DataStream is closed.
  91%
  92%           * max_chunk_size(+PosInt)
  93%           Define the maximum size of a chunk.  Default is the
  94%           default buffer size of fully buffered streams (4096).
  95%           Larger values may improve throughput.  It is also
  96%           allowed to use =|set_stream(DataStream, buffer(line))|=
  97%           on the data stream to get line-buffered output. See
  98%           set_stream/2 for details. Switching buffering to =false=
  99%           is supported.
 100%
 101%   Here is example code to write chunked data to a stream
 102%
 103%   ==
 104%           http_chunked_open(Out, S, []),
 105%           format(S, 'Hello world~n', []),
 106%           close(S).
 107%   ==
 108%
 109%   If a stream is known to contain chunked data, we can extract
 110%   this data using
 111%
 112%   ==
 113%           http_chunked_open(In, S, []),
 114%           read_stream_to_codes(S, Codes),
 115%           close(S).
 116%   ==
 117%
 118%   The current implementation does not  generate chunked extensions
 119%   or an HTTP trailer. If such extensions  appear on the input they
 120%   are silently ignored. This  is  compatible   with  the  HTTP 1.1
 121%   specifications. Although a filtering  stream   is  an  excellent
 122%   mechanism for encoding and decoding   the core chunked protocol,
 123%   it does not well support out-of-band data.
 124%
 125%   After http_chunked_open/3, the encoding  of   DataStream  is the
 126%   same as the  encoding  of  RawStream,   while  the  encoding  of
 127%   RawStream is =octet=, the only value   allowed  for HTTP chunked
 128%   streams. Closing the DataStream  restores   the  old encoding on
 129%   RawStream.
 130%
 131%   @error  io_error(read, Stream) where the message context provides
 132%           an indication of the problem.  This error is raised if
 133%           the input is not valid HTTP chunked data.
 134
 135
 136                 /*******************************
 137                 *             RANGES           *
 138                 *******************************/
 139
 140%!  stream_range_open(+RawStream, -DataStream, +Options) is det.
 141%
 142%   DataStream is a stream  whose  size   is  defined  by the option
 143%   size(ContentLength).   Closing   DataStream   does   not   close
 144%   RawStream.  Options processed:
 145%
 146%     - size(+Bytes)
 147%     Number of bytes represented by the main stream.
 148%     - onclose(:Closure)
 149%     Calls call(Closure, RawStream, BytesLeft) when DataStream is
 150%     closed. BytesLeft is the number of bytes of the range stream
 151%     that have *not* been read, i.e., 0 (zero) if all data has been
 152%     read from the stream when the range is closed. This was
 153%     introduced for supporting Keep-alive in http_open/3 to
 154%     reschedule the original stream for a new request if the data
 155%     of the previous request was processed.
 156
 157
 158                 /*******************************
 159                 *            MULTIPART         *
 160                 *******************************/
 161
 162%!  multipart_open(+Stream, -DataStream, +Options) is det.
 163%
 164%   DataStream  is  a  stream  that  signals  `end_of_file`  if  the
 165%   multipart _boundary_ is encountered. The stream  can be reset to
 166%   read the next part using multipart_open_next/1. Options:
 167%
 168%     - close_parent(+Boolean)
 169%     Close Stream if DataStream is closed.
 170%     - boundary(+Text)
 171%     Define the boundary string.  Text is an atom, string, code or
 172%     character list.
 173%
 174%   All parts of a multipart input can   be read using the following
 175%   skeleton:
 176%
 177%     ==
 178%     process_multipart(Stream) :-
 179%           multipart_open(Stream, DataStream, [boundary(...)]),
 180%           process_parts(DataStream).
 181%
 182%     process_parts(DataStream) :-
 183%           process_part(DataStream),
 184%           (   multipart_open_next(DataStream)
 185%           ->  process_parts(DataStream)
 186%           ;   close(DataStream)
 187%           ).
 188%     ==
 189%
 190%   @license The multipart parser contains   code licensed under the
 191%   MIT license, based on _node-formidable_   by Felix Geisendoerfer
 192%   and Igor Afonov.
 193
 194%!  multipart_open_next(+DataStream) is semidet.
 195%
 196%   Prepare DataStream to read the  next   part  from  the multipart
 197%   input data. Succeeds if a next part exists and fails if the last
 198%   part was processed. Note that it is  mandatory to read each part
 199%   up to the end_of_file.
 200
 201
 202                 /*******************************
 203                 *           CGI SUPPORT        *
 204                 *******************************/
 205
 206%!  cgi_open(+OutStream, -CGIStream, :Hook, +Options) is det.
 207%
 208%   Process CGI output. OutStream is   normally the socket returning
 209%   data to the HTTP client. CGIStream   is  the stream the (Prolog)
 210%   code writes to. The CGIStream provides the following functions:
 211%
 212%       * At the end of the header, it calls Hook using
 213%       call(Hook, header, Stream), where Stream is a stream holding
 214%       the buffered header.
 215%
 216%       * If the stream is closed, it calls Hook using
 217%       call(Hook, data, Stream), where Stream holds the buffered
 218%       data.
 219%
 220%   The stream calls Hook, adding  the   event  and CGIStream to the
 221%   closure. Defined events are:
 222%
 223%       * header
 224%       Called  if  the  header  is   complete.  Typically  it  uses
 225%       cgi_property/2 to extract the collected  header and combines
 226%       these with the request and policies   to decide on encoding,
 227%       transfer-encoding, connection parameters and   the  complete
 228%       header (as a Prolog term). Typically   it  uses cgi_set/2 to
 229%       associate these with the stream.
 230%
 231%       * send_header
 232%       Called if the HTTP header must  be sent. This is immediately
 233%       after setting the transfer encoding to =chunked= or when the
 234%       CGI stream is closed.  Typically   it  requests  the current
 235%       header, optionally the content-length and   sends the header
 236%       to the original (client) stream.
 237%
 238%       * close
 239%       Called from close/1 on the CGI   stream  after everything is
 240%       complete.
 241%
 242%   The predicates cgi_property/2  and  cgi_set/2   can  be  used to
 243%   control the stream and store status   info.  Terms are stored as
 244%   Prolog records and can thus be transferred between threads.
 245
 246%!  cgi_property(+CGIStream, ?Property) is det.
 247%
 248%   Inquire the status of the CGI stream.  Defined properties are:
 249%
 250%       * request(-Term)
 251%       The original request
 252%       * header(-Term)
 253%       Term is the header term as registered using cgi_set/2
 254%       * client(-Stream)
 255%       Stream is the original output stream used to create
 256%       this stream.
 257%       * thread(-ThreadID)
 258%       ThreadID is the identifier of the `owning thread'
 259%       * transfer_encoding(-Transfer)
 260%       One of =chunked= or =none=.
 261%       * connection(-Connection)
 262%       One of =Keep-Alive= or =close=
 263%       * content_length(-ContentLength)
 264%       Total byte-size of the content.  Available in the close
 265%       handler if the transfer_encoding is =none=.
 266%       * header_codes(-Codes)
 267%       Codes represents the header collected.  Available in the
 268%       header handler.
 269%       * state(-State)
 270%       One of =header=, =data= or =discarded=
 271%       * id(-ID)
 272%       Request sequence number.  This number is guaranteed to be
 273%       unique.
 274
 275%!  cgi_set(+CGIStream, +Property) is det.
 276%
 277%   Change one of the properties.  Supported properties are:
 278%
 279%       * request(+Term)
 280%       Associate a request to the stream.
 281%       * header(+Term)
 282%       Register a reply header.  This header is normally retrieved
 283%       from the =send_header= hook to send the reply header to the
 284%       client.
 285%       * connection(+Connection)
 286%       One of =Keep-Alive= or =close=.
 287%       * transfer_encoding(+Transfer)
 288%       One of =chunked= or =none=.  Initially set to =none=.  When
 289%       switching to =chunked= from the =header= hook, it calls the
 290%       =send_header= hook and if there is data queued this is sent
 291%       as first chunk.  Each subsequent write to the CGI stream
 292%       emits a chunk.
 293
 294%!  cgi_discard(+CGIStream) is det.
 295%
 296%   Discard content produced so far. It sets the state property to
 297%   =discarded=, causing close to omit writing the data. This
 298%   must be used for an alternate output (e.g. an error page) if the
 299%   page generator fails.
 300
 301%!  is_cgi_stream(+Stream) is semidet.
 302%
 303%   True if Stream is a CGI stream created using cgi_open/4.
 304
 305:- multifile
 306    http:encoding_filter/3,                 % +Encoding, +In0,  -In
 307    http:current_transfer_encoding/1.       % ?Encoding
 308
 309:- public
 310    http:encoding_filter/3,
 311    http:current_transfer_encoding/1.
 312
 313%!  http:encoding_filter(+Encoding, +In0, -In) is semidet.
 314%
 315%   Hook for library(http_open): for the =chunked= encoding, In is a
 316%   filter stream on top of In0. Closing In also closes In0.
 317
 318http:encoding_filter(chunked, RawIn, FilteredIn) :-
 319    FilterOptions = [ close_parent(true)    % close RawIn with FilteredIn
 320                    ],
 321    http_chunked_open(RawIn, FilteredIn, FilterOptions).
 322
 323%!  http:current_transfer_encoding(?Encoding) is semidet.
 324%
 325%   True if Encoding is supported (here: =chunked=). Used by library(http_open).
 326
 327http:current_transfer_encoding(chunked).
 328
 329%!  cgi_statistics(?Term)
 330%
 331%   Term is a statistic about the CGI stream subsystem. Enumerates
 332%   all statistics on backtracking if Term is unbound:
 333%
 334%       * requests(-Integer)
 335%       Total number of requests processed.
 336%       * bytes_sent(-Integer)
 337%       Total number of bytes sent.
 338
 339cgi_statistics(requests(RequestCount)) :-
 340    cgi_statistics_(RequestCount, _BytesSent).
 341cgi_statistics(bytes_sent(ByteCount)) :-
 342    cgi_statistics_(_RequestCount, ByteCount).