View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (C): 2008-2016, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(pure_input,
  37          [ phrase_from_file/2,         % :Grammar, +File
  38            phrase_from_file/3,         % :Grammar, +File, +Options
  39            phrase_from_stream/2,       % :Grammar, +Stream
  40            stream_to_lazy_list/2,      % :Stream -List
  41
  42            syntax_error//1,            % +ErrorTerm
  43                                        % Low level interface
  44            lazy_list_location//1,      % -Location
  45            lazy_list_character_count//1 % -CharacterCount
  46          ]).
  47:- use_module(library(error)).
  48:- set_prolog_flag(generate_debug_info, false).
  49
  50/** <module> Pure Input from files and streams
  51
  52This module is part of pio.pl,   dealing with _pure_ _input_: processing
  53input streams from the outside  world   using  pure  predicates, notably
  54grammar rules (DCG).  Using  pure   predicates  makes  non-deterministic
  55processing of input much simpler.
  56
  57Pure input uses attributed variables  to   read  input from the external
  58source into a list _|on demand|_. The   overhead of lazy reading is more
  59than compensated for by using block reads based on read_pending_codes/3.
  60
  61Ulrich Neumerkel came up with the idea to use coroutining for creating a
  62_lazy list_. His implementation  repositioned  the   file  to  deal with
  63re-reading  that  can  be  necessary    on   backtracking.  The  current
  64implementation uses destructive assignment together  with more low-level
  65attribute handling to realise pure input on any (buffered) stream.
  66
  67@tbd    Provide support for alternative input readers, e.g. reading
  68        terms, tokens, etc.
  69*/
  70
  71:- predicate_options(phrase_from_file/3, 3,
  72                     [ pass_to(system:open/4, 4)
  73                     ]).
  74
  75%!  phrase_from_file(:Grammar, +File) is nondet.
  76%
  77%   Process the content of File  using   the  DCG  rule Grammar. The
  78%   space usage of this mechanism depends on   the length of the not
  79%   committed part of Grammar. Committed parts of the temporary list
  80%   are reclaimed by the  garbage  collector,   while  the  list  is
  81%   extended on demand due to  unification   of  the attributed tail
  82%   variable. Below is an example that counts  the number of times a
  83%   string appears in  a  file.   The  library  dcg/basics  provides
  84%   string//1 matching an arbitrary string   and  remainder//1 which
  85%   matches the remainder of the input without parsing.
  86%
  87%   ==
  88%   :- use_module(library(dcg/basics)).
  89%
  90%   file_contains(File, Pattern) :-
  91%           phrase_from_file(match(Pattern), File).
  92%
  93%   match(Pattern) -->
  94%           string(_),
  95%           string(Pattern),
  96%           remainder(_).
  97%
  98%   match_count(File, Pattern, Count) :-
  99%           aggregate_all(count, file_contains(File, Pattern), Count).
 100%   ==
 101%
 102%   This can be called as (note that   the  pattern must be a string
 103%   (code list)):
 104%
 105%   ==
 106%   ?- match_count('pure_input.pl', `file`, Count).
 107%   ==
 108
 109:- meta_predicate
 110    phrase_from_file(//, +),
 111    phrase_from_file(//, +, +),
 112    phrase_from_stream(//, +).
 113
 114phrase_from_file(Grammar, File) :-
 115    phrase_from_file(Grammar, File, []).
 116
 117%!  phrase_from_file(:Grammar, +File, +Options) is nondet.
 118%
 119%   As phrase_from_file/2, providing additional Options. Options are
 120%   passed to open/4.
 121
 122phrase_from_file(Grammar, File, Options) :-
 123    setup_call_cleanup(
 124        open(File, read, In, Options),
 125        phrase_from_stream(Grammar, In),
 126        close(In)).
 127
 128%!  phrase_from_stream(:Grammer, +Stream)
 129%
 130%   Run Grammer against the character codes   on Stream. Stream must
 131%   be buffered.
 132
 133phrase_from_stream(Grammar, In) :-
 134    stream_to_lazy_list(In, List),
 135    phrase(Grammar, List).
 136
 137%!  syntax_error(+Error)//
 138%
 139%   Throw the syntax error Error  at   the  current  location of the
 140%   input. This predicate is designed to  be called from the handler
 141%   of phrase_from_file/3.
 142%
 143%   @throws error(syntax_error(Error), Location)
 144
 145syntax_error(Error) -->
 146    lazy_list_location(Location),
 147    { throw(error(syntax_error(Error), Location))
 148    }.
 149
 150%!  lazy_list_location(-Location)// is det.
 151%
 152%   Determine current (error) location in  a   lazy  list. True when
 153%   Location is an (error) location term that represents the current
 154%   location in the DCG list.
 155%
 156%   @arg    Location is a term file(Name, Line, LinePos, CharNo) or
 157%           stream(Stream, Line, LinePos, CharNo) if no file is
 158%           associated to the stream RestLazyList.  Finally, if the
 159%           Lazy list is fully materialized (ends in =|[]|=), Location
 160%           is unified with `end_of_file-CharCount`.
 161%   @see    lazy_list_character_count//1 only provides the character
 162%           count.
 163
 164lazy_list_location(Location, Here, Here) :-
 165    lazy_list_location(Here, Location).
 166
 167lazy_list_location(Here, Location) :-
 168    '$skip_list'(Skipped, Here, Tail),
 169    (   attvar(Tail)
 170    ->  get_attr(Tail, pure_input, State),
 171        State = lazy_input(Stream, PrevPos, Pos, _),
 172        Details = [Line, LinePos, CharNo],
 173        (   stream_property(Stream, file_name(File))
 174        ->  PosParts = [file, File|Details]
 175        ;   PosParts = [stream, Stream|Details]
 176        ),
 177        Location =.. PosParts,
 178        (   PrevPos == (-)                  % nothing is read.
 179        ->  Line = 1, LinePos = 0, CharNo = 0
 180        ;   stream_position_data(char_count, Pos, EndRecordCharNo),
 181            CharNo is EndRecordCharNo - Skipped,
 182            set_stream_position(Stream, PrevPos),
 183            stream_position_data(char_count, PrevPos, StartRecordCharNo),
 184            Skip is CharNo-StartRecordCharNo,
 185            forall(between(1, Skip, _), get_code(Stream, _)),
 186            stream_property(Stream, position(ErrorPos)),
 187            stream_position_data(line_count, ErrorPos, Line),
 188            stream_position_data(line_position, ErrorPos, LinePos)
 189        )
 190    ;   Tail == []
 191    ->  Location = end_of_file-Skipped
 192    ;   type_error(lazy_list, Here)
 193    ).
 194
 195
 196%!  lazy_list_character_count(-CharCount)//
 197%
 198%   True when CharCount is the current   character count in the Lazy
 199%   list. The character count is computed by finding the distance to
 200%   the next frozen tail of the lazy list. CharCount is one of:
 201%
 202%     - An integer
 203%     - A term end_of_file-Count
 204%
 205%   @see    lazy_list_location//1 provides full details of the location
 206%           for error reporting.
 207
 208lazy_list_character_count(Location, Here, Here) :-
 209    lazy_list_character_count(Here, Location).
 210
 211lazy_list_character_count(Here, CharNo) :-
 212    '$skip_list'(Skipped, Here, Tail),
 213    (   attvar(Tail)
 214    ->  get_attr(Tail, pure_input, State),
 215        arg(3, State, Pos),
 216        stream_position_data(char_count, Pos, EndRecordCharNo),
 217        CharNo is EndRecordCharNo - Skipped
 218    ;   Tail == []
 219    ->  CharNo = end_of_file-Skipped
 220    ;   type_error(lazy_list, Here)
 221    ).
 222
 223
 224%!  stream_to_lazy_list(+Stream, -List) is det.
 225%
 226%   Create a lazy list representing the   character codes in Stream.
 227%   List is a  partial  list  ending   in  an  attributed  variable.
 228%   Unifying this variable reads the next   block of data. The block
 229%   is stored with the attribute value such that there is no need to
 230%   re-read it.
 231%
 232%   @compat Unlike the previous version of this predicate this
 233%           version does not require a repositionable stream.  It
 234%           does require a buffer size of at least the maximum
 235%           number of bytes of a multi-byte sequence (6).
 236
 237stream_to_lazy_list(Stream, List) :-
 238    (   stream_property(Stream, buffer(false))
 239    ->  permission_error(create, lazy_list, Stream)
 240    ;   true
 241    ),
 242    stream_to_lazy_list(Stream, -, List).
 243
 244stream_to_lazy_list(Stream, PrevPos, List) :-
 245    stream_property(Stream, position(Pos)),
 246    put_attr(List, pure_input, lazy_input(Stream, PrevPos, Pos, _)).
 247
 248attr_unify_hook(State, Value) :-
 249    State = lazy_input(Stream, _PrevPos, Pos, Read),
 250    (   var(Read)
 251    ->  fill_buffer(Stream),
 252        read_pending_codes(Stream, NewList, Tail),
 253        (   Tail == []
 254        ->  nb_setarg(4, State, []),
 255            Value = []
 256        ;   stream_to_lazy_list(Stream, Pos, Tail),
 257            nb_linkarg(4, State, NewList),
 258            Value = NewList
 259        )
 260    ;   Value = Read
 261    ).