View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2012-2016, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(dcg_basics,
  37	  [ white//0,			% <white inside line>
  38	    whites//0,			% <white inside line>*
  39	    blank//0,			% <blank>
  40	    blanks//0,			% <blank>*
  41	    nonblank//1,		% <nonblank>
  42	    nonblanks//1,		% <nonblank>* --> chars		(long)
  43	    blanks_to_nl//0,		% [space,tab,ret]*nl
  44	    string//1,			% <any>* -->chars		(short)
  45	    string_without//2,		% Exclude, -->chars		(long)
  46					% Characters
  47	    alpha_to_lower//1,		% Get lower|upper, return lower
  48					% Decimal numbers
  49	    digits//1,			% [0-9]* -->chars
  50	    digit//1,			% [0-9] --> char
  51	    integer//1,			% [+-][0-9]+ --> integer
  52	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
  53	    number//1,			% integer | float
  54					% Hexadecimal numbers
  55	    xdigits//1,			% [0-9a-f]* --> 0-15*
  56	    xdigit//1,			% [0-9a-f] --> 0-15
  57	    xinteger//1,		% [0-9a-f]+ --> integer
  58
  59	    prolog_var_name//1,		% Read a Prolog variable name
  60
  61	    eos//0,			% Test end of input.
  62	    remainder//1,		% -List
  63
  64					% generation (TBD)
  65	    atom//1			% generate atom
  66	  ]).
  67:- use_module(library(lists)).
  68:- use_module(library(error)).
  69
  70
  71/** <module> Various general DCG utilities
  72
  73This library provides various commonly  used   DCG  primitives acting on
  74list  of  character  *codes*.  Character   classification  is  based  on
  75code_type/2.
  76
  77This module started its life as  library(http/dcg_basics) to support the
  78HTTP protocol. Since then, it was increasingly  used in code that has no
  79relation to HTTP and therefore  this  library   was  moved  to  the core
  80library.
  81
  82@tbd	This is just a starting point. We need a comprehensive set of
  83	generally useful DCG primitives.
  84*/
  85
  86%%	string_without(+EndCodes, -Codes)// is det.
  87%
  88%	Take as many codes from the input  until the next character code
  89%	appears in the list EndCodes.  The   terminating  code itself is
  90%	left on the input.  Typical  use  is   to  read  upto  a defined
  91%	delimiter such as a newline  or   other  reserved character. For
  92%	example:
  93%
  94%	    ==
  95%	        ...,
  96%	        string_without("\n", RestOfLine)
  97%	    ==
  98%
  99%	@arg EndCodes is a list of character codes.
 100%	@see string//1.
 101
 102string_without(End, Codes) -->
 103	{ string(End), !,
 104	  string_codes(End, EndCodes)
 105	},
 106	list_string_without(EndCodes, Codes).
 107string_without(End, Codes) -->
 108	list_string_without(End, Codes).
 109
 110list_string_without(Not, [C|T]) -->
 111	[C],
 112	{ \+ memberchk(C, Not)
 113	}, !,
 114	list_string_without(Not, T).
 115list_string_without(_, []) -->
 116	[].
 117
 118%%	string(-Codes)// is nondet.
 119%
 120%	Take as few as possible tokens from the input, taking one more
 121%	each time on backtracking. This code is normally followed by a
 122%	test for a delimiter.  For example:
 123%
 124%	==
 125%	upto_colon(Atom) -->
 126%		string(Codes), ":", !,
 127%		{ atom_codes(Atom, Codes) }.
 128%	==
 129%
 130%	@see string_without//2.
 131
 132string([]) -->
 133	[].
 134string([H|T]) -->
 135	[H],
 136	string(T).
 137
 138%%	blanks// is det.
 139%
 140%	Skip zero or more white-space characters.
 141
 142blanks -->
 143	blank, !,
 144	blanks.
 145blanks -->
 146	[].
 147
 148%%	blank// is semidet.
 149%
 150%	Take next =space= character from input. Space characters include
 151%	newline.
 152%
 153%	@see white//0
 154
 155blank -->
 156	[C],
 157	{ nonvar(C),
 158	  code_type(C, space)
 159	}.
 160
 161%%	nonblanks(-Codes)// is det.
 162%
 163%	Take all =graph= characters
 164
 165nonblanks([H|T]) -->
 166	[H],
 167	{ code_type(H, graph)
 168	}, !,
 169	nonblanks(T).
 170nonblanks([]) -->
 171	[].
 172
 173%%	nonblank(-Code)// is semidet.
 174%
 175%	Code is the next non-blank (=graph=) character.
 176
 177nonblank(H) -->
 178	[H],
 179	{ code_type(H, graph)
 180	}.
 181
 182%%	blanks_to_nl// is semidet.
 183%
 184%	Take a sequence of blank//0 codes if blanks are followed by a
 185%	newline or end of the input.
 186
 187blanks_to_nl -->
 188	"\n", !.
 189blanks_to_nl -->
 190	blank, !,
 191	blanks_to_nl.
 192blanks_to_nl -->
 193	eos.
 194
 195%%	whites// is det.
 196%
 197%	Skip white space _inside_ a line.
 198%
 199%	@see blanks//0 also skips newlines.
 200
 201whites -->
 202	white, !,
 203	whites.
 204whites -->
 205	[].
 206
 207%%	white// is semidet.
 208%
 209%	Take next =white= character from input. White characters do
 210%	_not_ include newline.
 211
 212white -->
 213	[C],
 214	{ nonvar(C),
 215	  code_type(C, white)
 216	}.
 217
 218
 219		 /*******************************
 220		 *	 CHARACTER STUFF	*
 221		 *******************************/
 222
 223%%	alpha_to_lower(?C)// is semidet.
 224%
 225%	Read a letter (class  =alpha=)  and   return  it  as a lowercase
 226%	letter. If C is instantiated and the  DCG list is already bound,
 227%	C must be =lower= and matches both a lower and uppercase letter.
 228%	If the output list is unbound, its first element is bound to C.
 229%	For example:
 230%
 231%	  ==
 232%	  ?- alpha_to_lower(0'a, `AB`, R).
 233%	  R = [66].
 234%	  ?- alpha_to_lower(C, `AB`, R).
 235%	  C = 97, R = [66].
 236%	  ?- alpha_to_lower(0'a, L, R).
 237%	  L = [97|R].
 238%	  ==
 239
 240alpha_to_lower(L) -->
 241	[C],
 242	{   nonvar(C)
 243	->  code_type(C, alpha),
 244	    code_type(C, to_upper(L))
 245	;   L = C
 246	}.
 247
 248
 249		 /*******************************
 250		 *	      NUMBERS		*
 251		 *******************************/
 252
 253%%	digits(?Chars)// is det.
 254%%	digit(?Char)// is det.
 255%%	integer(?Integer)// is det.
 256%
 257%	Number processing. The predicate  digits//1   matches  a posibly
 258%	empty set of digits,  digit//1  processes   a  single  digit and
 259%	integer processes an  optional  sign   followed  by  a non-empty
 260%	sequence of digits into an integer.
 261
 262digits([H|T]) -->
 263	digit(H), !,
 264	digits(T).
 265digits([]) -->
 266	[].
 267
 268digit(C) -->
 269	[C],
 270	{ code_type(C, digit)
 271	}.
 272
 273integer(I, Head, Tail) :-
 274	nonvar(I), !,
 275	format(codes(Head, Tail), '~d', [I]).
 276integer(I) -->
 277	int_codes(Codes),
 278	{ number_codes(I, Codes)
 279	}.
 280
 281int_codes([C,D0|D]) -->
 282	sign(C), !,
 283	digit(D0),
 284	digits(D).
 285int_codes([D0|D]) -->
 286	digit(D0),
 287	digits(D).
 288
 289
 290%%	float(?Float)// is det.
 291%
 292%	Process a floating  point  number.   The  actual  conversion  is
 293%	controlled by number_codes/2.
 294
 295float(F, Head, Tail) :-
 296	float(F), !,
 297	with_output_to(codes(Head, Tail), write(F)).
 298float(F) -->
 299	number(F),
 300	{ float(F) }.
 301
 302%%	number(+Number)// is det.
 303%%	number(-Number)// is semidet.
 304%
 305%	Generate extract a number. Handles   both  integers and floating
 306%	point numbers.
 307
 308number(N, Head, Tail) :-
 309	number(N), !,
 310	format(codes(Head, Tail), '~w', N).
 311number(N) -->
 312	int_codes(I),
 313	(   dot,
 314	    digit(DF0),
 315	    digits(DF)
 316	->  {F = [0'., DF0|DF]}
 317	;   {F = []}
 318	),
 319	(   exp
 320	->  int_codes(DI),
 321	    {E=[0'e|DI]}
 322	;   {E = []}
 323	),
 324	{ append([I, F, E], Codes),
 325	  number_codes(N, Codes)
 326	}.
 327
 328sign(0'-) --> "-".
 329sign(0'+) --> "+".
 330
 331dot --> ".".
 332
 333exp --> "e".
 334exp --> "E".
 335
 336		 /*******************************
 337		 *	    HEX NUMBERS		*
 338		 *******************************/
 339
 340%%	xinteger(+Integer)// is det.
 341%%	xinteger(-Integer)// is semidet.
 342%
 343%	Generate or extract an integer from   a  sequence of hexadecimal
 344%	digits.
 345
 346xinteger(Val, Head, Tail) :-
 347	integer(Val),
 348	format(codes(Head, Tail), '~16r', [Val]).
 349xinteger(Val) -->
 350	xdigit(D0),
 351	xdigits(D),
 352	{ mkval([D0|D], 16, Val)
 353	}.
 354
 355%%	xdigit(-Weight)// is semidet.
 356%
 357%	True if the next code is a  hexdecimal digit with Weight. Weight
 358%	is between 0 and 15.
 359
 360xdigit(D) -->
 361	[C],
 362	{ code_type(C, xdigit(D))
 363	}.
 364
 365%%	xdigits(-WeightList)// is det.
 366%
 367%	List of weights of a sequence of hexadecimal codes.  WeightList
 368%	may be empty.
 369
 370xdigits([D0|D]) -->
 371	xdigit(D0), !,
 372	xdigits(D).
 373xdigits([]) -->
 374	[].
 375
 376mkval([W0|Weights], Base, Val) :-
 377	mkval(Weights, Base, W0, Val).
 378
 379mkval([], _, W, W).
 380mkval([H|T], Base, W0, W) :-
 381	W1 is W0*Base+H,
 382	mkval(T, Base, W1, W).
 383
 384
 385		 /*******************************
 386		 *	   END-OF-STRING	*
 387		 *******************************/
 388
 389%%	eos//
 390%
 391%	Matches  end-of-input.  The  implementation    behaves   as  the
 392%	following portable implementation:
 393%
 394%	  ==
 395%	  eos --> call(eos_).
 396%	  eos_([], []).
 397%	  ==
 398%
 399%	@tbd	This is a difficult concept and violates the _context free_
 400%		property of DCGs.  Explain the exact problems.
 401
 402eos([], []).
 403
 404%%	remainder(-List)//
 405%
 406%	Unify List with the remainder of the input.
 407
 408remainder(List, List, []).
 409
 410
 411		 /*******************************
 412		 *	   PROLOG SYNTAX		*
 413		 *******************************/
 414
 415%%	prolog_var_name(-Name:atom)// is semidet.
 416%
 417%	Matches a Prolog variable name. Primarily  intended to deal with
 418%	quasi quotations that embed Prolog variables.
 419
 420prolog_var_name(Name) -->
 421	[C0], { code_type(C0, prolog_var_start) }, !,
 422	prolog_id_cont(CL),
 423	{ atom_codes(Name, [C0|CL]) }.
 424
 425prolog_id_cont([H|T]) -->
 426	[H], { code_type(H, prolog_identifier_continue) }, !,
 427	prolog_id_cont(T).
 428prolog_id_cont([]) --> "".
 429
 430
 431		 /*******************************
 432		 *	     GENERATION		*
 433		 *******************************/
 434
 435%%	atom(++Atom)// is det.
 436%
 437%	Generate codes of Atom.  Current implementation uses write/1,
 438%	dealing with any Prolog term.  Atom must be ground though.
 439
 440atom(Atom, Head, Tail) :-
 441	must_be(ground, Atom),
 442	format(codes(Head, Tail), '~w', [Atom]).