View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2013-2016, VU University Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(javascript_grammar,
  36          [ js_token//1
  37          ]).
  38:- use_module(library(dcg/basics)).
  39:- use_module(library(pure_input)).     % syntax_error//1
  40:- set_prolog_flag(double_quotes, codes).
  41
  42/** <module> JavaScript grammar
  43
  44This file provides a tokenizer for   JavaScript  (EcmaScript). This code
  45supports  the  quasi  quotation   syntax    =javascript=,   defined   in
  46library(http/js_write).
  47
  48@see    http://tomcopeland.blogs.com/EcmaScript.html is used for the
  49        high-level syntax.
  50@see    http://www.ecma-international.org/ecma-262/5.1/ is used for
  51        implementing the tokenization code.
  52*/
  53
  54%!  js_token(-TokenType)//
  55%
  56%   Matches and classifies the next JavaScript token.
  57
  58js_token(Type) -->
  59    token(Type).
  60
  61%!  token(-Type) is semidet.
  62%
  63%   Get the next token from the   input. Fails when encountering the
  64%   end of the input.
  65%
  66%   @error syntax_error(Culprit)
  67
  68token(comment)        --> comment, !.
  69token(string)         --> string_literal, !.
  70token(number)         --> numeric_literal, !.
  71token(identifier(Id)) --> identifier_name(Id), !.
  72token(regex)          --> regex_literal, !.
  73token(ws)             --> blank, !, blanks.
  74token(punct(Char))    --> [Code], { char_code(Char, Code) }.
  75
  76%!  comment// is semidet.
  77
  78comment -->
  79    "/*",
  80    !,
  81    (   string(_), "*/"
  82    ->  []
  83    ;   syntax_error(eof_in_comment)
  84    ).
  85comment -->
  86    "//",
  87    !,
  88    (   string(_), eol
  89    ->  []
  90    ;   string(_), eof
  91    ->  []
  92    ).
  93
  94
  95%!  string_literal// is semidet.
  96%
  97%   Matches a string literal
  98
  99string_literal -->
 100    "\"",
 101    !,
 102    (   q_codes, "\""
 103    ->  []
 104    ;   syntax_error(eof_in_string)
 105    ).
 106string_literal -->
 107    "\'",
 108    !,
 109    (   q_codes, "\'"
 110    ->  []
 111    ;   syntax_error(eof_in_string)
 112    ).
 113
 114
 115%!  numeric_literal//
 116%
 117%   Matches JavaScript notion of a numeric constant
 118
 119numeric_literal -->
 120    (   decimal_literal
 121    ->  []
 122    ;   hex_integer
 123    ),
 124    (   (   decimal_digit
 125        ;   js_id_start(_)
 126        )
 127    ->  syntax_error(js(illegal_number))
 128    ;   []
 129    ).
 130
 131decimal_literal -->
 132    decimal_integer, ".", opt_decimal_digits, opt_exponent.
 133decimal_literal -->
 134    ".", decimal_digits, opt_exponent.
 135decimal_literal -->
 136    decimal_integer,
 137    opt_exponent.
 138
 139decimal_integer -->
 140    "0",
 141    !.
 142decimal_integer -->
 143    non_zero_digit, opt_decimal_digits.
 144
 145decimal_digits -->
 146    decimal_digit,
 147    !,
 148    opt_decimal_digits.
 149
 150opt_decimal_digits -->
 151    decimal_digit,
 152    !,
 153    opt_decimal_digits.
 154opt_decimal_digits -->
 155    [].
 156
 157decimal_digit --> [C], { code_type(C, digit) }.
 158non_zero_digit --> [C], { code_type(C, digit), C \== 0'0 }.
 159
 160opt_exponent -->
 161    exponent,
 162    !.
 163opt_exponent -->
 164    [].
 165
 166exponent -->
 167    exponent_indictor,
 168    signed_integer.
 169
 170exponent_indictor --> "e", !.
 171exponent_indictor --> "E".
 172
 173signed_integer --> "+", !, decimal_digits.
 174signed_integer --> "-", !, decimal_digits.
 175signed_integer -->         decimal_digits.
 176
 177hex_integer --> "0", x, hex_digit, hex_digits.
 178
 179x --> "x".
 180x --> "X".
 181
 182
 183%!  regex_literal// is semidet.
 184%
 185%   Matches regex expression /.../flags
 186
 187regex_literal -->
 188    "/", regex_body, "/", !, regex_flags.
 189
 190regex_body -->
 191    regex_first_char,
 192    regex_chars.
 193
 194regex_chars --> regex_char, !, regex_chars.
 195regex_chars --> [].
 196
 197regex_first_char -->
 198    regex_non_terminator(C),
 199    !,
 200    { \+ memberchk(C, "*\\/[") }.
 201regex_first_char -->
 202    regex_backslash_sequence.
 203regex_first_char -->
 204    regex_class.
 205
 206regex_char -->
 207    regex_non_terminator(C),
 208    !,
 209    { \+ memberchk(C, "\\/[") }.
 210regex_char -->
 211    regex_backslash_sequence.
 212regex_char -->
 213    regex_class.
 214
 215regex_backslash_sequence -->
 216    "\\", !, regex_non_terminator(_).
 217
 218regex_class -->
 219    "[", regex_class_chars, "]".
 220
 221regex_class_chars --> regex_class_char, !, regex_class_chars.
 222regex_class_chars --> "".
 223
 224regex_class_char -->
 225    regex_non_terminator(C),
 226    !,
 227    { \+ memberchk(C, "]\\") }.
 228
 229regex_non_terminator(_) -->
 230    eol, !, {fail}.
 231regex_non_terminator(C) -->
 232    source_char(C).
 233
 234regex_flags -->
 235    js_id_conts(_).
 236
 237source_char(C) -->
 238    [C].
 239
 240
 241%!  q_codes//
 242%
 243%   Shortest list of quoted characters.
 244
 245q_codes --> [] ; q_code, q_codes.
 246
 247q_code --> "\\", !, char_esc.
 248q_code --> eol, !, {fail}.
 249q_code --> [_].
 250
 251char_esc --> single_escape_char, !.
 252char_esc --> "x", !, hex_digit, hex_digit.
 253char_esc --> "u", !, hex_digit, hex_digit, hex_digit, hex_digit.
 254char_esc --> eol, !.
 255
 256hex_digits --> hex_digit, !, hex_digits.
 257hex_digits --> [].
 258
 259hex_digit --> [C], {code_type(C, xdigit(_))}.
 260
 261single_escape_char --> "'".
 262single_escape_char --> "\"".
 263single_escape_char --> "\\".
 264single_escape_char --> "b".
 265single_escape_char --> "f".
 266single_escape_char --> "n".
 267single_escape_char --> "r".
 268single_escape_char --> "t".
 269single_escape_char --> "v".
 270
 271eol --> "\r\n", !.
 272eol --> "\n", !.
 273eol --> "\r".
 274
 275eof -->
 276    \+ [_].
 277
 278
 279%       js_identifier classification. Now  based  on   Prolog.  This  is
 280%       pretty close, but I'm afraid there are corner cases.
 281
 282identifier_name(Id) -->
 283    js_id_start(C0),
 284    !,
 285    js_id_conts(Rest),
 286    { atom_codes(Id, [C0|Rest]),
 287      (   keyword(Id)
 288      ->  fail, syntax_error(reserved(Id))
 289      ;   true
 290      )
 291    }.
 292
 293
 294js_id_start(C) --> [C], {js_id_start(C)}.
 295
 296js_id_start(C) :- code_type(C, prolog_var_start), !.
 297js_id_start(C) :- code_type(C, prolog_atom_start), !.
 298js_id_start(0'$).
 299
 300js_id_conts([H|T]) --> js_id_cont(H), !, js_id_conts(T).
 301js_id_conts([]) --> [].
 302
 303js_id_cont(C) --> [C], {js_id_cont(C)}.
 304
 305js_id_cont(C) :- code_type(C, prolog_identifier_continue), !.
 306js_id_cont(0'$) :- !.
 307
 308
 309keyword(break).                         % standard keywords
 310keyword(do).
 311keyword(instanceof).
 312keyword(typeof).
 313keyword(case).
 314keyword(else).
 315keyword(new).
 316keyword(var).
 317keyword(catch).
 318keyword(finally).
 319keyword(return).
 320keyword(void).
 321keyword(continue).
 322keyword(for).
 323keyword(switch).
 324keyword(while).
 325keyword(debugger).
 326keyword(function).
 327keyword(this).
 328keyword(with).
 329keyword(default).
 330keyword(if).
 331keyword(throw).
 332keyword(delete).
 333keyword(in).
 334keyword(try).
 335
 336keyword(class).                         % reserved keywords
 337keyword(enum).
 338keyword(extends).
 339keyword(super).
 340keyword(const).
 341keyword(export).
 342keyword(import).
 343
 344keyword(implements).                    % future reserved keywords
 345keyword(let).
 346keyword(private).
 347keyword(public).
 348keyword(yield).
 349keyword(interface).
 350keyword(package).
 351keyword(protected).
 352keyword(static).