   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2006-2015, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(pldoc_wiki,
  37          [ wiki_codes_to_dom/3,        % +Codes, +Args, -DOM
  38            wiki_lines_to_dom/3,        % +Lines, +Map, -DOM
  39            section_comment_header/3,   % +Lines, -Header, -RestLines
  40            summary_from_lines/2,       % +Lines, -Codes
  41            indented_lines/3,           % +Text, +PrefixChars, -Lines
  42            strip_leading_par/2,        % +DOM0, -DOM
  43            normalise_white_space/3,    % -Text, //
  44            autolink_extension/2,       % ?Extension, ?Type
  45            autolink_file/2             % +FileName, -Type
  46          ]).
  47:- use_module(library(lists)).
  48:- use_module(library(debug)).
  49:- use_module(library(error)).
  50:- use_module(library(memfile)).
  51:- use_module(library(pairs)).
  52:- use_module(library(option)).
  53:- use_module(library(debug)).
  54:- use_module(library(apply)).
  55
  56
  57/** <module> PlDoc wiki parser
  58
  59This file defines the PlDoc wiki parser,  which parses both comments and
  60wiki text files. The original version of this SWI-Prolog wiki format was
  61largely modeled after Twiki (http://twiki.org/).  The current version is
  62extended to take many aspects from   markdown, in particular the doxygen
  63refinement thereof.
  64
  65@see http://www.stack.nl/~dimitri/doxygen/manual/markdown.html
  66*/
  67
  68:- multifile
  69    prolog:doc_wiki_face//2,        % -Out, +VarNames
  70    prolog:doc_url_expansion/3,     % +Alias(Rest), -HREF, -Label
  71    prolog:url_expansion_hook/3,    % +Term, -Ref, -Label
  72    prolog:doc_autolink_extension/2.% +Extension, -Type
  73
  74
  75                 /*******************************
  76                 *          WIKI PARSING        *
  77                 *******************************/
  78
  79%!  wiki_lines_to_dom(+Lines:lines, +Args:list(atom), -Term) is det
  80%
  81%   Translate a Wiki text into  an   HTML  term suitable for html//1
  82%   from the html_write library.
  83
  84wiki_lines_to_dom(Lines, Args, HTML) :-
  85    tokenize_lines(Lines, Tokens0),
  86    normalise_indentation(Tokens0, Tokens),
  87    wiki_structure(Tokens, -1, Pars),
  88    wiki_faces(Pars, Args, HTML).
  89
  90
  91%!  wiki_codes_to_dom(+String, +Args, -DOM) is det.
  92%
  93%   Translate a plain text into a DOM term.
  94%
  95%   @param String   Plain text.  Either a string or a list of codes.
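%
%   For example (an illustrative sketch; the exact DOM term depends
%   on the wiki input):
%
%     ==
%     ?- string_codes("Hello _there_.", Codes),
%        wiki_codes_to_dom(Codes, [], DOM).
%     ==
%
%   DOM is then a list of block-level terms such as =|p(...)|= that
%   can be handed to html//1.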
  96
  97wiki_codes_to_dom(Codes, Args, DOM) :-
  98    indented_lines(Codes, [], Lines),
  99    wiki_lines_to_dom(Lines, Args, DOM).
 100
 101
 102%!  wiki_structure(+Lines:lines, +BaseIndent,
 103%!                 -Blocks:list(block)) is det
 104%
 105%   Get the structure in terms  of block-level elements: paragraphs,
 106%   lists and tables. This processing uses   a mixture of layout and
 107%   punctuation.
 108
 109wiki_structure([], _, []) :- !.
 110wiki_structure([_-[]|T], BI, Pars) :-          % empty lines
 111    !,
 112    wiki_structure(T, BI, Pars).
 113wiki_structure(Lines, _, [\tags(Tags)]) :-
 114    tags(Lines, Tags),
 115    !.
 116wiki_structure(Lines, BI, [P1|PL]) :-
 117    take_block(Lines, BI, P1, RestLines),
 118    wiki_structure(RestLines, BI, PL).
 119
 120%!  take_block(+Lines, +BaseIndent, ?Block, -RestLines) is semidet.
 121%
 122%   Take a block-structure from the input.  Defined block elements
 123%   are lists, table, hrule, section header and paragraph.
 124
 125take_block([_-[]|Lines], BaseIndent, Block, Rest) :-
 126    !,
 127    take_block(Lines, BaseIndent, Block, Rest).
 128take_block([N-_|_], BaseIndent, _, _) :-
 129    N < BaseIndent,
 130    !,
 131    fail.                           % less indented
 132take_block(Lines, BaseIndent, List, Rest) :-
 133    list_item(Lines, Type, Indent, LI, LIT, Rest0),
 134    !,
 135    Indent > BaseIndent,
 136    rest_list(Rest0, Type, Indent, LIT, [], Rest),
 137    List0 =.. [Type, LI],
 138    (   ul_to_dl(List0, List)
 139    ->  true
 140    ;   List0 = dl(Items)
 141    ->  List = dl(class=wiki, Items)
 142    ;   List = List0
 143    ).
 144take_block([N-['|'|RL1]|LT], _, Table, Rest) :-
 145    phrase(row(R0), RL1),
 146    rest_table(LT, N, RL, Rest),
 147    !,
 148    Table = table(class=wiki, [tr(R0)|RL]).
  149take_block([0-[-,-|More]|LT], _, Block, LT) :-  % separation line
 150    maplist(=(-), More),
 151    !,
 152    Block = hr([]).
  153take_block([_-Line|LT], _, Block, LT) :-        % separation line
 154    ruler(Line),
 155    !,
 156    Block = hr([]).
  157take_block([_-[@|_]|_], _, _, _) :-             % starts @tags section
 158    !,
 159    fail.
 160take_block(Lines, _BaseIndent, Section, RestLines) :-
 161    section_header(Lines, Section, RestLines),
 162    !.
 163take_block([_-Verb|Lines], _, Verb, Lines) :-
 164    verbatim_term(Verb),
 165    !.
 166take_block([I-L1|LT], BaseIndent, Elem, Rest) :-
 167    !,
 168    append(L1, PT, Par),
 169    rest_par(LT, PT, I, BaseIndent, MaxI, Rest),
 170    (   MaxI >= BaseIndent+16
 171    ->  Elem = center(Par)
 172    ;   MaxI >= BaseIndent+4
 173    ->  Elem = blockquote(Par)
 174    ;   Elem = p(Par)
 175    ).
 176take_block([Verb|Lines], _, Verb, Lines).
 177
 178%!  ruler(+Line) is semidet.
 179%
  180%   True if Line contains at least 3 ruler chars and otherwise only spaces.
 181
 182ruler([C0|Line]) :-
 183    rule_char(C0),
 184    phrase(ruler(C0, 1), Line).
 185
 186ruler(C, N) --> [C], !, { N2 is N+1 }, ruler(C, N2).
 187ruler(C, N) --> [' '], !, ruler(C, N).
 188ruler(_, N) --> { N >= 3 }.
 189
 190rule_char('-').
 191rule_char('_').
 192rule_char('*').
 193
 194%!  list_item(+Lines, ?Type, ?Indent, -LI0, -LIT, -RestLines) is det.
 195%
 196%   Create a list-item. Naturally this should produce a single item,
 197%   but DL lists produce two items, so   we create the list of items
 198%   as a difference list.
 199%
 200%   @tbd    Pass base-indent
 201
 202list_item([Indent-Line|LT], Type, Indent, Items, ItemT, Rest) :-
 203    !,
 204    list_item_prefix(Type, Line, L1),
 205    (   Type == dl
 206    ->  split_dt(L1, DT0, DD1),
 207        append(DD1, LIT, DD),
 208        strip_ws_tokens(DT0, DT),
 209        Items = [dt(DT),dd(DD)|ItemT]
 210    ;   append(L1, LIT, LI0),
 211        Items = [li(LI0)|ItemT]
 212    ),
 213    rest_list_item(LT, Type, Indent, LIT, Rest).
 214
 215%!  rest_list_item(+Lines, +Type, +Indent, -RestItem, -RestLines) is det
 216%
 217%   Extract the remainder (after the first line) of a list item.
 218
 219rest_list_item(Lines, _Type, Indent, RestItem, RestLines) :-
 220    take_blocks_at_indent(Lines, Indent, Blocks, RestLines),
 221    (   Blocks = [p(Par)|MoreBlocks]
 222    ->  append(['\n'|Par], MoreBlocks, RestItem)
 223    ;   RestItem = Blocks
 224    ).
 225
 226%!  take_blocks_at_indent(+Lines, +Indent, -Pars, -RestLines) is det.
 227%
 228%   Process paragraphs and verbatim blocks (==..==) in bullet-lists.
 229
 230take_blocks_at_indent(Lines, _, [], Lines) :-
 231    skip_empty_lines(Lines, Lines1),
 232    section_header(Lines1, _, _),
 233    !.
 234take_blocks_at_indent(Lines, N, [Block|RestBlocks], RestLines) :-
 235    take_block(Lines, N, Block, Rest0),
 236    !,
 237    take_blocks_at_indent(Rest0, N, RestBlocks, RestLines).
 238take_blocks_at_indent(Lines, _, [], Lines).
 239
 240
 241%!  rest_list(+Lines, +Type, +Indent,
 242%!            -Items, -ItemTail, -RestLines) is det.
 243
 244rest_list(Lines, Type, N, Items, IT, Rest) :-
 245    skip_empty_lines(Lines, Lines1),
 246    list_item(Lines1, Type, N, Items, IT0, Rest0),
 247    !,
 248    rest_list(Rest0, Type, N, IT0, IT, Rest).
 249rest_list(Rest, _, _, IT, IT, Rest).
 250
 251%!  list_item_prefix(?Type, +Line, -Rest) is det.
 252
 253list_item_prefix(ul, [*, ' '|T], T) :- !.
 254list_item_prefix(ul, [-, ' '|T], T) :- !.
 255list_item_prefix(dl, [$, ' '|T], T) :-
 256    split_dt(T, _, _),
 257    !.
 258list_item_prefix(ol, [w(N), '.', ' '|T], T) :-
 259    atom_codes(N, [D]),
 260    between(0'0, 0'9, D).
 261
 262%!  split_dt(+LineAfterDollar, -DT, -Rest)
 263%
  264%   First see whether the entire line is the item title. This allows
  265%   creating titles that contain =|:|= by writing =|$ <tokens> :|= alone on the line.
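%
%   For example, the wiki input below (illustrative) creates a single
%   definition-list entry with title =Timeout=:
%
%     ==
%     $ Timeout :
%     Max time to wait for a reply.
%     ==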
 266
 267split_dt(In, DT, []) :-
 268    append(DT, [':'], In),
 269    !.
 270split_dt(In, DT, Rest) :-
 271    append(DT, [':'|Rest0], In),
 272    (   Rest0 == []
 273    ->  Rest = []
 274    ;   Rest0 = [' '|Rest]
 275    ),
 276    !.
 277
 278
 279%!  ul_to_dl(+UL, -DL) is semidet.
 280%
  281%   Translate a UL list into a DL list   if  all entries are of the
 282%   form "* <term> nl, <description>" and at least one <description>
 283%   is   non-empty,   or    all    items     are    of    the   form
 284%   [[PredicateIndicator]].
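%
%   For example, a list such as the following (illustrative) is
%   translated into a term list:
%
%     ==
%       * color(+Color)
%         Color used for drawing.
%       * size(+Size)
%         Size of the object.
%     ==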
 285
 286ul_to_dl(ul(Items), Description) :-
 287    term_items(Items, DLItems, []),
 288    (   terms_to_predicate_includes(DLItems, Preds)
 289    ->  Description = dl(class(predicates), Preds)
 290    ;   member(dd(DD), DLItems), DD \== []
 291    ->  Description = dl(class(termlist), DLItems)
 292    ).
 293
 294term_items([], T, T).
 295term_items([LI|LIs], DLItems, Tail) :-
 296    term_item(LI, DLItems, Tail1),
 297    term_items(LIs, Tail1, Tail).
 298
 299%!  term_item(+LI, -DLItem, ?Tail) is semidet.
 300%
 301%   If LI is of the form <Term> followed  by a newline, return it as
  302%   a dt-dd  tuple.  The  <dt>  item    contains  a  term
 303%
 304%       \term(Text, Term, Bindings).
 305
 306term_item(li(Tokens),
 307          [ dt(class=term, \term(Text, Term, Bindings)),
 308            dd(Descr)
 309          | Tail
 310          ], Tail) :-
 311    (   (   append(TermTokens, ['\n'|Descr], Tokens)
 312        ->  true
 313        ;   TermTokens = Tokens,
 314            Descr = []
 315        )
 316    ->  setup_call_cleanup(
 317            ( new_memory_file(MemFile),
 318              open_memory_file(MemFile, write, Out)
 319            ),
 320            ( forall(member(T, TermTokens),
 321                     write_token(Out, T)),
 322              write(Out, ' .\n')
 323            ),
 324            close(Out)),
 325        catch(setup_call_cleanup(
 326                  open_memory_file(MemFile, read, In,
 327                                   [ free_on_close(true)
 328                                   ]),
 329                  ( read_dt_term(In, Term, Bindings),
 330                    read_dt_term(In, end_of_file, []),
 331                    memory_file_to_atom(MemFile, Text)
 332                  ),
 333                  close(In)),
 334              _, fail)
 335    ).
 336
 337write_token(Out, w(X)) :-
 338    !,
 339    write(Out, X).
 340write_token(Out, X) :-
 341    write(Out, X).
 342
 343read_dt_term(In, Term, Bindings) :-
 344    read_term(In, Term,
 345              [ variable_names(Bindings),
 346                module(pldoc_modes)
 347              ]).
 348
 349terms_to_predicate_includes([], []).
 350terms_to_predicate_includes([dt(class=term, \term(_, [[PI]], [])), dd([])|T0],
 351                            [\include(PI, predicate, [])|T]) :-
 352    is_pi(PI),
 353    terms_to_predicate_includes(T0, T).
 354
 355is_pi(Name/Arity) :-
 356    atom(Name),
 357    integer(Arity),
 358    between(0, 20, Arity).
 359is_pi(Name//Arity) :-
 360    atom(Name),
 361    integer(Arity),
 362    between(0, 20, Arity).
 363
 364
 365%!  row(-Cells)// is det.
 366
 367row([C0|CL]) -->
 368    cell(C0),
 369    !,
 370    row(CL).
 371row([]) -->
 372    [].
 373
 374cell(td(C)) -->
 375    face_tokens(C0),
 376    ['|'],
 377    !,
 378    { strip_ws_tokens(C0, C)
 379    }.
 380
 381face_tokens([]) -->
 382    [].
 383face_tokens(Tokens) -->
 384    face_token(H),                          % Deal with embedded *|...|*, etc.
 385    token('|'),
 386    face_tokens(Face),
 387    token('|'),
 388    face_token(H),
 389    !,
 390    { append([[H,'|'], Face, ['|', H], Rest], Tokens) },
 391    face_tokens(Rest).
 392face_tokens([H|T]) -->
 393    token(H),
 394    face_tokens(T).
 395
 396face_token(=) --> [=].
 397face_token(*) --> [*].
 398face_token('_') --> ['_'].
 399
 400rest_table([N-Line|LT], N, RL, Rest) :-
 401    md_table_structure_line(Line),
 402    !,
 403    rest_table(LT, N, RL, Rest).
 404rest_table([N-['|'|RL1]|LT], N, [tr(R0)|RL], Rest) :-
 405    !,
 406    phrase(row(R0), RL1),
 407    rest_table(LT, N, RL, Rest).
 408rest_table(Rest, _, [], Rest).
 409
 410%!  md_table_structure_line(+Chars)
 411%
 412%   True if Chars represents Markdown  table structure. We currently
 413%   ignore the structure information.
 414
 415md_table_structure_line(Line) :-
 416    forall(member(Char, Line),
 417           md_table_structure_char(Char)).
 418
 419md_table_structure_char(' ').
 420md_table_structure_char('-').
 421md_table_structure_char('|').
 422md_table_structure_char(':').
 423
 424%!  rest_par(+Lines, -Par,
 425%!           +BaseIndent, +MaxI0, -MaxI, -RestLines) is det.
 426%
 427%   Take the rest of a paragraph. Paragraphs   are  ended by a blank
 428%   line or the start of a list-item.   The latter is a bit dubious.
 429%   Why not a general  block-level   object?  The current definition
 430%   allows for writing lists without a blank line between the items.
 431
 432rest_par([], [], BI, MaxI0, MaxI, []) :-
 433    !,
 434    MaxI is max(BI, MaxI0).
 435rest_par([_-[]|Rest], [], _, MaxI, MaxI, Rest) :- !.
 436rest_par(Lines, [], _, MaxI, MaxI, Lines) :-
 437    Lines = [_-Verb|_],
 438    verbatim_term(Verb),
 439    !.
 440rest_par([I-L|Rest], [], _, MaxI, MaxI, [I-L|Rest]) :-
 441    list_item_prefix(_, L, _),
 442    !.
 443rest_par([I-L1|LT], ['\n'|Par], BI, MaxI0, MaxI, Rest) :-
 444    append(L1, PT, Par),
 445    MaxI1 is max(I, MaxI0),
 446    rest_par(LT, PT, BI, MaxI1, MaxI, Rest).
 447
 448
 449%!  section_header(+Lines, -Section, -RestLines) is semidet.
 450%
 451%   Get a section line from the input.
 452
 453section_header([_-L1|LT], Section, LT) :-
 454    twiki_section_line(L1, Section),
 455    !.
 456section_header([0-L1|LT], Section, LT) :-
 457    md_section_line(L1, Section),
 458    !.
 459section_header([_-L1,0-L2|LT], Section, LT) :-
 460    md_section_line(L1, L2, Section),
 461    !.
 462
 463%!  twiki_section_line(+Tokens, -Section) is semidet.
 464%
 465%   Extract a section using the Twiki   conventions. The section may
  466%   be preceded by [Word], in which case we generate an anchor name
 467%   Word for the section.
 468
 469twiki_section_line([-,-,-|Rest], Section) :-
 470    plusses(Rest, Section).
 471
 472plusses([+, ' '|Rest], h1(Attrs, Content)) :-
 473    hdr_attributes(Rest, Attrs, Content).
 474plusses([+, +, ' '|Rest], h2(Attrs, Content)) :-
 475    hdr_attributes(Rest, Attrs, Content).
 476plusses([+, +, +, ' '|Rest], h3(Attrs, Content)) :-
 477    hdr_attributes(Rest, Attrs, Content).
 478plusses([+, +, +, +, ' '|Rest], h4(Attrs, Content)) :-
 479    hdr_attributes(Rest, Attrs, Content).
 480
 481hdr_attributes(List, Attrs, Content) :-
 482    strip_leading_ws(List, List2),
 483    (   List2 = ['[',w(Name),']'|List3]
 484    ->  strip_ws_tokens(List3, Content),
 485        Attrs = [class(wiki), id(Name)]
 486    ;   Attrs = class(wiki),
 487        strip_ws_tokens(List, Content)
 488    ).
 489
 490%!  md_section_line(+Tokens, -Section) is semidet.
 491%
  492%   Handle markdown section lines starting with #
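%
%   For example (illustrative), the line
%
%     ==
%     ## Implementation notes {#impl-notes}
%     ==
%
%   produces an =h2= header with id =|impl-notes|=.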
 493
 494md_section_line([#, ' '|Rest], h1(Attrs, Content)) :-
 495    md_section_attributes(Rest, Attrs, Content).
 496md_section_line([#, #, ' '|Rest], h2(Attrs, Content)) :-
 497    md_section_attributes(Rest, Attrs, Content).
 498md_section_line([#, #, #, ' '|Rest], h3(Attrs, Content)) :-
 499    md_section_attributes(Rest, Attrs, Content).
 500md_section_line([#, #, #, #, ' '|Rest], h4(Attrs, Content)) :-
 501    md_section_attributes(Rest, Attrs, Content).
 502
 503md_section_attributes(List, Attrs, Content) :-
 504    phrase((tokens(Content), [' '], section_label(Label)), List),
 505    !,
 506    Attrs = [class(wiki), id(Label)].
 507md_section_attributes(Content, Attrs, Content) :-
 508    Attrs = [class(wiki)].
 509
 510section_label(Label) -->
 511    [ '{', '#', w(Name) ],
 512    label_conts(Cont), ['}'],
 513    !,
 514    { atomic_list_concat([Name|Cont], Label) }.
 515
 516label_conts([H|T]) --> label_cont(H), !, label_conts(T).
 517label_conts([]) --> [].
 518
 519label_cont(-) --> [-].
 520label_cont(Name) --> [w(Name)].
 521
 522
 523md_section_line(Line1, Line2, Header) :-
 524    Line1 \== [],
 525    section_underline(Line2, Type),
 526    phrase(wiki_words(_), Line1),  % Should not have structure elements
 527    !,
 528    (   phrase(labeled_section_line(Title, Attrs), Line1)
 529    ->  true
 530    ;   Title = Line1,
 531        Attrs = []
 532    ),
 533    Header =.. [Type, [class(wiki)|Attrs], Title].
 534
 535section_underline([=,=,=|T], h1) :-
 536    maplist(=(=), T),
 537    !.
 538section_underline([-,-,-|T], h2) :-
 539    maplist(=(-), T),
 540    !.
 541
 542labeled_section_line(Title, Attrs) -->
 543    tokens(Title), [' '], section_label(Label),
 544    !,
 545    { Attrs = [id(Label)] }.
 546
 547
 548%!  strip_ws_tokens(+Tokens, -Stripped)
 549%
 550%   Strip leading and trailing whitespace from a token list.  Note
  551%   that the whitespace is already normalised.
 552
 553strip_ws_tokens([' '|T0], T) :-
 554    !,
 555    strip_ws_tokens(T0, T).
 556strip_ws_tokens(L0, L) :-
 557    append(L, [' '], L0),
 558    !.
 559strip_ws_tokens(L, L).
 560
 561
 562%!  strip_leading_ws(+Tokens, -Stripped) is det.
 563%
 564%   Strip leading whitespace from a token list.
 565
 566strip_leading_ws([' '|T], T) :- !.
 567strip_leading_ws(T, T).
 568
 569
 570                 /*******************************
 571                 *             TAGS             *
 572                 *******************************/
 573
 574%!  tags(+Lines:lines, -Tags) is semidet.
 575%
 576%   If the first line is a @tag, read the remainder of the lines to
 577%   a list of \tag(Name, Value) terms.
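%
%   For example, the tag section of a comment may look like this
%   (illustrative):
%
%     ==
%     @arg    Stream  Stream to read from.
%     @throws type_error(stream, Stream) if Stream is not a stream.
%     @see    read_term/3
%     ==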
 578
 579tags(Lines, Tags) :-
 580    collect_tags(Lines, Tags0),
 581    keysort(Tags0, Tags1),
 582    pairs_values(Tags1, Tags2),
 583    combine_tags(Tags2, Tags).
 584
 585%!  collect_tags(+IndentedLines, -Tags) is semidet
 586%
 587%   Create a list Order-tag(Tag,Tokens) for   each @tag encountered.
 588%   Order is the desired position as defined by tag_order/2.
 589%
 590%   @tbd Tag content is  often  poorly   aligned.  We  now  find the
 591%   alignment of subsequent lines  and  assume   the  first  line is
  592%   aligned with the remaining lines.
 593
 594collect_tags([], []).
 595collect_tags([Indent-[@,String|L0]|Lines], [Order-tag(Tag,Value)|Tags]) :-
 596    tag_name(String, Tag, Order),
 597    !,
 598    strip_leading_ws(L0, L),
 599    rest_tag(Lines, Indent, VT, RestLines),
 600    normalise_indentation(VT, VT1),
 601    wiki_structure([0-L|VT1], -1, Value0),
 602    strip_leading_par(Value0, Value),
 603    collect_tags(RestLines, Tags).
 604
 605
 606%!  tag_name(+String, -Tag:atom, -Order:int) is semidet.
 607%
  608%   If String denotes a known tag name, unify Tag and Order accordingly.
 609
 610tag_name(w(Name), Tag, Order) :-
 611    (   renamed_tag(Name, Tag, Level),
 612        tag_order(Tag, Order)
 613    ->  print_message(Level, pldoc(deprecated_tag(Name, Tag)))
 614    ;   tag_order(Name, Order)
 615    ->  Tag = Name
 616    ;   print_message(warning, pldoc(unknown_tag(Name))),
 617        fail
 618    ).
 619
 620
 621rest_tag([], _, [], []) :- !.
 622rest_tag(Lines, Indent, [], Lines) :-
 623    Lines = [Indent-[@,Word|_]|_],
 624    tag_name(Word, _, _),
 625    !.
 626rest_tag([L|Lines0], Indent, [L|VT], Lines) :-
 627    rest_tag(Lines0, Indent, VT, Lines).
 628
 629
 630%!  renamed_tag(+DeprecatedTag:atom, -Tag:atom, -Warn) is semidet.
 631%
 632%   Declaration for deprecated tags.
 633
 634renamed_tag(exception, throws, warning).
 635renamed_tag(param,     arg,    silent).
 636
 637
 638%!  tag_order(+Tag:atom, -Order:int) is semidet.
 639%
  640%   Declares both the known tags and  their expected order. Currently
 641%   the tags are forced into  this   order  without  warning. Future
 642%   versions may issue a warning if the order is inconsistent.
 643
 644:- multifile
 645    pldoc:tag_order/2.
 646
 647tag_order(Tag, Order) :-
 648    pldoc:tag_order(Tag, Order),
 649    !.
 650tag_order(arg,         100).
 651tag_order(error,       200).            % same as throw
 652tag_order(throws,      300).
 653tag_order(author,      400).
 654tag_order(version,     500).
 655tag_order(see,         600).
 656tag_order(deprecated,  700).
 657tag_order(compat,      800).            % PlDoc extension
 658tag_order(copyright,   900).
 659tag_order(license,    1000).
 660tag_order(bug,        1100).
 661tag_order(tbd,        1200).
 662
 663%!  combine_tags(+Tags:list(tag(Key, Value)), -Tags:list) is det.
 664%
 665%   Creates the final tag-list.  Tags is a list of
 666%
  667%           * \args(list(arg(Name, Descr)))
 668%           * \tag(Name, list(Descr))
 669%
 670%   Descr is a list of tokens.
 671
 672combine_tags([], []).
 673combine_tags([tag(arg, V1)|T0], [\args([P1|PL])|Tags]) :-
 674    !,
 675    arg_tag(V1, P1),
 676    arg_tags(T0, PL, T1),
 677    combine_tags(T1, Tags).
 678combine_tags([tag(Tag,V0)|T0], [\tag(Tag, [V0|Vs])|T]) :-
 679    same_tag(Tag, T0, T1, Vs),
 680    combine_tags(T1, T).
 681
 682arg_tag([PT|Descr0], arg(PN, Descr)) :-
 683    word_of(PT, PN),
 684    strip_leading_ws(Descr0, Descr).
 685
 686word_of(w(W), W) :- !.                  % TBD: check non-word arg
 687word_of(W, W).
 688
 689arg_tags([tag(arg, V1)|T0], [P1|PL], T) :-
 690    !,
 691    arg_tag(V1, P1),
 692    arg_tags(T0, PL, T).
 693arg_tags(T, [], T).
 694
 695same_tag(Tag, [tag(Tag, V)|T0], T, [V|Vs]) :-
 696    !,
 697    same_tag(Tag, T0, T, Vs).
 698same_tag(_, L, L, []).
 699
 700
 701                 /*******************************
 702                 *             FACES            *
 703                 *******************************/
 704
 705%!  wiki_faces(+Structure, +ArgNames, -HTML) is det.
 706%
 707%   Given the wiki structure, analyse the content of the paragraphs,
 708%   list items and table cells and apply font faces and links.
 709
 710wiki_faces([dt(Class, \term(Text, Term, Bindings)), dd(Descr0)|T0],
 711           ArgNames,
 712           [dt(Class, \term(Text, Term, Bindings)), dd(Descr)|T]) :-
 713    !,
 714    varnames(Bindings, VarNames, ArgNames),
 715    wiki_faces(Descr0, VarNames, Descr),
 716    wiki_faces(T0, ArgNames, T).
 717wiki_faces(DOM0, ArgNames, DOM) :-
 718    structure_term(DOM0, Functor, Content0),
 719    !,
 720    wiki_faces_list(Content0, ArgNames, Content),
 721    structure_term(DOM, Functor, Content).
 722wiki_faces(Verb, _, Verb) :-
 723    verbatim_term(Verb),
 724    !.
 725wiki_faces(Content0, ArgNames, Content) :-
 726    assertion(is_list(Content0)),
 727    phrase(wiki_faces(Content, ArgNames), Content0),
 728    !.
 729
 730varnames([], List, List).
 731varnames([Name=_|T0], [Name|T], List) :-
 732    varnames(T0, T, List).
 733
 734wiki_faces_list([], _, []).
 735wiki_faces_list([H0|T0], Args, [H|T]) :-
 736    wiki_faces(H0, Args, H),
 737    wiki_faces_list(T0, Args, T).
 738
 739%!  structure_term(+Term, -Functor, -Content) is semidet.
 740%!  structure_term(-Term, +Functor, +Content) is det.
 741%
 742%   (Un)pack a term describing structure, so  we can process Content
 743%   and re-pack the structure.
 744
 745structure_term(\tags(Tags), tags, [Tags]) :- !.
 746structure_term(\args(Params), args, [Params]) :- !.
 747structure_term(arg(Name,Descr), arg(Name), [Descr]) :- !.
 748structure_term(\tag(Name,Value), tag(Name), [Value]) :- !.
 749structure_term(\include(What,Type,Opts), include(What,Type,Opts), []) :- !.
 750structure_term(dl(Att, Args), dl(Att), [Args]) :- !.
 751structure_term(dt(Att, Args), dt(Att), [Args]) :- !.
 752structure_term(table(Att, Args), table(Att), [Args]) :- !.
 753structure_term(h1(Att, Args), h1(Att), [Args]) :- !.
 754structure_term(h2(Att, Args), h2(Att), [Args]) :- !.
 755structure_term(h3(Att, Args), h3(Att), [Args]) :- !.
 756structure_term(h4(Att, Args), h4(Att), [Args]) :- !.
 757structure_term(hr(Att), hr(Att), []) :- !.
 758structure_term(p(Args), p, [Args]) :- !.
 759structure_term(Term, Functor, Args) :-
 760    functor(Term, Functor, 1),
 761    structure_tag(Functor),
 762    !,
 763    Term =.. [Functor|Args].
 764
 765structure_tag(ul).
 766structure_tag(ol).
 767structure_tag(dl).
 768structure_tag(li).
 769structure_tag(dt).
 770structure_tag(dd).
 771structure_tag(table).
 772structure_tag(tr).
 773structure_tag(td).
 774structure_tag(blockquote).
 775structure_tag(center).
 776
 777%!  verbatim_term(?Term) is det
 778%
  779%   True if Term must be passed verbatim.
 780
 781verbatim_term(pre(_,_)).
 782verbatim_term(\term(_,_,_)).
 783
 784%!  matches(:Goal, -Input, -Last)//
 785%
 786%   True when Goal runs successfully on the DCG input and Input
 787%   is the list of matched tokens.
 788
 789matches(Goal, Input, Last, List, Rest) :-
 790    call(Goal, List, Rest),
 791    input(List, Rest, Input, Last).
 792
 793input([H|T0], Rest, Input, Last) :-
 794    (   T0 == Rest
 795    ->  Input = [H],
 796        Last = H
 797    ;   Input = [H|T],
 798        input(T0, Rest, T, Last)
 799    ).
 800
 801
 802%!  wiki_faces(-WithFaces, +ArgNames)// is nondet.
 803%!  wiki_faces(-WithFaces, +ArgNames, +Options)// is nondet.
 804%
 805%   Apply font-changes and automatic  links   to  running  text. The
 806%   faces are applied after discovering   the structure (paragraphs,
 807%   lists, tables, keywords).
 808%
 809%   @arg Options is a dict, minimally containing `depth`
 810
 811wiki_faces(WithFaces, ArgNames, List, Rest) :-
 812    default_faces_options(Options),
 813    catch(wiki_faces(WithFaces, ArgNames, Options, List, Rest),
 814          pldoc(depth_limit),
 815          failed_faces(WithFaces, List, Rest)).
 816
 817default_faces_options(_{depth:5}).
 818
 819failed_faces(WithFaces) -->
 820    { debug(markdown(overflow), 'Depth limit exceeded', []) },
 821    wiki_words(WithFaces).
 822
 823wiki_faces([EmphTerm|T], ArgNames, Options) -->
 824    emphasis_seq(EmphTerm, ArgNames, Options),
 825    !,
 826    wiki_faces_int(T, ArgNames).
 827wiki_faces(Faces, ArgNames, Options) -->
 828    wiki_faces_int(Faces, ArgNames, Options).
 829
 830wiki_faces_int(WithFaces, ArgNames) -->
 831    { default_faces_options(Options)
 832    },
 833    wiki_faces_int(WithFaces, ArgNames, Options).
 834
 835wiki_faces_int([], _, _) -->
 836    [].
 837wiki_faces_int([H|T], ArgNames, Options) -->
 838    wiki_face(H, ArgNames, Options),
 839    !,
 840    wiki_faces(T, ArgNames, Options).
 841wiki_faces_int([Before,EmphTerm|T], ArgNames, Options) -->
 842    emphasis_before(Before),
 843    emphasis_seq(EmphTerm, ArgNames, Options),
 844    !,
 845    wiki_faces_int(T, ArgNames, Options).
 846wiki_faces_int([H|T], ArgNames, Options) -->
 847    wiki_face_simple(H, ArgNames, Options),
 848    !,
 849    wiki_faces(T, ArgNames, Options).
 850
 851next_level(Options0, Options) -->
 852    {   succ(NewDepth, Options0.depth)
 853    ->  Options = Options0.put(depth, NewDepth)
 854    ;   throw(pldoc(depth_limit))
 855    }.
 856
 857%!  prolog:doc_wiki_face(-Out, +VarNames)// is semidet.
 858%!  prolog:doc_wiki_face(-Out, +VarNames, +Options0)// is semidet.
 859%
  860%   Hook that can be  used  to   provide  processing  for
 861%   additional _inline_ wiki constructs.  The DCG list is a list of
 862%   tokens.  Defined tokens are:
 863%
 864%     - w(Atom)
 865%     Recognised word (alphanumerical)
 866%     - Atom
 867%     Single character atom representing punctuation marks or the
 868%     atom =|' '|= (space), representing white-space.
 869%
 870%   The  Out  variable  is  input  for    the  backends  defined  in
 871%   doc_latex.pl and doc_html.pl. Roughly, these   are terms similar
 872%   to what html//1 from library(http/html_write) accepts.
 873
 874wiki_face(Out, Args, _) -->
 875    prolog:doc_wiki_face(Out, Args),
 876    !.
 877wiki_face(var(Arg), ArgNames, _) -->
 878    [w(Arg)],
 879    { memberchk(Arg, ArgNames)
 880    },
 881    !.
 882wiki_face(b(Bold), ArgNames, Options) -->
 883    [*,'|'], next_level(Options, NOptions),
 884    wiki_faces_int(Bold, ArgNames, NOptions), ['|',*],
 885    !.
 886wiki_face(i(Italic), ArgNames, Options) -->
 887    ['_','|'], next_level(Options, NOptions),
 888    wiki_faces_int(Italic, ArgNames, NOptions), ['|','_'],
 889    !.
 890wiki_face(code(Code), _, _) -->
 891    [=], eq_code_words(Words), [=],
 892    !,
 893    { atomic_list_concat(Words, Code) }.
 894wiki_face(code(Code), _, _) -->
 895    [=,'|'], wiki_words(Code), ['|',=],
 896    !.
 897wiki_face(Code, _, _) -->
 898    ['`'], code_words(Words), ['`'],
 899    { atomic_list_concat(Words, Text),
 900      catch(atom_to_term(Text, Term, Vars), _, fail),
 901      !,
 902      code_face(Text, Term, Vars, Code)
 903    }.
 904wiki_face(Face, _, _) -->
 905    [ w(Name) ], arg_list(List),
 906    { atomic_list_concat([Name|List], Text),
 907      catch(atom_to_term(Text, Term, Vars), _, fail),
 908      term_face(Text, Term, Vars, Face)
 909    },
 910    !.
 911        % Below this, we only do links.
 912wiki_face(_, _, Options) -->
 913    { Options.get(link) == false,
 914      !,
 915      fail
 916    }.
 917wiki_face(\predref(Name/Arity), _, _) -->
 918    [ w(Name), '/' ], arity(Arity),
 919    { functor_name(Name)
 920    },
 921    !.
 922wiki_face(\predref(Module:(Name/Arity)), _, _) -->
 923    [ w(Module), ':', w(Name), '/' ], arity(Arity),
 924    { functor_name(Name)
 925    },
 926    !.
 927wiki_face(\predref(Name/Arity), _, _) -->
 928    prolog_symbol_char(S0),
 929    symbol_string(SRest), [ '/' ], arity(Arity),
 930    !,
 931    { atom_chars(Name, [S0|SRest])
 932    }.
 933wiki_face(\predref(Name//Arity), _, _) -->
 934    [ w(Name), '/', '/' ], arity(Arity),
 935    { functor_name(Name)
 936    },
 937    !.
 938wiki_face(\predref(Module:(Name//Arity)), _, _) -->
 939    [ w(Module), ':', w(Name), '/', '/' ], arity(Arity),
 940    { functor_name(Name)
 941    },
 942    !.
 943wiki_face(\include(Name, Type, Options), _, _) -->
 944    ['[','['], file_name(Base, Ext), [']',']'],
 945    { autolink_extension(Ext, Type),
 946      !,
 947      file_name_extension(Base, Ext, Name),
 948      resolve_file(Name, Options, [])
 949    },
 950    !.
 951wiki_face(\include(Name, Type, [caption(Caption)|Options]), _, _) -->
 952    (   ['!','['], tokens(100, Caption), [']','(']
 953    ->  file_name(Base, Ext), [')'],
 954        { autolink_extension(Ext, Type),
 955          !,
 956          file_name_extension(Base, Ext, Name),
 957          resolve_file(Name, Options, [])
 958        }
 959    ),
 960    !.
 961wiki_face(Link, ArgNames, Options) -->          % TWiki: [[Label][Link]]
 962    (   ['[','['], wiki_label(Label, ArgNames, Options), [']','[']
 963    ->  wiki_link(Link, [label(Label), relative(true), end(']')]),
 964        [']',']'], !
 965    ).
 966wiki_face(Link, ArgNames, Options) -->          % Markdown: [Label](Link)
 967    (   ['['], wiki_label(Label, ArgNames, Options), [']','(']
 968    ->  wiki_link(Link, [label(Label), relative(true), end(')')]),
 969        [')'], !
 970    ).
 971wiki_face(Link, _ArgNames, _) -->
 972    wiki_link(Link, []),
 973    !.
 974
 975wiki_label(Label, _ArgNames, _Options) -->
 976    image_label(Label).
 977wiki_label(Label, ArgNames, Options) -->
 978    next_level(Options, NOptions),
 979    limit(40, wiki_faces(Label, ArgNames, NOptions.put(link,false))).
 980
 981%!  wiki_face_simple(-Out, +ArgNames, +Options)
 982%
 983%   Skip simple (non-markup) wiki.
 984
 985wiki_face_simple(Word, _, _) -->
 986    [ w(Word) ],
 987    !.
 988wiki_face_simple(SpaceOrPunct, _, _) -->
 989    [ SpaceOrPunct ],
 990    { atomic(SpaceOrPunct) },
 991    !.
 992wiki_face_simple(FT, ArgNames, _) -->
 993    [Structure],
 994    { wiki_faces(Structure, ArgNames, FT)
 995    }.
 996
 997wiki_words([]) --> [].
 998wiki_words([Word|T]) --> [w(Word)], !, wiki_words(T).
 999wiki_words([Punct|T]) --> [Punct], {atomic(Punct)}, wiki_words(T).
1000
1001%!  code_words(-Words)//
1002%
1003%   True when Words is the  content   as  it  appears in =|`code`|=,
1004%   where =|``|= is mapped to =|`|=.
1005
1006code_words([]) --> [].
1007code_words([Word|T]) --> [w(Word)], code_words(T).
1008code_words(CodeL) --> ['`','`'], {CodeL = ['`'|T]}, code_words(T).
1009code_words([Punct|T]) --> [Punct], {atomic(Punct)}, code_words(T).
1010
1011%!  eq_code_words(-Words)//
1012%
1013%   Stuff that can be between single `=`.  This is limited to
1014%
1015%           - Start and end must be a word
1016%           - In between may be the following punctuation chars:
1017%             =|.-:/|=, notably dealing with file names and
1018%             identifiers in various external languages.
1019
1020eq_code_words([Word]) -->
1021    [ w(Word) ].
1022eq_code_words([Word|T]) -->
1023    [ w(Word) ], eq_code_internals(T, [End]), [w(End)].
1024
1025eq_code_internals(T, T) --> [].
1026eq_code_internals([H|T], Tail) -->
1027    eq_code_internal(H),
1028    eq_code_internals(T, Tail).
1029
1030eq_code_internal(Word) -->
1031    [w(Word)].
1032eq_code_internal(Punct) -->
1033    [Punct],
1034    { eq_code_internal_punct(Punct) }.
1035
1036eq_code_internal_punct('.').
1037eq_code_internal_punct('-').
1038eq_code_internal_punct(':').
1039eq_code_internal_punct('/').
1040
1041
1042%!  code_face(+Text, +Term, +Vars, -Code) is det.
1043%
1044%   Deal with =|`... code ...`|=  sequences.   Text  is  the matched
1045%   text, Term is the parsed Prolog term   and Code is the resulting
1046%   intermediate code.
1047
1048code_face(Text, Var, _, Code) :-
1049    var(Var),
1050    !,
1051    Code = var(Text).
1052code_face(Text, _, _, code(Text)).
1053
1054
1055%!  emphasis_seq(-Out, +ArgNames, +Options) is semidet.
1056%
1057%   Recognise emphasis sequences
1058
1059emphasis_seq(EmphTerm, ArgNames, Options) -->
1060    emphasis_start(C),
1061    next_level(Options, NOptions),
1062    matches(limit(100, wiki_faces(Emph, ArgNames, NOptions)), Input, Last),
1063    emphasis_end(C),
1064    { emph_markdown(Last, Input),
1065      emphasis_term(C, Emph, EmphTerm)
1066    },
1067    !.
1068
1069
1070%!  emphasis_term(+Emphasis, +Tokens, -Term) is det.
1071%!  emphasis_before(-Before)// is semidet.
1072%!  emphasis_start(-Emphasis)// is semidet.
1073%!  emphasis_end(+Emphasis)// is semidet.
1074%
1075%   Primitives for Doxygen emphasis handling.
1076
1077emphasis_term('_',   Term, i(Term)).
1078emphasis_term('*',   Term, b(Term)).
1079emphasis_term('__',  Term, strong(Term)).
1080emphasis_term('**',  Term, strong(Term)).
1081
1082emph_markdown(_, [w(_)]) :- !.
1083emph_markdown(Last, Tokens) :-
1084    \+ emphasis_after_sep(Last),
1085    catch(b_getval(pldoc_object, Obj), _, Obj = '??'),
1086    debug(markdown(emphasis), '~q: additionally emphasis: ~p',
1087          [Obj, Tokens]).
1088
1089emphasis_before(Before) -->
1090    [Before],
1091    { emphasis_start_sep(Before) }.
1092
1093emphasis_start_sep(' ').
1094emphasis_start_sep('<').
1095emphasis_start_sep('{').
1096emphasis_start_sep('(').
1097emphasis_start_sep('[').
1098emphasis_start_sep(',').
1099emphasis_start_sep(':').
1100emphasis_start_sep(';').
1101
1102emphasis_start(Which), [w(Word)] -->
1103    emphasis(Which),
1104    [w(Word)].
1105
1106emphasis(**)   --> [*, *].
1107emphasis(*)    --> [*].
1108emphasis('__') --> ['_', '_'].
1109emphasis('_')  --> ['_'].
1110
1111emphasis_end(Which), [After] -->
1112    emphasis(Which),
1113    [ After ],
1114    !,
1115    { After \= w(_) }.
1116emphasis_end(Which) -->
1117    emphasis(Which).
1118
1119% these characters should not be before a closing * or _.
1120
1121emphasis_after_sep(' ').
1122emphasis_after_sep('(').
1123emphasis_after_sep('[').
1124emphasis_after_sep('<').
1125emphasis_after_sep('=').
1126emphasis_after_sep('+').
1127emphasis_after_sep('\\').
1128emphasis_after_sep('@').
1129
1130
 1131%!  arg_list(-Atoms)// is nondet.
1132%
1133%   Atoms  is  a  token-list  for  a    Prolog   argument  list.  An
1134%   argument-list is a sequence of tokens '(' ... ')'.
1135%
1136%   @bug    the current implementation does not deal correctly with
1137%           brackets that are embedded in quoted strings.
1138
1139arg_list(['('|T]) -->
1140    ['('], arg_list_close(T, 1).
1141
1142arg_list_close(Tokens, Depth) -->
1143    [')'],
1144    !,
1145    (   { Depth == 1 }
1146    ->  { Tokens = [')'] }
1147    ;   { Depth > 1 }
1148    ->  { Tokens = [')'|More],
1149          NewDepth is Depth - 1
1150        },
1151        arg_list_close(More, NewDepth)
1152    ).
1153arg_list_close(['('|T], Depth) -->
1154    ['('], { NewDepth is Depth+1 },
1155    arg_list_close(T, NewDepth).
1156arg_list_close([H|T], Depth) -->
1157    [w(H)],
1158    !,
1159    arg_list_close(T, Depth).
1160arg_list_close([H|T], Depth) -->
1161    [H],
1162    arg_list_close(T, Depth).
1163
1164
1165%!  term_face(+Text, +Term, +Vars, -Face) is semidet.
1166%
1167%   Process embedded Prolog-terms. Currently   processes  Alias(Arg)
1168%   terms that refer to files.  Future   versions  will also provide
1169%   pretty-printing of Prolog terms.
1170
1171term_face(_Text, Term, _Vars, \file(Name, FileOptions)) :-
1172    ground(Term),
1173    compound(Term),
1174    compound_name_arguments(Term, Alias, [_]),
1175    user:file_search_path(Alias, _),
1176    existing_file(Term, FileOptions, []),
1177    !,
1178    format(atom(Name), '~q', [Term]).
1179term_face(Text, Term, Vars, Face) :-
1180    code_face(Text, Term, Vars, Face).
1181
1182untag([], []).
1183untag([w(W)|T0], [W|T]) :-
1184    !,
1185    untag(T0, T).
1186untag([H|T0], [H|T]) :-
1187    untag(T0, T).
1188
1189%!  image_label(-Label)//
1190%
1191%   Match File[;param=value[,param=value]*]
1192
1193image_label(\include(Name, image, Options)) -->
1194    file_name(Base, Ext),
1195    { autolink_extension(Ext, image),
1196      file_name_extension(Base, Ext, Name),
1197      resolve_file(Name, Options, RestOptions)
1198    },
1199    file_options(RestOptions).
1200
1201
1202%!  file_options(-Options) is det.
1203%
1204%   Extracts additional processing options for  files. The format is
 1205%   =|;name="value",name2="value2",...|=  Spaces are not allowed.
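%
%   For example (illustrative), =|logo.png;width="64",height="64"|=
%   yields the options =|[width(64), height(64)]|=.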
1206
1207file_options(Options) -->
1208    [;], nv_pairs(Options),
1209    !.
1210file_options([]) -->
1211    [].
1212
1213nv_pairs([H|T]) -->
1214    nv_pair(H),
1215    (   [',']
1216    ->  nv_pairs(T)
1217    ;   {T=[]}
1218    ).
1219
1220nv_pair(Option) -->
1221    [ w(Name), =,'"'], tokens(Tokens), ['"'],
1222    !,
1223    { untag(Tokens, Atoms),
1224      atomic_list_concat(Atoms, Value0),
1225      (   atom_number(Value0, Value)
1226      ->  true
1227      ;   Value = Value0
1228      ),
1229      Option =.. [Name,Value]
1230    }.
1231
1232
1233%!  wiki_link(-Link, +Options)// is semidet.
1234%
1235%   True if we can find a link to a file or URL. Links are described
1236%   as one of:
1237%
1238%       $ filename :
1239%       A filename defined using autolink_file/2 or
1240%       autolink_extension/2
1241%       $ <url-protocol>://<rest-url> :
1242%       A fully qualified URL
1243%       $ '<' URL '>' :
1244%       Be more relaxed on the URL specification.
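%
%   For example, the running text below (illustrative) contains three
%   recognised links:
%
%     ==
%     See README.md, http://www.swi-prolog.org and
%     <http://www.swi-prolog.org/pldoc/>.
%     ==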
1245
1246:- multifile
1247    user:url_path/2.
1248
1249wiki_link(\file(Name, FileOptions), Options) -->
1250    file_name(Base, Ext),
1251    { file_name_extension(Base, Ext, Name),
1252      (   autolink_file(Name, _)
1253      ;   autolink_extension(Ext, _)
1254      ),
1255      !,
1256      resolve_file(Name, FileOptions, Options)
1257    }.
1258wiki_link(\file(Name, FileOptions), Options) -->
1259    [w(Name)],
1260    { autolink_file(Name, _),
1261      !,
1262      resolve_file(Name, FileOptions, Options)
1263    },
1264    !.
1265wiki_link(a(href(Ref), Label), Options) -->
1266    [ w(Prot),:,/,/], { url_protocol(Prot) },
1267    { option(end(End), Options, space)
1268    },
1269    tokens_no_whitespace(Rest), peek_end_url(End),
1270    !,
1271    { atomic_list_concat([Prot, :,/,/ | Rest], Ref),
1272      option(label(Label), Options, Ref)
1273    }.
1274wiki_link(a(href(Ref), Label), _Options) -->
1275    [<, w(Alias), :],
1276    tokens_no_whitespace(Rest), [>],
1277    { Term = (Alias:Rest),
1278      prolog:url_expansion_hook(Term, Ref, Label), !
1279    }.
1280wiki_link(a(href(Ref), Label), Options) -->
1281    [<, w(Alias), :],
1282    { user:url_path(Alias, _)
1283    },
1284    tokens_no_whitespace(Rest), [>],
1285    { atomic_list_concat(Rest, Local),
1286      (   Local == ''
1287      ->  Term =.. [Alias,'.']
1288      ;   Term =.. [Alias,Local]
1289      ),
1290      catch(expand_url_path(Term, Ref), _, fail),
1291      option(label(Label), Options, Ref)
1292    }.
1293wiki_link(a(href(Ref), Label), Options) -->
1294    [<],
1295    (   { option(relative(true), Options),
1296          Parts = Rest
1297        }
1298    ->  tokens_no_whitespace(Rest)
1299    ;   { Parts = [Prot, : | Rest]
1300        },
1301        [w(Prot), :], tokens_no_whitespace(Rest)
1302    ),
1303    [>],
1304    !,
1305    { atomic_list_concat(Parts, Ref),
1306      option(label(Label), Options, Ref)
1307    }.
1308
1309%!  prolog:url_expansion_hook(+Term, -HREF, -Label) is semidet.
1310%
1311%   This hook is called after   recognising  =|<Alias:Rest>|=, where
1312%   Term is of the form Alias(Rest). If   it  succeeds, it must bind
1313%   HREF to an atom or string representing the link target and Label
1314%   to an html//1 expression for the label.
1315
1316%!  file_name(-Name:atom, -Ext:atom)// is semidet.
1317%
1318%   Matches a filename.  A filename is defined as a sequence
 1319%   <segment>{/<segment>}.<ext>.
1320
1321file_name(FileBase, Extension) -->
1322    segment(S1),
1323    segments(List),
1324    ['.'], file_extension(Extension),
1325    !,
1326    { atomic_list_concat([S1|List], '/', FileBase) }.
1327
1328segment(..) -->
1329    ['.','.'],
1330    !.
1331segment(Word) -->
1332    [w(Word)].
1333segment(Dir) -->
1334    [w(Word),'.',w(d)],
1335    { atom_concat(Word, '.d', Dir) }.
1336
1337segments([H|T]) -->
1338    ['/'],
1339    !,
1340    segment(H),
1341    segments(T).
1342segments([]) -->
1343    [].
1344
1345file_extension(Ext) -->
1346    [w(Ext)],
1347    { autolink_extension(Ext, _)
1348    }.
1349
1350
1351%!  resolve_file(+Name, -Options, ?RestOptions) is det.
1352%
1353%   Find the actual file based on the pldoc_file global variable. If
1354%   present  and  the   file   is    resolvable,   add   an   option
1355%   absolute_path(Path) that reflects the current   location  of the
1356%   file.
1357
1358resolve_file(Name, Options, Rest) :-
1359    existing_file(Name, Options, Rest),
1360    !.
1361resolve_file(_, Options, Options).
1362
1363
1364existing_file(Name, Options, Rest) :-
1365    catch(existing_file_p(Name, Options, Rest), _, fail).
1366
1367existing_file_p(Name, Options, Rest) :-
1368    nb_current(pldoc_file, RelativeTo),
1369    RelativeTo \== [],
1370    (   compound(Name)
1371    ->  Extra = [file_type(prolog)]
1372    ;   Extra = []
1373    ),
1374    absolute_file_name(Name, Path,
1375                       [ relative_to(RelativeTo),
1376                         access(read),
1377                         file_errors(fail)
1378                       | Extra
1379                       ]),
1380    Options = [ absolute_path(Path) | Rest ].
1381
1382%!  arity(-Arity:int)// is semidet.
1383%
1384%   True if the next token can be  interpreted as an arity. That is,
 1385%   refers to a non-negative integer of at most 20. Although Prolog
1386%   allows for higher arities, we assume 20   is  a fair maximum for
1387%   user-created predicates that are documented.
1388
1389arity(Arity) -->
1390    [ w(Word) ],
1391    { catch(atom_number(Word, Arity), _, fail),
1392      Arity >= 0, Arity < 20
1393    }.
1394
1395%!  symbol_string(-String)// is nondet
1396%
1397%   Accept a sequence of Prolog symbol characters, starting with the
1398%   shortest (empty) match.
1399
1400symbol_string([]) -->
1401    [].
1402symbol_string([H|T]) -->
1403    [H],
1404    { prolog_symbol_char(H) },
1405    symbol_string(T).
1406
1407prolog_symbol_char(C) -->
1408    [C],
1409    { prolog_symbol_char(C) }.
1410
1411%!  prolog_symbol_char(?Char)
1412%
1413%   True if char is classified by Prolog as a symbol char.
1414
1415prolog_symbol_char(#).
1416prolog_symbol_char($).
1417prolog_symbol_char(&).
1418prolog_symbol_char(*).
1419prolog_symbol_char(+).
1420prolog_symbol_char(-).
1421prolog_symbol_char(.).
1422prolog_symbol_char(/).
1423prolog_symbol_char(:).
1424prolog_symbol_char(<).
1425prolog_symbol_char(=).
1426prolog_symbol_char(>).
1427prolog_symbol_char(?).
1428prolog_symbol_char(@).
1429prolog_symbol_char(\).
1430prolog_symbol_char(^).
1431prolog_symbol_char(~).
1432
1433
1434functor_name(String) :-
1435    sub_atom(String, 0, 1, _, Char),
1436    char_type(Char, lower).
1437
1438url_protocol(http).
1439url_protocol(https).
1440url_protocol(ftp).
1441url_protocol(mailto).
1442
1443peek_end_url(space) -->
1444    peek(End),
1445    { space_token(End) },
1446    !.
1447peek_end_url(space, [], []) :- !.
1448peek_end_url(Token) -->
1449    peek(Token),
1450    !.
1451
1452space_token(' ') :- !.
1453space_token('\r') :- !.
1454space_token('\n') :- !.
1455space_token(T) :-
1456    \+ atom(T),                     % high level format like p(...)
1457    \+ T = w(_).
1458
1459%!  autolink_extension(?Ext, ?Type) is nondet.
1460%
 1461%   True if Ext is a filename extension that creates automatic links
1462%   in the documentation.
1463
1464autolink_extension(Ext, Type) :-
1465    prolog:doc_autolink_extension(Ext, Type),
1466    !.
1467autolink_extension(Ext, prolog) :-
1468    user:prolog_file_type(Ext,prolog),
1469    !.
1470autolink_extension(txt, wiki).
1471autolink_extension(md,  wiki).
1472autolink_extension(gif, image).
1473autolink_extension(png, image).
1474autolink_extension(jpg, image).
1475autolink_extension(jpeg, image).
1476autolink_extension(svg, image).
1477
1478%!  autolink_file(?File, -Type) is nondet.
1479%
1480%   Files to which we automatically create links, regardless of the
1481%   extension.
1482
1483autolink_file('README', wiki).
1484autolink_file('TODO', wiki).
1485autolink_file('ChangeLog', wiki).
1486
1487                 /*******************************
1488                 *           SECTIONS           *
1489                 *******************************/
1490
1491%!  section_comment_header(+Lines, -Header, -RestLines) is semidet.
1492%
1493%   Processes   /**   <section>   comments.   Header   is   a   term
1494%   \section(Type, Title), where  Title  is   an  atom  holding  the
1495%   section title and Type is an atom holding the text between <>.
1496%
1497%   @param Lines    List of Indent-Codes.
1498%   @param Header   DOM term of the format \section(Type, Title),
1499%                   where Type is an atom from <type> and Title is
 1500%                   a string holding the title.
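%
%   For example, a comment starting with the line below (illustrative)
%   yields the header =|\section(module, 'PlDoc wiki parser')|=:
%
%     ==
%     /** <module> PlDoc wiki parser
%     ==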
1501
1502section_comment_header([_-Line|Lines], Header, Lines) :-
1503    phrase(section_line(Header), Line).
1504
1505section_line(\section(Type, Title)) -->
1506    ws, "<", word(Codes), ">", normalise_white_space(TitleCodes),
1507    { atom_codes(Type, Codes),
1508      atom_codes(Title, TitleCodes)
1509    }.
1510
1511
1512%!  normalise_white_space(-Text)// is det.
1513%
 1514%   Text is the input after deleting leading   and  trailing white space
1515%   and mapping all internal white space to a single space.
1516
1517normalise_white_space(Text) -->
1518    ws,
1519    normalise_white_space2(Text).
1520
1521normalise_white_space2(Text) -->
1522    non_ws(Text, Tail),
1523    ws,
1524    (   eos
1525    ->  { Tail = [] }
1526    ;   { Tail = [0'\s|T2] },
1527        normalise_white_space2(T2)
1528    ).
1529
1530
1531                 /*******************************
1532                 *           TOKENIZER          *
1533                 *******************************/
1534
1535%!  tokenize_lines(+Lines:lines, -TokenLines) is det
1536%
1537%   Convert Indent-Codes into Indent-Tokens
1538
1539tokenize_lines(Lines, TokenLines) :-
1540    tokenize_lines(Lines, -1, TokenLines).
1541
1542tokenize_lines([], _, []) :- !.
1543tokenize_lines(Lines, Indent, [Pre|T]) :-
1544    verbatim(Lines, Indent, Pre, RestLines),
1545    !,
1546    tokenize_lines(RestLines, Indent, T).
1547tokenize_lines([I-H0|T0], Indent0, [I-H|T]) :-
1548    phrase(line_tokens(H), H0),
1549    (   H == []
1550    ->  Indent = Indent0
1551    ;   Indent = I
1552    ),
1553    tokenize_lines(T0, Indent, T).
1554
1555
1556%!  line_tokens(-Tokens:list)// is det.
1557%
 1558%   Create a list of tokens, where  each  token   is  either  a ' ' to
1559%   denote spaces, a  term  w(Word)  denoting   a  word  or  an atom
1560%   denoting a punctuation  character.   Underscores  (_)  appearing
1561%   inside an alphanumerical string are considered part of the word.
1562%   E.g., "hello_world_" tokenizes into [w(hello_world), '_'].
1563
1564line_tokens([H|T]) -->
1565    line_token(H),
1566    !,
1567    line_tokens(T).
1568line_tokens([]) -->
1569    [].
1570
1571line_token(T) -->
1572    [C],
1573    (   { code_type(C, space) }
1574    ->  ws,
1575        { T = ' ' }
1576    ;   { code_type(C, alnum) },
1577        word(Rest),
1578        { atom_codes(W, [C|Rest]),
1579          T = w(W)
1580        }
1581    ;   { char_code(T, C) }
1582    ).
1583
1584word([C0|T]) -->
1585    [C0],  { code_type(C0, alnum) },
1586    !,
1587    word(T).
1588word([0'_, C1|T]) -->
1589    [0'_, C1],  { code_type(C1, alnum) },
1590    !,
1591    word(T).
1592word([]) -->
1593    [].
1594
1595alphas([C0|T]) -->
1596    [C0],  { code_type(C0, alpha) },
1597    !,
1598    alphas(T).
1599alphas([]) -->
1600    [].
1601
1602%!  verbatim(+Lines, +EnvIndent, -Pre, -RestLines) is det.
1603%
1604%   Extract a verbatim environment.  The  returned   Pre  is  of the
1605%   format pre(Attributes, String). The indentation   of the leading
 1606%   fence is subtracted from the indentation of the verbatim lines.
 1607%   Three types of fences are supported: the traditional =|==|=, the
 1608%   Doxygen =|~~~|= and the Markdown =|```|= (minimum 3 characters),
 1609%   optionally followed by =|{.ext}|= to indicate the language.
1610%
1611%   Verbatim environment is delimited as
1612%
1613%     ==
1614%       ...,
 1615%       verbatim(Lines, EnvIndent, Pre, Rest)
1616%       ...,
1617%     ==
1618%
1619%   In addition, a verbatim environment may  simply be indented. The
1620%   restrictions are described in the documentation.
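%
%   For example, a fenced block with a language hint (illustrative):
%
%     ==
%     ~~~{.prolog}
%     ancestor(X, Y) :- parent(X, Y).
%     ~~~
%     ==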
1621
1622verbatim(Lines, _,
1623         Indent-pre([class(code), ext(Ext)],Pre),
1624         RestLines) :-
1625    skip_empty_lines(Lines, [Indent-FenceLine|CodeLines]),
1626    verbatim_fence(FenceLine, Fence, Ext),
1627    verbatim_body(CodeLines, Indent, [10|PreCodes], [],
1628                  [Indent-Fence|RestLines]),
1629    !,
1630    atom_codes(Pre, PreCodes).
1631verbatim([_-[],Indent-Line|Lines], EnvIndent,
1632         Indent-pre(class(code),Pre),
1633         RestLines) :-
1634    EnvIndent >= 0,
1635    Indent >= EnvIndent+4, Indent =< EnvIndent+8,
1636    valid_verbatim_opening(Line),
1637    indented_verbatim_body([Indent-Line|Lines], Indent,
1638                           CodeLines, RestLines),
1639    !,
1640    lines_code_text(CodeLines, Indent, [10|PreCodes]),
1641    atom_codes(Pre, PreCodes).
1642
1643verbatim_body(Lines, _, PreT, PreT, Lines).
1644verbatim_body([I-L|Lines], Indent, [10|Pre], PreT, RestLines) :-
1645    PreI is I - Indent,
1646    phrase(pre_indent(PreI), Pre, PreT0),
1647    verbatim_line(L, PreT0, PreT1),
1648    verbatim_body(Lines, Indent, PreT1, PreT, RestLines).
1649
1650verbatim_fence(Line, Fence, '') :-
1651    Line == [0'=,0'=],
1652    !,
1653    Fence = Line.
1654verbatim_fence(Line, Fence, Ext) :-
1655    tilde_fence(Line, Fence, 0, Ext).
1656verbatim_fence(Line, Fence, Ext) :-
1657    md_fence(Line, Fence, 0, Ext).
1658
1659tilde_fence([0'~|T0], [0'~|F0], C0, Ext) :-
1660    !,
1661    C1 is C0+1,
1662    tilde_fence(T0, F0, C1, Ext).
1663tilde_fence(List, [], C, Ext) :-
1664    C >= 3,
1665    (   List == []
1666    ->  Ext = ''
1667    ;   phrase(tilde_fence_ext(ExtCodes), List)
1668    ->  atom_codes(Ext, ExtCodes)
1669    ).
1670
1671%!  tilde_fence_ext(-Ext)// is semidet.
1672%
1673%   Detect ```{.prolog} (Doxygen) or ```{prolog} (GitHub)
1674
1675tilde_fence_ext(Ext) -->
1676    "{.", !, alphas(Ext), "}".
1677tilde_fence_ext(Ext) -->
1678    "{", alphas(Ext), "}".
1679
1680md_fence([0'`|T0], [0'`|F0], C0, Ext) :-
1681    !,
1682    C1 is C0+1,
1683    md_fence(T0, F0, C1, Ext).
1684md_fence(List, [], C, Ext) :-
1685    C >= 3,
1686    (   List == []
1687    ->  Ext = ''
1688    ;   phrase(md_fence_ext(ExtCodes), List),
1689        atom_codes(Ext, ExtCodes)
1690    ).
1691
1692% Also support Doxygen's curly bracket notation.
1693md_fence_ext(Ext) -->
1694    tilde_fence_ext(Ext),
1695    !.
1696% In Markdown language names appear without brackets.
1697md_fence_ext(Ext) -->
1698    alphas(Ext).
1699
1700%!  indented_verbatim_body(+Lines, +Indent, -CodeLines, -RestLines)
1701%
1702%   Takes more verbatim lines. The input   ends  with the first line
1703%   that is indented less than Indent. There cannot be more than one
 1704%   consecutive empty line in the verbatim body.
1705
1706indented_verbatim_body([I-L|T0], Indent, [I-L|T], RestLines) :-
1707    L \== [], I >= Indent,
1708    !,
1709    indented_verbatim_body(T0, Indent, T, RestLines).
1710indented_verbatim_body([I0-[],I-L|T0], Indent, [I0-[],I-L|T], RestLines) :-
1711    I >= Indent,
1712    valid_verbatim_opening(L),
1713    indented_verbatim_body(T0, Indent, T, RestLines).
1714indented_verbatim_body(Lines, _, [], Lines).
1715
1716%!  valid_verbatim_opening(+Line) is semidet.
1717%
1718%   Tests that line does not look like a list item or table.
1719
1720valid_verbatim_opening([0'||_]) :- !, fail.
1721valid_verbatim_opening(Line) :-
1722    Line \== [],
1723    \+ ( phrase(line_tokens(Tokens), Line),
1724         list_item_prefix(_Type, Tokens, _Rest)
1725       ).
1726
1727%!  lines_code_text(+Lines, +Indent, -Codes) is det.
1728%
1729%   Extract the actual code content from a list of line structures.
1730
1731lines_code_text([], _, []).
1732lines_code_text([_-[]|T0], Indent, [10|T]) :-
1733    !,
1734    lines_code_text(T0, Indent, T).
1735lines_code_text([I-Line|T0], Indent, [10|T]) :-
1736    PreI is I-Indent,
1737    phrase(pre_indent(PreI), T, T1),
1738    verbatim_line(Line, T1, T2),
1739    lines_code_text(T0, Indent, T2).
1740
1741
1742%!  pre_indent(+Indent)// is det.
1743%
1744%   Insert Indent leading spaces.  Note we cannot use tabs as these
1745%   are not expanded by the HTML <pre> element.
1746
1747pre_indent(N) -->
1748    { N > 0,
1749      !,
1750      N2 is N - 1
1751    }, " ",
1752    pre_indent(N2).
1753pre_indent(_) -->
1754    "".
1755
1756verbatim_line(Line, Pre, PreT) :-
1757    append(Line, PreT, Pre).
1758
1759
1760                 /*******************************
1761                 *            SUMMARY           *
1762                 *******************************/
1763
1764%!  summary_from_lines(+Lines:lines, -Summary:list(codes)) is det.
1765%
1766%   Produce a summary for Lines. Similar  to JavaDoc, the summary is
1767%   defined as the first sentence of the documentation. In addition,
1768%   a sentence is also ended by an  empty   line  or  the end of the
1769%   comment.
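%
%   For example (an illustrative sketch added editorially):
%
%     ==
%     ?- string_codes("Order pizza.  Then sleep.", _Line),
%        summary_from_lines([0-_Line], _Summary),
%        format("~s~n", [_Summary]).
%     Order pizza.
%     true.
%     ==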
1770
1771summary_from_lines(Lines, Sentence) :-
1772    skip_empty_lines(Lines, Lines1),
1773    summary2(Lines1, Sentence0),
1774    end_sentence(Sentence0, Sentence).
1775
1776summary2(_, Sentence) :-
1777    Sentence == [],
1778    !.              % we finished our sentence
1779summary2([], []) :- !.
1780summary2([_-[]|_], []) :- !.            % empty line
1781summary2([_-[0'@|_]|_], []) :- !.       % keyword line
1782summary2([_-L0|Lines], Sentence) :-
1783    phrase(sentence(Sentence, Tail), L0, _),
1784    summary2(Lines, Tail).
1785
1786sentence([C,End], []) -->
1787    [C,End],
1788    { \+ code_type(C, period),
1789      code_type(End, period)                % ., !, ?
1790    },
1791    white,
1792    !.
1793sentence([0' |T0], T) -->
1794    space,
1795    !,
1796    ws,
1797    sentence(T0, T).
1798sentence([H|T0], T) -->
1799    [H],
1800    sentence(T0, T).
1801sentence([0' |T], T) -->                % '
1802    eos.
1803
1804white -->
1805    space.
1806white -->
1807    eos.
1808
1809%!  skip_empty_lines(+LinesIn, -LinesOut) is det.
1810%
1811%   Remove empty lines from the start of the input.  Note that
1812%   this is used to process both character and token data.
1813
1814skip_empty_lines([], []).
1815skip_empty_lines([_-[]|Lines0], Lines) :-
1816    !,
1817    skip_empty_lines(Lines0, Lines).
1818skip_empty_lines(Lines, Lines).
1819
1820end_sentence([], []).
1821end_sentence([0'\s], [0'.]) :- !.
1822end_sentence([H|T0], [H|T]) :-
1823    end_sentence(T0, T).
1824
1825
1826                 /*******************************
1827                 *        CREATE LINES          *
1828                 *******************************/
1829
1830%!  indented_lines(+Text:list(codes), +Prefixes:list(codes),
1831%!                 -Lines:list) is det.
1832%
1833%   Extract a list of lines from Text, stripping an optional prefix
1834%   from Prefixes and leading blanks. Each line is a term Indent-Codes,
1835%   where Indent is the line position of the real text of the line.
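%
%   For example (an illustrative sketch; the backquoted terms are code
%   lists under the default back_quotes flag):
%
%     ==
%     ?- string_codes("% Hello\n%   World\n", _Text),
%        indented_lines(_Text, ["%"], _Lines),
%        _Lines == [2-`Hello`, 4-`World`].
%     true.
%     ==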
1836
1837indented_lines(Comment, Prefixes, Lines) :-
1838    must_be(codes, Comment),
1839    phrase(split_lines(Prefixes, Lines), Comment),
1840    !.
1841
1842split_lines(_, []) -->
1843    end_of_comment.
1844split_lines(Prefixes, [Indent-L1|Ls]) -->
1845    take_prefix(Prefixes, 0, Indent0),
1846    white_prefix(Indent0, Indent),
1847    take_line(L1),
1848    split_lines(Prefixes, Ls).
1849
1850
1851%!  end_of_comment//
1852%
1853%   Succeeds if we hit the end of the comment.
1854%
1855%   @bug    %*/ will be seen as the end of the comment.
1856
1857end_of_comment -->
1858    eos.
1859end_of_comment -->
1860    ws, stars, "*/".
1861
1862stars --> [].
1863stars --> "*", !, stars.
1864
1865
1866%!  take_prefix(+Prefixes:list(codes), +Indent0:int, -Indent:int)// is det.
1867%
1868%   Get the leading characters  from  the   input  and  compute  the
1869%   line-position at the end of the leading characters.
1870
1871take_prefix(Prefixes, I0, I) -->
1872    { member(Prefix, Prefixes),
1873      string_codes(Prefix, PrefixCodes)
1874    },
1875    prefix(PrefixCodes),
1876    !,
1877    { string_update_linepos(PrefixCodes, I0, I) }.
1878take_prefix(_, I, I) -->
1879    [].
1880
1881prefix([]) --> [].
1882prefix([H|T]) --> [H], prefix(T).
1883
1884white_prefix(I0, I) -->
1885    [C],
1886    {  code_type(C, white),
1887       !,
1888       update_linepos(C, I0, I1)
1889    },
1890    white_prefix(I1, I).
1891white_prefix(I, I) -->
1892    [].
1893
1894%!  string_update_linepos(+Codes, +Pos0, -Pos) is det.
1895%
1896%   Update line-position after adding Codes at Pos0.
1897
1898string_update_linepos([], I, I).
1899string_update_linepos([H|T], I0, I) :-
1900    update_linepos(H, I0, I1),
1901    string_update_linepos(T, I1, I).
1902
1903%!  update_linepos(+Code, +Pos0, -Pos) is det.
1904%
1905%   Update line-position after adding Code.
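%
%   For example (editorial note): with the assumed tab width of 8, the
%   expression (I0 \/ 7) + 1 in the first clause below moves a tab seen
%   at column 3 to column 8, and one seen at column 8 to column 16.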
1906%
1907%   @tbd    Currently assumes tab-width of 8.
1908
1909update_linepos(0'\t, I0, I) :-
1910    !,
1911    I is (I0\/7)+1.
1912update_linepos(0'\b, I0, I) :-
1913    !,
1914    I is max(0, I0-1).
1915update_linepos(0'\r, _, 0) :- !.
1916update_linepos(0'\n, _, 0) :- !.
1917update_linepos(_, I0, I) :-
1918    I is I0 + 1.
1919
1920%!  take_line(-Line:codes)// is det.
1921%
1922%   Take  a  line  from  the  input.   Line  does  not  include  the
1923%   terminating \r or \n character(s), nor trailing whitespace.
1924
1925take_line([]) -->
1926    "\r\n",
1927    !.                      % DOS file
1928take_line([]) -->
1929    "\n",
1930    !.                        % Unix file
1931take_line(Line) -->
1932    [H], { code_type(H, white) },
1933    !,
1934    take_white(White, WT),
1935    (   nl
1936    ->  { Line = [] }
1937    ;   { Line = [H|White] },
1938        take_line(WT)
1939    ).
1940take_line([H|T]) -->
1941    [H],
1942    !,
1943    take_line(T).
1944take_line([]) -->                       % end of string
1945    [].
1946
1947take_white([H|T0], T) -->
1948    [H],  { code_type(H, white) },
1949    !,
1950    take_white(T0, T).
1951take_white(T, T) -->
1952    [].
1953
1954%!  normalise_indentation(+LinesIn, -LinesOut) is det.
1955%
1956%   Re-normalise the indentation, such that the left-most line is at
1957%   zero.  Note that we skip empty lines in the computation.
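%
%   For example (illustrative): the lines [3-`abc`, 5-`def`] are
%   rewritten to [0-`abc`, 2-`def`].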
1958
1959normalise_indentation(Lines0, Lines) :-
1960    skip_empty_lines(Lines0, Lines1),
1961    Lines1 = [I0-_|Lines2],
1962    !,
1963    smallest_indentation(Lines2, I0, Subtract),
1964    (   Subtract == 0
1965    ->  Lines = Lines0
1966    ;   maplist(substract_indent(Subtract), Lines0, Lines)
1967    ).
1968normalise_indentation(Lines, Lines).
1969
1970smallest_indentation([], I, I).
1971smallest_indentation([_-[]|T], I0, I) :-
1972    !,
1973    smallest_indentation(T, I0, I).
1974smallest_indentation([X-_|T], I0, I) :-
1975    I1 is min(I0, X),
1976    smallest_indentation(T, I1, I).
1977
1978substract_indent(Subtract, I0-L, I-L) :-
1979    I is max(0,I0-Subtract).
1980
1981
1982                 /*******************************
1983                 *             MISC             *
1984                 *******************************/
1985
1986%!  strip_leading_par(+Dom0, -Dom) is det.
1987%
1988%   Remove the leading paragraph for  environments where a paragraph
1989%   is not required.
1990
1991strip_leading_par([p(C)|T], L) :-
1992    !,
1993    append(C, T, L).
1994strip_leading_par(L, L).
1995
1996
1997                 /*******************************
1998                 *           DCG BASICS         *
1999                 *******************************/
2000
2001%!  eos// is semidet.
2002%
2003%   Matches at the end of the input without consuming anything.
2004
2005eos([], []).
2006
2007%!  ws// is det
2008%
2009%   Eagerly skip layout characters
2010
2011ws -->
2012    [C], {code_type(C, space)},
2013    !,
2014    ws.
2015ws -->
2016    [].
2017
2018%       space// is semidet
2019%
2020%       True if the next code is layout.
2021
2022space -->
2023    [C],
2024    {code_type(C, space)}.
2025
2026%!  non_ws(-Text, ?Tail)// is det.
2027%
2028%   True if the  difference  list  Text-Tail   is  the  sequence  of
2029%   non-white-space characters.
2030
2031non_ws([H|T0], T) -->
2032    [H],
2033    { \+ code_type(H, space) },
2034    !,
2035    non_ws(T0, T).
2036non_ws(T, T) -->
2037    [].
2038
2039
2040%!  nl//
2041%
2042%   Get end-of-line
2043
2044nl -->
2045    "\r\n",
2046    !.
2047nl -->
2048    "\n".
2049
2050%!  peek(H)//
2051%
2052%   True if next token is H without eating it.
2053
2054peek(H, L, L) :-
2055    L = [H|_].
2056
2057%!  tokens(-Tokens:list)// is nondet.
2058%!  tokens(+Max, -Tokens:list)// is nondet.
2059%
2060%   Defensively take tokens from the input.  Backtracking takes more
2061%   tokens.  Structured tokens other than w(Word) are not included.
2062
2063tokens([]) --> [].
2064tokens([H|T]) --> token(H), tokens(T).
2065
2066tokens(_, []) --> [].
2067tokens(C, [H|T]) --> token(H), {succ(C1, C)}, tokens(C1, T).
2068
2069%!  tokens_no_whitespace(-Tokens:list(atom))// is nondet.
2070%
2071%   Defensively take tokens from the  input. Backtracking takes more
2072%   tokens.  Tokens  cannot  include  whitespace.  Word  tokens  are
2073%   returned as their represented words.
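%
%   For example (illustrative): phrase(tokens_no_whitespace(Words),
%   [w(hello), w(world)]) binds Words to [hello, world].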
2074
2075tokens_no_whitespace([]) -->
2076    [].
2077tokens_no_whitespace([Word|T]) -->
2078    [ w(Word) ],
2079    !,
2080    tokens_no_whitespace(T).
2081tokens_no_whitespace([H|T]) -->
2082    [H],
2083    { \+ space_token(H) },
2084    tokens_no_whitespace(T).
2085
2086token(Token) -->
2087    [Token],
2088    { token(Token) }.
2089
2090token(w(_)) :- !.
2091token(Token) :- atom(Token).
2092
2093%!  limit(+Count, :Rule)//
2094%
2095%   As limit/2, but for grammar rules.
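%
%   For example (an illustrative sketch using the non-exported tokens//1
%   defined above):
%
%     ==
%     ?- phrase(limit(2, tokens(T)), [w(a), w(b)], Rest).
%     T = [],
%     Rest = [w(a), w(b)] ;
%     T = [w(a)],
%     Rest = [w(b)].
%     ==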
2096
2097:- meta_predicate limit(+,2,?,?).
2098
2099limit(Count, Rule, Input, Rest) :-
2100    Count > 0,
2101    State = count(0),
2102    call(Rule, Input, Rest),
2103    arg(1, State, N0),
2104    N is N0+1,
2105    (   N =:= Count
2106    ->  !
2107    ;   nb_setarg(1, State, N)
2108    ).
2109
2110
2111                 /*******************************
2112                 *           MESSAGES           *
2113                 *******************************/
2114
2115:- multifile
2116    prolog:message//1.
2117
2118prolog:message(pldoc(deprecated_tag(Name, Tag))) -->
2119    [ 'PlDoc: Deprecated tag @~w (use @~w)'-[Name, Tag]
2120    ].
2121prolog:message(pldoc(unknown_tag(Name))) -->
2122    [ 'PlDoc: unknown tag @~w'-[Name]
2123    ].