View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2010-2013, University of Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(rdf_ntriples_old,
  36          [ load_rdf_ntriples/2,        % +File, -Triples
  37            rdf_ntriple_part/4          % +Field, -Value, <DCG>
  38          ]).
  39
  40
  41/** <module> RDF N-triples parser (obsolete)
  42
  43This module parses n-triple files as defined   by the W3C RDF working in
  44http://www.w3.org/TR/rdf-testcases/#ntriples.   This   format     is   a
  45simplified version of the RDF N3 notation   used  in the *.nt files that
  46are used to describe the normative outcome of the RDF test-cases.
  47
  48The returned list terms are of the form
  49
  50        rdf(Subject, Predicate, Object)
  51
  52where
  53
  54        * Subject
  55        is an atom or node(Id) for anonymous nodes
  56        * Predicate
  57        is an atom
  58        * Object
  59        is an atom, node(Id), literal(Atom) or xml(Atom)
  60
  61@deprecated     This library will shortly be replaced with a stub that
  62                calls library(semweb/rdf_ntriples).
  63*/
  64
  65%       load_rdf_ntriples(+Source, -Triples)
  66%
  67%       Load a file or stream to a list of rdf(S,P,O) triples.
  68
  69load_rdf_ntriples(File, Triples) :-
  70    open_nt_file(File, In, Close),
  71    call_cleanup(stream_to_triples(In, Triples), Close).
  72
  73%       open_nt_file(+Input, -Stream, -Close)
  74%
  75%       Open Input, returning Stream and a goal to cleanup Stream if it
  76%       was opened.
  77
  78open_nt_file(stream(Stream), Stream, true) :- !.
  79open_nt_file(Stream, Stream, true) :-
  80    is_stream(Stream),
  81    !.
  82open_nt_file(Spec, Stream, close(Stream)) :-
  83    absolute_file_name(Spec,
  84                       [ access(read),
  85                         extensions([nt,''])
  86                       ], Path),
  87    open(Path, read, Stream).
  88
  89
  90%       rdf_ntriple_part(+Type, -Value, <DCG>)
  91%
  92%       Parse one of the fields of  an   ntriple.  This  is used for the
  93%       SWI-Prolog Sesame (www.openrdf.org) implementation   to  realise
  94%       /servlets/removeStatements. I do not think   public  use of this
  95%       predicate should be stimulated.
  96
  97rdf_ntriple_part(subject, Subject) -->
  98    subject(Subject).
  99rdf_ntriple_part(predicate, Predicate) -->
 100    predicate(Predicate).
 101rdf_ntriple_part(object, Object) -->
 102    predicate(Object).
 103
 104
 105%       stream_to_triples(+Stream, -ListOfTriples)
 106%
 107%       Read Stream, returning all its triples
 108
 109stream_to_triples(In, Triples) :-
 110    read_line_to_codes(In, Line),
 111    (   Line == end_of_file
 112    ->  Triples = []
 113    ;   phrase(line(Triples, Tail), Line),
 114        stream_to_triples(In, Tail)
 115    ).
 116
 117line(Triples, Tail) -->
 118    wss,
 119    (   comment
 120    ->  {Triples = Tail}
 121    ;   triple(Triple)
 122    ->  {Triples = [Triple|Tail]}
 123    ).
 124
 125comment -->
 126    "#",
 127    !,
 128    skip_rest.
 129comment -->
 130    end_of_input.
 131
 132triple(rdf(Subject, Predicate, Object)) -->
 133    subject(Subject), ws, wss,
 134    predicate(Predicate), ws, wss,
 135    object(Object), wss, ".", wss.
 136
 137subject(Subject) -->
 138    uniref(Subject),
 139    !.
 140subject(Subject) -->
 141    node_id(Subject).
 142
 143predicate(Predicate) -->
 144    uniref(Predicate).
 145
 146object(Object) -->
 147    uniref(Object),
 148    !.
 149object(Object) -->
 150    node_id(Object).
 151object(Object) -->
 152    literal(Object).
 153
 154
 155uniref(URI) -->
 156    "<",
 157    escaped_uri_codes(Codes),
 158    ">",
 159    !,
 160    { atom_codes(URI, Codes)
 161    }.
 162
 163node_id(node(Id)) -->                   % anonymous nodes
 164    "_:",
 165    name_start(C0),
 166    name_codes(Codes),
 167    { atom_codes(Id, [C0|Codes])
 168    }.
 169
 170literal(Literal) -->
 171    lang_string(Literal),
 172    !.
 173literal(Literal) -->
 174    xml_string(Literal).
 175
 176
 177%       name_start(-Code)
 178%       name_codes(-ListfCodes)
 179%
 180%       Parse identifier names
 181
 182name_start(C) -->
 183    [C],
 184    { code_type(C, alpha)
 185    }.
 186
 187name_codes([C|T]) -->
 188    [C],
 189    { code_type(C, alnum)
 190    },
 191    !,
 192    name_codes(T).
 193name_codes([]) -->
 194    [].
 195
 196
 197%       escaped_uri_codes(-CodeList)
 198%
 199%       Decode string holding %xx escaped characters.
 200
 201escaped_uri_codes([]) -->
 202    [].
 203escaped_uri_codes([C|T]) -->
 204    "%", [D0,D1],
 205    { code_type(D0, xdigit(V0)),
 206      code_type(D1, xdigit(V1)),
 207      !,
 208      C is V0<<4 + V1
 209    },
 210    escaped_uri_codes(T).
 211escaped_uri_codes([C|T]) -->
 212    "\\u", [D0,D1,D2,D3],
 213    !,
 214    { code_type(D0, xdigit(V0)),
 215      code_type(D1, xdigit(V1)),
 216      code_type(D2, xdigit(V2)),
 217      code_type(D3, xdigit(V3)),
 218      C is V0<<12 + V1<<8 + V2<<4 + V3
 219    },
 220    escaped_uri_codes(T).
 221escaped_uri_codes([C|T]) -->
 222    "\\U", [D0,D1,D2,D3,D4,D5,D6,D7],
 223    !,
 224    { code_type(D0, xdigit(V0)),
 225      code_type(D1, xdigit(V1)),
 226      code_type(D2, xdigit(V2)),
 227      code_type(D3, xdigit(V3)),
 228      code_type(D4, xdigit(V4)),
 229      code_type(D5, xdigit(V5)),
 230      code_type(D6, xdigit(V6)),
 231      code_type(D7, xdigit(V7)),
 232      C is V0<<28 + V1<<24 + V2<<20 + V3<<16 +
 233           V4<<12 + V5<<8 + V6<<4 + V7
 234    },
 235    escaped_uri_codes(T).
 236escaped_uri_codes([C|T]) -->
 237    [C],
 238    escaped_uri_codes(T).
 239
 240
 241%       lang_string()
 242%
 243%       Process a language string
 244
 245lang_string(String) -->
 246    "\"",
 247    string(Codes),
 248    "\"",
 249    !,
 250    { atom_codes(Atom, Codes)
 251    },
 252    (   langsep
 253    ->  language(Lang),
 254        { String = literal(lang(Lang, Atom))
 255        }
 256    ;   "^^"
 257    ->  uniref(Type),
 258        { String = literal(type(Type, Atom))
 259        }
 260    ;   { String = literal(Atom)
 261        }
 262    ).
 263
 264langsep -->
 265    "-".
 266langsep -->
 267    "@".
 268
 269%       xml_string(String)
 270%
 271%       Handle xml"..."
 272
 273xml_string(xml(String)) -->
 274    "xml\"",                        % really no whitespace?
 275    string(Codes),
 276    "\"",
 277    { atom_codes(String, Codes)
 278    }.
 279
 280string([]) -->
 281    [].
 282string([C0|T]) -->
 283    string_char(C0),
 284    string(T).
 285
 286string_char(0'\\) -->
 287    "\\\\".
 288string_char(0'") -->
 289    "\\\"".
 290string_char(10) -->
 291    "\\n".
 292string_char(13) -->
 293    "\\r".
 294string_char(9) -->
 295    "\\t".
 296string_char(C) -->
 297    "\\u",
 298    '4xdigits'(C).
 299string_char(C) -->
 300    "\\U",
 301    '4xdigits'(C0),
 302    '4xdigits'(C1),
 303    { C is C0<<16 + C1
 304    }.
 305string_char(C) -->
 306    [C].
 307
 308'4xdigits'(C) -->
 309    [C0,C1,C2,C3],
 310    { code_type(C0, xdigit(V0)),
 311      code_type(C1, xdigit(V1)),
 312      code_type(C2, xdigit(V2)),
 313      code_type(C3, xdigit(V3)),
 314
 315      C is V0<<12 + V1<<8 + V2<<4 + V3
 316    }.
 317
 318%       language(-Lang)
 319%
 320%       Return xml:lang language identifier.
 321
 322language(Lang) -->
 323    lang_code(C0),
 324    lang_codes(Codes),
 325    { atom_codes(Lang, [C0|Codes])
 326    }.
 327
 328lang_code(C) -->
 329    [C],
 330    { C \== 0'.,
 331      \+ code_type(C, white)
 332    }.
 333
 334lang_codes([C|T]) -->
 335    lang_code(C),
 336    !,
 337    lang_codes(T).
 338lang_codes([]) -->
 339    [].
 340
 341
 342                 /*******************************
 343                 *             BASICS           *
 344                 *******************************/
 345
 346skip_rest(_,[]).
 347
 348ws -->
 349    [C],
 350    { code_type(C, white)
 351    }.
 352
 353end_of_input([], []).
 354
 355
 356wss -->
 357    ws,
 358    !,
 359    wss.
 360wss -->
 361    [].