View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jeffrey Rosenwald
   4    E-mail:        jeffrose@acm.org
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2010-2013, Jeffrey Rosenwald
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(protobufs,
  36          [
  37           protobuf_message/2,   % ?Template ?Codes
  38           protobuf_message/3    % ?Template ?Codes ?Rest
  39          ]).
  40
  41/** <module> Google's Protocol Buffers
  42
  43Protocol  buffers  are  Google's    language-neutral,  platform-neutral,
  44extensible mechanism for serializing structured data  --  think XML, but
  45smaller, faster, and simpler. You define how   you  want your data to be
  46structured once. This takes the form of   a  template that describes the
  47data structure. You use this template  to   encode  and decode your data
  48structure into wire-streams that may be sent-to or read-from your peers.
  49The underlying wire stream is platform independent, lossless, and may be
  50used to interwork with a variety of  languages and systems regardless of
  51word size or endianness. Techniques  exist   to  safely extend your data
  52structure without breaking deployed programs   that are compiled against
  53the "old" format.
  54
  55The idea behind Google's  Protocol  Buffers   is  that  you  define your
  56structured messages using a domain-specific language   and  tool set. In
  57SWI-Prolog, you define your message  template   as  a list of predefined
  58Prolog terms that correspond to production  rules in the Definite Clause
  59Grammar (DCG) that realizes the interpreter. Each production rule has an
  60equivalent rule in the  protobuf  grammar.   The  process  is not unlike
  61specifiying the format of a regular  expression. To encode a template to
  62a wire-stream, you pass a grounded template, =X=, and  variable, =Y=, to
  63protobuf_message/2. To decode a wire-stream, =Y=, you pass an ungrounded
  64template, =X=,  along  with  a   grounded    wire-stream,   =Y=,  to
  65protobuf_message/2. The interpreter will unify  the unbound variables in
  66the template with values decoded from the wire-stream.
  67
  68For an overview and tutorial  with examples, see protobufs_overview.txt.
  69Examples of usage may also be found by inspecting test_protobufs.pl.
  70
  71@see http://code.google.com/apis/protocolbuffers
  72@author: Jeffrey Rosenwald (JeffRose@acm.org)
  73@compat: SWI-Prolog
  74*/
  75
  76:- require([ use_foreign_library/1
  77           , atom_codes/2
  78           , call/2
  79           , float32_codes/2
  80           , float64_codes/2
  81           , int32_codes/2
  82           , int64_codes/2
  83           , integer_zigzag/2
  84           , string_codes/2
  85           , succ/2
  86           , between/3
  87           ]).
  88
  89:- use_foreign_library(foreign(protobufs)).
  90:- use_module(library(utf8)).
  91:- use_module(library(error)).
  92:- use_module(library(lists)).
  93
  94wire_type(varint, 0).
  95wire_type(fixed64, 1).
  96wire_type(length_delimited, 2).
  97wire_type(start_group, 3).
  98wire_type(end_group, 4).
  99wire_type(fixed32, 5).
 100
 101%
 102% deal with Google's method of encoding 2's complement integers
 103% such that packed length is proportional to magnitude. We can handle up
 104% to 63 bits, plus sign. Essentially moves sign-bit from MSB to LSB.
 105%
 106:- if(false).  % now done in the C-support code
 107zig_zag(Int, X) :-
 108    integer(Int),
 109    !,
 110    X is (Int << 1) xor (Int >> 63).
 111zig_zag(Int, X) :-
 112    integer(X),
 113    Y is -1 * (X /\ 1),
 114    Int is (X >> 1) xor Y.
 115:- endif.
 116%
 117%  basic wire-type processing handled by C-support code
 118%
 119
 120fixed_int32(X, [A0, A1, A2, A3 | Rest], Rest) :-
 121    int32_codes(X, [A0, A1, A2, A3]).
 122
 123fixed_int64(X, [A0, A1, A2, A3, A4, A5, A6, A7 | Rest], Rest) :-
 124    int64_codes(X, [A0, A1, A2, A3, A4, A5, A6, A7]).
 125
 126fixed_float64(X, [A0, A1, A2, A3, A4, A5, A6, A7 | Rest], Rest) :-
 127    float64_codes(X, [A0, A1, A2, A3, A4, A5, A6, A7]).
 128
 129fixed_float32(X, [A0, A1, A2, A3 | Rest], Rest) :-
 130    float32_codes(X, [A0, A1, A2, A3]).
 131
 132%
 133%   Start of the DCG
 134%
 135
 136code_string(N, Codes, Rest, Rest1) :-
 137    length(Codes, N),
 138    append(Codes, Rest1, Rest),
 139    !.
 140/*
 141code_string(N, Codes) -->
 142        { length(Codes, N)},
 143        Codes, !.
 144*/
 145%
 146% deal with Google's method of packing unsigned integers in variable
 147% length, modulo 128 strings.
 148%
 149% var_int and tag_type productions were rewritten in straight Prolog for
 150% speed's sake.
 151%
 152
 153var_int(A, [A | Rest], Rest) :-
 154    A < 128,
 155    !.
 156var_int(X, [A | Rest], Rest1) :-
 157    nonvar(X),
 158    X1 is X >> 7,
 159    A is 128 + (X /\ 0x7f),
 160    var_int(X1, Rest, Rest1),
 161    !.
 162var_int(X, [A | Rest], Rest1) :-
 163    var_int(X1, Rest, Rest1),
 164    X is (X1 << 7) + A - 128,
 165    !.
 166%
 167%
 168
 169tag_type(Tag, Type, Rest, Rest1) :-
 170    nonvar(Tag), nonvar(Type),
 171    wire_type(Type, X),
 172    A is Tag << 3 \/ X,
 173    var_int(A, Rest, Rest1),
 174    !.
 175tag_type(Tag, Type, Rest, Rest1) :-
 176    var_int(A, Rest, Rest1),
 177    X is A /\ 0x07,
 178    wire_type(Type, X),
 179    Tag is A >> 3.
 180%
 181prolog_type(Tag, double) -->     tag_type(Tag, fixed64).
 182prolog_type(Tag, integer64) -->  tag_type(Tag, fixed64).
 183prolog_type(Tag, float) -->      tag_type(Tag, fixed32).
 184prolog_type(Tag, integer32) -->  tag_type(Tag, fixed32).
 185prolog_type(Tag, integer) -->    tag_type(Tag, varint).
 186prolog_type(Tag, unsigned) -->   tag_type(Tag, varint).
 187prolog_type(Tag, boolean) -->    tag_type(Tag, varint).
 188prolog_type(Tag, enum) -->       tag_type(Tag, varint).
 189prolog_type(Tag, atom) -->       tag_type(Tag, length_delimited).
 190prolog_type(Tag, codes) -->      tag_type(Tag, length_delimited).
 191prolog_type(Tag, utf8_codes) --> tag_type(Tag, length_delimited).
 192prolog_type(Tag, string) -->     tag_type(Tag, length_delimited).
 193prolog_type(Tag, embedded) -->   tag_type(Tag, length_delimited).
 194%
 195%   The protobuf-2.1.0 grammar allows negative values in enums.
 196%   But they are encoded as unsigned in the  golden message.
 197%   Encode as integer and lose. Encode as unsigned and win.
 198%
 199:- meta_predicate enumeration(1,*,*).
 200
 201enumeration(Type) -->
 202    { call(Type, Value) },
 203    payload(unsigned, Value).
 204
 205payload(enum, A) -->
 206    enumeration(A).
 207payload(double,  A) -->
 208    fixed_float64(A).
 209payload(integer64, A) -->
 210    fixed_int64(A).
 211payload(float, A) -->
 212    fixed_float32(A).
 213payload(integer32, A) -->
 214    fixed_int32(A).
 215payload(integer, A) -->
 216    { nonvar(A), integer_zigzag(A,X) },
 217    !,
 218    var_int(X).
 219payload(integer, A) -->
 220    var_int(X),
 221    { integer_zigzag(A, X) }.
 222payload(unsigned, A) -->
 223    {   nonvar(A)
 224    ->  A >= 0
 225    ;   true
 226    },
 227    var_int(A).
 228payload(codes, A) -->
 229    { nonvar(A), !, length(A, Len)},
 230    var_int(Len),
 231    code_string(Len, A).
 232payload(codes, A) -->
 233    var_int(Len),
 234    code_string(Len, A).
 235payload(utf8_codes, A) -->
 236    { nonvar(A),
 237      !,
 238      phrase(utf8_codes(A), B)
 239    },
 240    payload(codes, B).
 241payload(utf8_codes, A) -->
 242    payload(codes, B),
 243    { phrase(utf8_codes(A), B) }.
 244payload(atom, A) -->
 245    { nonvar(A),
 246      atom_codes(A, Codes)
 247    },
 248    payload(utf8_codes, Codes),
 249    !.
 250payload(atom, A) -->
 251    payload(utf8_codes, Codes),
 252    { atom_codes(A, Codes) }.
 253payload(boolean, true) -->
 254    payload(unsigned, 1).
 255payload(boolean, false) -->
 256    payload(unsigned, 0).
 257payload(string, A) -->
 258    {   nonvar(A)
 259    ->  string_codes(A, Codes)
 260    ;   true
 261    },
 262    payload(codes, Codes),
 263    { string_codes(A, Codes) }.
 264payload(embedded, protobuf(A)) -->
 265    { ground(A),
 266      phrase(protobuf(A), Codes)
 267    },
 268    payload(codes, Codes),
 269    !.
 270payload(embedded, protobuf(A)) -->
 271    payload(codes, Codes),
 272    { phrase(protobuf(A), Codes) }.
 273
 274start_group(Tag) -->            tag_type(Tag, start_group).
 275
 276end_group(Tag) -->              tag_type(Tag, end_group).
 277%
 278%
 279nothing([]) --> [], !.
 280
 281protobuf([A | B]) -->
 282    { A =.. [ Type, Tag, Payload] },
 283    message_sequence(Type, Tag, Payload),
 284    !,
 285    (   protobuf(B)
 286    ;   nothing(B)
 287    ).
 288
 289
 290repeated_message_sequence(repeated_enum, Tag, Type, [A | B]) -->
 291    { Compound =.. [Type, A] },
 292    message_sequence(enum, Tag, Compound),
 293    (   repeated_message_sequence(repeated_enum, Tag, Type, B)
 294    ;   nothing(B)
 295    ).
 296repeated_message_sequence(Type, Tag, [A | B]) -->
 297    message_sequence(Type, Tag, A),
 298    repeated_message_sequence(Type, Tag, B).
 299repeated_message_sequence(_Type, _Tag, A) -->
 300    nothing(A).
 301
 302
 303message_sequence(repeated, Tag, enum(Compound)) -->
 304    { Compound =.. [ Type, List] },
 305    repeated_message_sequence(repeated_enum, Tag, Type, List).
 306message_sequence(repeated, Tag, Compound) -->
 307    { Compound =.. [Type, A] },
 308    repeated_message_sequence(Type, Tag, A).
 309message_sequence(group, Tag, A) -->
 310    start_group(Tag),
 311    protobuf(A),
 312    end_group(Tag),
 313    !.
 314message_sequence(PrologType, Tag, Payload) -->
 315    prolog_type(Tag, PrologType),
 316    payload(PrologType, Payload).
 317
 318
 319%!  protobuf_message(?Template, ?Wire_stream) is semidet.
 320%!  protobuf_message(?Template, ?Wire_stream, ?Rest) is nondet.
 321%
 322%   Marshalls  and  unmarshalls  byte  streams  encoded  using  Google's
 323%   Protobuf  grammars.  protobuf_message/2  provides  a  bi-directional
 324%   parser that marshalls a Prolog   structure to Wire_stream, according
 325%   to rules specified by Template. It   can also unmarshall Wire_stream
 326%   into  a  Prolog   structure   according    to   the   same  grammar.
 327%   protobuf_message/3 provides a difference list version.
 328%
 329%   @param Template is a  protobuf   grammar  specification.  On decode,
 330%   unbound variables in the Template are  unified with their respective
 331%   values in the Wire_stream. On encode, Template must be ground.
 332%
 333%   @param Wire_stream is a code list that   was generated by a protobuf
 334%   encoder using an equivalent template.
 335
 336protobuf_message(protobuf(Template), Wirestream) :-
 337    must_be(list, Template),
 338    phrase(protobuf(Template), Wirestream),
 339    !.
 340
 341protobuf_message(protobuf(Template), Wirestream, Residue) :-
 342    must_be(list, Template),
 343    phrase(protobuf(Template), Wirestream, Residue).