View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker and Matt Lilley
   4    E-mail:        J.Wielemaker@cs.vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2012-2016, VU University Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(archive,
  36          [ archive_open/3,             % +Stream, -Archive, +Options
  37            archive_open/4,             % +Stream, +Mode, -Archive, +Options
  38            archive_create/3,           % +OutputFile, +InputFileList, +Options
  39            archive_close/1,            % +Archive
  40            archive_property/2,         % +Archive, ?Property
  41            archive_next_header/2,      % +Archive, -Name
  42            archive_open_entry/2,       % +Archive, -EntryStream
  43            archive_header_property/2,  % +Archive, ?Property
  44            archive_set_header_property/2,      % +Archive, +Property
  45            archive_extract/3,          % +Archive, +Dir, +Options
  46
  47            archive_entries/2,          % +Archive, -Entries
  48            archive_data_stream/3       % +Archive, -DataStream, +Options
  49          ]).
  50:- use_module(library(error)).
  51:- use_module(library(option)).
  52:- use_module(library(filesex)).
  53
  54/** <module> Access several archive formats
  55
  56This library uses _libarchive_ to access   a variety of archive formats.
  57The following example lists the entries in an archive:
  58
  59  ==
  60  list_archive(File) :-
  61        archive_open(File, Archive, []),
  62        repeat,
  63           (   archive_next_header(Archive, Path)
  64           ->  format('~w~n', [Path]),
  65               fail
  66           ;   !,
  67               archive_close(Archive)
  68           ).
  69  ==
  70
  71@see http://code.google.com/p/libarchive/
  72*/
  73
  74:- use_foreign_library(foreign(archive4pl)).
  75
  76archive_open(Stream, Archive, Options) :-
  77    archive_open(Stream, read, Archive, Options).
  78
  79:- predicate_options(archive_open/4, 4,
  80                     [ close_parent(boolean),
  81                       filter(oneof([all,bzip2,compress,gzip,grzip,lrzip,
  82                                     lzip,lzma,lzop,none,rpm,uu,xz])),
  83                       format(oneof([all,'7zip',ar,cab,cpio,empty,gnutar,
  84                                     iso9660,lha,mtree,rar,raw,tar,xar,zip]))
  85                     ]).
  86:- predicate_options(archive_create/3, 3,
  87                     [ directory(atom),
  88                       pass_to(archive_open/4, 4)
  89                     ]).
  90
  91%!  archive_open(+Data, +Mode, -Archive, +Options) is det.
  92%
  93%   Open the archive in Data and unify  Archive with a handle to the
  94%   opened archive. Data is either a file  or a stream that contains
  95%   a valid archive. Details are   controlled by Options. Typically,
  96%   the option close_parent(true) is used  to   close  stream if the
  97%   archive is closed using archive_close/1.  For other options, the
  98%   defaults are typically fine. The option format(raw) must be used
  99%   to process compressed  streams  that   do  not  contain explicit
 100%   entries (e.g., gzip'ed data)  unambibuously.   The  =raw= format
 101%   creates a _pseudo archive_ holding a single member named =data=.
 102%
 103%     * close_parent(+Boolean)
 104%     If this option is =true= (default =false=), Stream is closed
 105%     if archive_close/1 is called on Archive.
 106%
 107%     * compression(+Compression)
 108%     Synomym for filter(Compression).  Deprecated.
 109%
 110%     * filter(+Filter)
 111%     Support the indicated filter. This option may be
 112%     used multiple times to support multiple filters. In read mode,
 113%     If no filter options are provided, =all= is assumed. In write
 114%     mode, none is assumed.
 115%     Supported values are =all=, =bzip2=, =compress=, =gzip=,
 116%     =grzip=, =lrzip=, =lzip=, =lzma=, =lzop=, =none=, =rpm=, =uu=
 117%     and =xz=. The value =all= is default for read, =none= for write.
 118%
 119%     * format(+Format)
 120%     Support the indicated format.  This option may be used
 121%     multiple times to support multiple formats in read mode.
 122%     In write mode, you must supply a single format. If no format
 123%     options are provided, =all= is assumed for read mode. Note that
 124%     =all= does *not* include =raw=. To open both archive
 125%     and non-archive files, _both_ format(all) and
 126%     format(raw) must be specified. Supported values are: =all=,
 127%     =7zip=, =ar=, =cab=, =cpio=, =empty=, =gnutar=, =iso9660=,
 128%     =lha=, =mtree=, =rar=, =raw=, =tar=, =xar= and =zip=. The
 129%     value =all= is default for read.
 130%
 131%   Note that the actually supported   compression types and formats
 132%   may vary depending on the version   and  installation options of
 133%   the underlying libarchive  library.  This   predicate  raises  a
 134%   domain  error  if  the  (explicitly)  requested  format  is  not
 135%   supported.
 136%
 137%   @error  domain_error(filter, Filter) if the requested
 138%           filter is not supported.
 139%   @error  domain_error(format, Format) if the requested
 140%           format type is not supported.
 141
 142archive_open(stream(Stream), Mode, Archive, Options) :-
 143    !,
 144    archive_open_stream(Stream, Mode, Archive, Options).
 145archive_open(Stream, Mode, Archive, Options) :-
 146    is_stream(Stream),
 147    !,
 148    archive_open_stream(Stream, Mode, Archive, Options).
 149archive_open(File, Mode, Archive, Options) :-
 150    open(File, Mode, Stream, [type(binary)]),
 151    catch(archive_open_stream(Stream, Mode, Archive, [close_parent(true)|Options]),
 152          E, (close(Stream, [force(true)]), throw(E))).
 153
 154
 155%!  archive_close(+Archive) is det.
 156%
 157%   Close the archive.  If  close_parent(true)   is  specified,  the
 158%   underlying stream is closed too.  If   there  is an entry opened
 159%   with  archive_open_entry/2,  actually  closing  the  archive  is
 160%   delayed until the stream associated with   the  entry is closed.
 161%   This can be used to open a   stream  to an archive entry without
 162%   having to worry about closing the archive:
 163%
 164%     ==
 165%     archive_open_named(ArchiveFile, EntryName, Stream) :-
 166%         archive_open(ArchiveFile, Handle, []),
 167%         archive_next_header(Handle, Name),
 168%         archive_open_entry(Handle, Stream),
 169%         archive_close(Archive).
 170%     ==
 171
 172
 173%!  archive_property(+Handle, ?Property) is nondet.
 174%
 175%   True when Property is a property  of the archive Handle. Defined
 176%   properties are:
 177%
 178%     * filters(List)
 179%     True when the indicated filters are applied before reaching
 180%     the archive format.
 181
 182archive_property(Handle, Property) :-
 183    defined_archive_property(Property),
 184    Property =.. [Name,Value],
 185    archive_property(Handle, Name, Value).
 186
 187defined_archive_property(filter(_)).
 188
 189
 190%!  archive_next_header(+Handle, -Name) is semidet.
 191%
 192%   Forward to the next entry of the  archive for which Name unifies
 193%   with the pathname of the entry. Fails   silently  if the name of
 194%   the  archive  is  reached  before  success.  Name  is  typically
 195%   specified if a  single  entry  must   be  accessed  and  unbound
 196%   otherwise. The following example opens  a   Prolog  stream  to a
 197%   given archive entry. Note that  _Stream_   must  be closed using
 198%   close/1 and the archive  must   be  closed using archive_close/1
 199%   after the data has been used.   See also setup_call_cleanup/3.
 200%
 201%     ==
 202%     open_archive_entry(ArchiveFile, Entry, Stream) :-
 203%         open(ArchiveFile, read, In, [type(binary)]),
 204%         archive_open(In, Archive, [close_parent(true)]),
 205%         archive_next_header(Archive, Entry),
 206%         archive_open_entry(Archive, Stream).
 207%     ==
 208%
 209%   @error permission_error(next_header, archive, Handle) if a
 210%   previously opened entry is not closed.
 211
 212%!  archive_open_entry(+Archive, -Stream) is det.
 213%
 214%   Open the current entry as a stream. Stream must be closed.
 215%   If the stream is not closed before the next call to
 216%   archive_next_header/2, a permission error is raised.
 217
 218
 219%!  archive_set_header_property(+Archive, +Property)
 220%
 221%   Set Property of the current header.  Write-mode only. Defined
 222%   properties are:
 223%
 224%     * filetype(-Type)
 225%     Type is one of =file=, =link=, =socket=, =character_device=,
 226%     =block_device=, =directory= or =fifo=.  It appears that this
 227%     library can also return other values.  These are returned as
 228%     an integer.
 229%     * mtime(-Time)
 230%     True when entry was last modified at time.
 231%     * size(-Bytes)
 232%     True when entry is Bytes long.
 233%     * link_target(-Target)
 234%     Target for a link. Currently only supported for symbolic
 235%     links.
 236
 237%!  archive_header_property(+Archive, ?Property)
 238%
 239%   True when Property is a property of the current header.  Defined
 240%   properties are:
 241%
 242%     * filetype(-Type)
 243%     Type is one of =file=, =link=, =socket=, =character_device=,
 244%     =block_device=, =directory= or =fifo=.  It appears that this
 245%     library can also return other values.  These are returned as
 246%     an integer.
 247%     * mtime(-Time)
 248%     True when entry was last modified at time.
 249%     * size(-Bytes)
 250%     True when entry is Bytes long.
 251%     * link_target(-Target)
 252%     Target for a link. Currently only supported for symbolic
 253%     links.
 254%     * format(-Format)
 255%     Provides the name of the archive format applicable to the
 256%     current entry.  The returned value is the lowercase version
 257%     of the output of archive_format_name().
 258%     * permissions(-Integer)
 259%     True when entry has the indicated permission mask.
 260
 261archive_header_property(Archive, Property) :-
 262    (   nonvar(Property)
 263    ->  true
 264    ;   header_property(Property)
 265    ),
 266    archive_header_prop_(Archive, Property).
 267
 268header_property(filetype(_)).
 269header_property(mtime(_)).
 270header_property(size(_)).
 271header_property(link_target(_)).
 272header_property(format(_)).
 273header_property(permissions(_)).
 274
 275
 276%!  archive_extract(+ArchiveFile, +Dir, +Options)
 277%
 278%   Extract files from the given archive into Dir. Supported
 279%   options:
 280%
 281%     * remove_prefix(+Prefix)
 282%     Strip Prefix from all entries before extracting
 283%
 284%   @error  existence_error(directory, Dir) if Dir does not exist
 285%           or is not a directory.
 286%   @error  domain_error(path_prefix(Prefix), Path) if a path in
 287%           the archive does not start with Prefix
 288%   @tbd    Add options
 289
 290archive_extract(Archive, Dir, Options) :-
 291    (   exists_directory(Dir)
 292    ->  true
 293    ;   existence_error(directory, Dir)
 294    ),
 295    setup_call_cleanup(
 296        archive_open(Archive, Handle, Options),
 297        extract(Handle, Dir, Options),
 298        archive_close(Handle)).
 299
 300extract(Archive, Dir, Options) :-
 301    archive_next_header(Archive, Path),
 302    !,
 303    (   archive_header_property(Archive, filetype(file))
 304    ->  archive_header_property(Archive, permissions(Perm)),
 305        (   option(remove_prefix(Remove), Options)
 306        ->  (   atom_concat(Remove, ExtractPath, Path)
 307            ->  true
 308            ;   domain_error(path_prefix(Remove), Path)
 309            )
 310        ;   ExtractPath = Path
 311        ),
 312        directory_file_path(Dir, ExtractPath, Target),
 313        file_directory_name(Target, FileDir),
 314        make_directory_path(FileDir),
 315        setup_call_cleanup(
 316            archive_open_entry(Archive, In),
 317            setup_call_cleanup(
 318                open(Target, write, Out, [type(binary)]),
 319                copy_stream_data(In, Out),
 320                close(Out)),
 321            close(In)),
 322        set_permissions(Perm, Target)
 323    ;   true
 324    ),
 325    extract(Archive, Dir, Options).
 326extract(_, _, _).
 327
 328%!  set_permissions(+Perm:integer, +Target:atom)
 329%
 330%   Restore the permissions.  Currently only restores the executable
 331%   permission.
 332
 333set_permissions(Perm, Target) :-
 334    Perm /\ 0o700 =\= 0,
 335    !,
 336    '$mark_executable'(Target).
 337set_permissions(_, _).
 338
 339
 340                 /*******************************
 341                 *    HIGH LEVEL PREDICATES     *
 342                 *******************************/
 343
 344%!  archive_entries(+Archive, -Paths) is det.
 345%
 346%   True when Paths is a list of pathnames appearing in Archive.
 347
 348archive_entries(Archive, Paths) :-
 349    setup_call_cleanup(
 350        archive_open(Archive, Handle, []),
 351        contents(Handle, Paths),
 352        archive_close(Handle)).
 353
 354contents(Handle, [Path|T]) :-
 355    archive_next_header(Handle, Path),
 356    !,
 357    contents(Handle, T).
 358contents(_, []).
 359
 360%!  archive_data_stream(+Archive, -DataStream, +Options) is nondet.
 361%
 362%   True when DataStream  is  a  stream   to  a  data  object inside
 363%   Archive.  This  predicate  transparently   unpacks  data  inside
 364%   _possibly nested_ archives, e.g., a _tar_   file  inside a _zip_
 365%   file. It applies the appropriate  decompression filters and thus
 366%   ensures that Prolog  reads  the   plain  data  from  DataStream.
 367%   DataStream must be closed after the  content has been processed.
 368%   Backtracking opens the next member of the (nested) archive. This
 369%   predicate processes the following options:
 370%
 371%     - meta_data(-Data:list(dict))
 372%     If provided, Data is unified with a list of filters applied to
 373%     the (nested) archive to open the current DataStream. The first
 374%     element describes the outermost archive. Each Data dict
 375%     contains the header properties (archive_header_property/2) as
 376%     well as the keys:
 377%
 378%       - filters(Filters:list(atom))
 379%       Filter list as obtained from archive_property/2
 380%       - name(Atom)
 381%       Name of the entry.
 382%
 383%   Note that this predicate can  handle   a  non-archive files as a
 384%   pseudo archive holding a single   stream by using archive_open/3
 385%   with the options `[format(all), format(raw)]`.
 386
 387archive_data_stream(Archive, DataStream, Options) :-
 388    option(meta_data(MetaData), Options, _),
 389    archive_content(Archive, DataStream, MetaData, []).
 390
 391archive_content(Archive, Entry, [EntryMetadata|PipeMetadataTail], PipeMetadata2) :-
 392    archive_property(Archive, filter(Filters)),
 393    repeat,
 394    (   archive_next_header(Archive, EntryName)
 395    ->  findall(EntryProperty,
 396                archive_header_property(Archive, EntryProperty),
 397                EntryProperties),
 398        dict_create(EntryMetadata, archive_meta_data,
 399                    [ filters(Filters),
 400                      name(EntryName)
 401                    | EntryProperties
 402                    ]),
 403        (   EntryMetadata.filetype == file
 404        ->  archive_open_entry(Archive, Entry0),
 405            (   EntryName == data,
 406                EntryMetadata.format == raw
 407            ->  % This is the last entry in this nested branch.
 408                % We therefore close the choicepoint created by repeat/0.
 409                % Not closing this choicepoint would cause
 410                % archive_next_header/2 to throw an exception.
 411                !,
 412                PipeMetadataTail = PipeMetadata2,
 413                Entry = Entry0
 414            ;   PipeMetadataTail = PipeMetadata1,
 415                open_substream(Entry0,
 416                               Entry,
 417                               PipeMetadata1,
 418                               PipeMetadata2)
 419            )
 420        ;   fail
 421        )
 422    ;   !,
 423        fail
 424    ).
 425
 426open_substream(In, Entry, ArchiveMetadata, PipeTailMetadata) :-
 427    setup_call_cleanup(
 428        archive_open(stream(In),
 429                     Archive,
 430                     [ close_parent(true),
 431                       format(all),
 432                       format(raw)
 433                     ]),
 434        archive_content(Archive, Entry, ArchiveMetadata, PipeTailMetadata),
 435        archive_close(Archive)).
 436
 437
 438%!  archive_create(+OutputFile, +InputFiles, +Options) is det.
 439%
 440%   Convenience predicate to create an   archive  in OutputFile with
 441%   data from a list of InputFiles and the given Options.
 442%
 443%   Besides  options  supported  by  archive_open/4,  the  following
 444%   options are supported:
 445%
 446%     * directory(+Directory)
 447%     Changes the directory before adding input files. If this is
 448%     specified,   paths of  input  files   must   be relative to
 449%     Directory and archived files will not have Directory
 450%     as leading path. This is to simulate =|-C|= option of
 451%     the =tar= program.
 452%
 453%     * format(+Format)
 454%     Write mode supports the following formats: `7zip`, `cpio`,
 455%     `gnutar`, `iso9660`, `xar` and `zip`.  Note that a particular
 456%     installation may support only a subset of these, depending on
 457%     the configuration of `libarchive`.
 458
 459archive_create(OutputFile, InputFiles, Options) :-
 460    must_be(list(text), InputFiles),
 461    option(directory(BaseDir), Options, '.'),
 462    setup_call_cleanup(
 463        archive_open(OutputFile, write, Archive, Options),
 464        archive_create_1(Archive, BaseDir, BaseDir, InputFiles, top),
 465        archive_close(Archive)).
 466
 467archive_create_1(_, _, _, [], _) :- !.
 468archive_create_1(Archive, Base, Current, ['.'|Files], sub) :-
 469    !,
 470    archive_create_1(Archive, Base, Current, Files, sub).
 471archive_create_1(Archive, Base, Current, ['..'|Files], Where) :-
 472    !,
 473    archive_create_1(Archive, Base, Current, Files, Where).
 474archive_create_1(Archive, Base, Current, [File|Files], Where) :-
 475    directory_file_path(Current, File, Filename),
 476    archive_create_2(Archive, Base, Filename),
 477    archive_create_1(Archive, Base, Current, Files, Where).
 478
 479archive_create_2(Archive, Base, Directory) :-
 480    exists_directory(Directory),
 481    !,
 482    entry_name(Base, Directory, Directory0),
 483    archive_next_header(Archive, Directory0),
 484    time_file(Directory, Time),
 485    archive_set_header_property(Archive, mtime(Time)),
 486    archive_set_header_property(Archive, filetype(directory)),
 487    archive_open_entry(Archive, EntryStream),
 488    close(EntryStream),
 489    directory_files(Directory, Files),
 490    archive_create_1(Archive, Base, Directory, Files, sub).
 491archive_create_2(Archive, Base, Filename) :-
 492    entry_name(Base, Filename, Filename0),
 493    archive_next_header(Archive, Filename0),
 494    size_file(Filename, Size),
 495    time_file(Filename, Time),
 496    archive_set_header_property(Archive, size(Size)),
 497    archive_set_header_property(Archive, mtime(Time)),
 498    setup_call_cleanup(
 499        archive_open_entry(Archive, EntryStream),
 500        setup_call_cleanup(
 501            open(Filename, read, DataStream, [type(binary)]),
 502            copy_stream_data(DataStream, EntryStream),
 503            close(DataStream)),
 504        close(EntryStream)).
 505
 506entry_name('.', Name, Name) :- !.
 507entry_name(Base, Name, EntryName) :-
 508    directory_file_path(Base, EntryName, Name).