1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker and Matt Lilley 4 E-mail: J.Wielemaker@cs.vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2012-2016, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(archive, 36 [ archive_open/3, % +Stream, -Archive, +Options 37 archive_open/4, % +Stream, +Mode, -Archive, +Options 38 archive_create/3, % +OutputFile, +InputFileList, +Options 39 archive_close/1, % +Archive 40 archive_property/2, % +Archive, ?Property 41 archive_next_header/2, % +Archive, -Name 42 archive_open_entry/2, % +Archive, -EntryStream 43 archive_header_property/2, % +Archive, ?Property 44 archive_set_header_property/2, % +Archive, +Property 45 archive_extract/3, % +Archive, +Dir, +Options 46 47 archive_entries/2, % +Archive, -Entries 48 archive_data_stream/3 % +Archive, -DataStream, +Options 49 ]). 50:- use_module(library(error)). 51:- use_module(library(option)). 52:- use_module(library(filesex)). 53 54/** <module> Access several archive formats 55 56This library uses _libarchive_ to access a variety of archive formats. 57The following example lists the entries in an archive: 58 59 == 60 list_archive(File) :- 61 archive_open(File, Archive, []), 62 repeat, 63 ( archive_next_header(Archive, Path) 64 -> format('~w~n', [Path]), 65 fail 66 ; !, 67 archive_close(Archive) 68 ). 69 == 70 71@see http://code.google.com/p/libarchive/ 72*/ 73 74:- use_foreign_library(foreign(archive4pl)). 75 76archive_open(Stream, Archive, Options) :- 77 archive_open(Stream, read, Archive, Options). 78 79:- predicate_options(archive_open/4, 4, 80 [ close_parent(boolean), 81 filter(oneof([all,bzip2,compress,gzip,grzip,lrzip, 82 lzip,lzma,lzop,none,rpm,uu,xz])), 83 format(oneof([all,'7zip',ar,cab,cpio,empty,gnutar, 84 iso9660,lha,mtree,rar,raw,tar,xar,zip])) 85 ]). 86:- predicate_options(archive_create/3, 3, 87 [ directory(atom), 88 pass_to(archive_open/4, 4) 89 ]). 90 91%! archive_open(+Data, +Mode, -Archive, +Options) is det. 92% 93% Open the archive in Data and unify Archive with a handle to the 94% opened archive. Data is either a file or a stream that contains 95% a valid archive. Details are controlled by Options. Typically, 96% the option close_parent(true) is used to close stream if the 97% archive is closed using archive_close/1. For other options, the 98% defaults are typically fine. The option format(raw) must be used 99% to process compressed streams that do not contain explicit 100% entries (e.g., gzip'ed data) unambibuously. The =raw= format 101% creates a _pseudo archive_ holding a single member named =data=. 102% 103% * close_parent(+Boolean) 104% If this option is =true= (default =false=), Stream is closed 105% if archive_close/1 is called on Archive. 106% 107% * compression(+Compression) 108% Synomym for filter(Compression). Deprecated. 109% 110% * filter(+Filter) 111% Support the indicated filter. This option may be 112% used multiple times to support multiple filters. In read mode, 113% If no filter options are provided, =all= is assumed. In write 114% mode, none is assumed. 115% Supported values are =all=, =bzip2=, =compress=, =gzip=, 116% =grzip=, =lrzip=, =lzip=, =lzma=, =lzop=, =none=, =rpm=, =uu= 117% and =xz=. The value =all= is default for read, =none= for write. 118% 119% * format(+Format) 120% Support the indicated format. This option may be used 121% multiple times to support multiple formats in read mode. 122% In write mode, you must supply a single format. If no format 123% options are provided, =all= is assumed for read mode. Note that 124% =all= does *not* include =raw=. To open both archive 125% and non-archive files, _both_ format(all) and 126% format(raw) must be specified. Supported values are: =all=, 127% =7zip=, =ar=, =cab=, =cpio=, =empty=, =gnutar=, =iso9660=, 128% =lha=, =mtree=, =rar=, =raw=, =tar=, =xar= and =zip=. The 129% value =all= is default for read. 130% 131% Note that the actually supported compression types and formats 132% may vary depending on the version and installation options of 133% the underlying libarchive library. This predicate raises a 134% domain error if the (explicitly) requested format is not 135% supported. 136% 137% @error domain_error(filter, Filter) if the requested 138% filter is not supported. 139% @error domain_error(format, Format) if the requested 140% format type is not supported. 141 142archive_open(stream(Stream), Mode, Archive, Options) :- 143 !, 144 archive_open_stream(Stream, Mode, Archive, Options). 145archive_open(Stream, Mode, Archive, Options) :- 146 is_stream(Stream), 147 !, 148 archive_open_stream(Stream, Mode, Archive, Options). 149archive_open(File, Mode, Archive, Options) :- 150 open(File, Mode, Stream, [type(binary)]), 151 catch(archive_open_stream(Stream, Mode, Archive, [close_parent(true)|Options]), 152 E, (close(Stream, [force(true)]), throw(E))). 153 154 155%! archive_close(+Archive) is det. 156% 157% Close the archive. If close_parent(true) is specified, the 158% underlying stream is closed too. If there is an entry opened 159% with archive_open_entry/2, actually closing the archive is 160% delayed until the stream associated with the entry is closed. 161% This can be used to open a stream to an archive entry without 162% having to worry about closing the archive: 163% 164% == 165% archive_open_named(ArchiveFile, EntryName, Stream) :- 166% archive_open(ArchiveFile, Handle, []), 167% archive_next_header(Handle, Name), 168% archive_open_entry(Handle, Stream), 169% archive_close(Archive). 170% == 171 172 173%! archive_property(+Handle, ?Property) is nondet. 174% 175% True when Property is a property of the archive Handle. Defined 176% properties are: 177% 178% * filters(List) 179% True when the indicated filters are applied before reaching 180% the archive format. 181 182archive_property(Handle, Property) :- 183 defined_archive_property(Property), 184 Property =.. [Name,Value], 185 archive_property(Handle, Name, Value). 186 187defined_archive_property(filter(_)). 188 189 190%! archive_next_header(+Handle, -Name) is semidet. 191% 192% Forward to the next entry of the archive for which Name unifies 193% with the pathname of the entry. Fails silently if the name of 194% the archive is reached before success. Name is typically 195% specified if a single entry must be accessed and unbound 196% otherwise. The following example opens a Prolog stream to a 197% given archive entry. Note that _Stream_ must be closed using 198% close/1 and the archive must be closed using archive_close/1 199% after the data has been used. See also setup_call_cleanup/3. 200% 201% == 202% open_archive_entry(ArchiveFile, Entry, Stream) :- 203% open(ArchiveFile, read, In, [type(binary)]), 204% archive_open(In, Archive, [close_parent(true)]), 205% archive_next_header(Archive, Entry), 206% archive_open_entry(Archive, Stream). 207% == 208% 209% @error permission_error(next_header, archive, Handle) if a 210% previously opened entry is not closed. 211 212%! archive_open_entry(+Archive, -Stream) is det. 213% 214% Open the current entry as a stream. Stream must be closed. 215% If the stream is not closed before the next call to 216% archive_next_header/2, a permission error is raised. 217 218 219%! archive_set_header_property(+Archive, +Property) 220% 221% Set Property of the current header. Write-mode only. Defined 222% properties are: 223% 224% * filetype(-Type) 225% Type is one of =file=, =link=, =socket=, =character_device=, 226% =block_device=, =directory= or =fifo=. It appears that this 227% library can also return other values. These are returned as 228% an integer. 229% * mtime(-Time) 230% True when entry was last modified at time. 231% * size(-Bytes) 232% True when entry is Bytes long. 233% * link_target(-Target) 234% Target for a link. Currently only supported for symbolic 235% links. 236 237%! archive_header_property(+Archive, ?Property) 238% 239% True when Property is a property of the current header. Defined 240% properties are: 241% 242% * filetype(-Type) 243% Type is one of =file=, =link=, =socket=, =character_device=, 244% =block_device=, =directory= or =fifo=. It appears that this 245% library can also return other values. These are returned as 246% an integer. 247% * mtime(-Time) 248% True when entry was last modified at time. 249% * size(-Bytes) 250% True when entry is Bytes long. 251% * link_target(-Target) 252% Target for a link. Currently only supported for symbolic 253% links. 254% * format(-Format) 255% Provides the name of the archive format applicable to the 256% current entry. The returned value is the lowercase version 257% of the output of archive_format_name(). 258% * permissions(-Integer) 259% True when entry has the indicated permission mask. 260 261archive_header_property(Archive, Property) :- 262 ( nonvar(Property) 263 -> true 264 ; header_property(Property) 265 ), 266 archive_header_prop_(Archive, Property). 267 268header_property(filetype(_)). 269header_property(mtime(_)). 270header_property(size(_)). 271header_property(link_target(_)). 272header_property(format(_)). 273header_property(permissions(_)). 274 275 276%! archive_extract(+ArchiveFile, +Dir, +Options) 277% 278% Extract files from the given archive into Dir. Supported 279% options: 280% 281% * remove_prefix(+Prefix) 282% Strip Prefix from all entries before extracting 283% 284% @error existence_error(directory, Dir) if Dir does not exist 285% or is not a directory. 286% @error domain_error(path_prefix(Prefix), Path) if a path in 287% the archive does not start with Prefix 288% @tbd Add options 289 290archive_extract(Archive, Dir, Options) :- 291 ( exists_directory(Dir) 292 -> true 293 ; existence_error(directory, Dir) 294 ), 295 setup_call_cleanup( 296 archive_open(Archive, Handle, Options), 297 extract(Handle, Dir, Options), 298 archive_close(Handle)). 299 300extract(Archive, Dir, Options) :- 301 archive_next_header(Archive, Path), 302 !, 303 ( archive_header_property(Archive, filetype(file)) 304 -> archive_header_property(Archive, permissions(Perm)), 305 ( option(remove_prefix(Remove), Options) 306 -> ( atom_concat(Remove, ExtractPath, Path) 307 -> true 308 ; domain_error(path_prefix(Remove), Path) 309 ) 310 ; ExtractPath = Path 311 ), 312 directory_file_path(Dir, ExtractPath, Target), 313 file_directory_name(Target, FileDir), 314 make_directory_path(FileDir), 315 setup_call_cleanup( 316 archive_open_entry(Archive, In), 317 setup_call_cleanup( 318 open(Target, write, Out, [type(binary)]), 319 copy_stream_data(In, Out), 320 close(Out)), 321 close(In)), 322 set_permissions(Perm, Target) 323 ; true 324 ), 325 extract(Archive, Dir, Options). 326extract(_, _, _). 327 328%! set_permissions(+Perm:integer, +Target:atom) 329% 330% Restore the permissions. Currently only restores the executable 331% permission. 332 333set_permissions(Perm, Target) :- 334 Perm /\ 0o700 =\= 0, 335 !, 336 '$mark_executable'(Target). 337set_permissions(_, _). 338 339 340 /******************************* 341 * HIGH LEVEL PREDICATES * 342 *******************************/ 343 344%! archive_entries(+Archive, -Paths) is det. 345% 346% True when Paths is a list of pathnames appearing in Archive. 347 348archive_entries(Archive, Paths) :- 349 setup_call_cleanup( 350 archive_open(Archive, Handle, []), 351 contents(Handle, Paths), 352 archive_close(Handle)). 353 354contents(Handle, [Path|T]) :- 355 archive_next_header(Handle, Path), 356 !, 357 contents(Handle, T). 358contents(_, []). 359 360%! archive_data_stream(+Archive, -DataStream, +Options) is nondet. 361% 362% True when DataStream is a stream to a data object inside 363% Archive. This predicate transparently unpacks data inside 364% _possibly nested_ archives, e.g., a _tar_ file inside a _zip_ 365% file. It applies the appropriate decompression filters and thus 366% ensures that Prolog reads the plain data from DataStream. 367% DataStream must be closed after the content has been processed. 368% Backtracking opens the next member of the (nested) archive. This 369% predicate processes the following options: 370% 371% - meta_data(-Data:list(dict)) 372% If provided, Data is unified with a list of filters applied to 373% the (nested) archive to open the current DataStream. The first 374% element describes the outermost archive. Each Data dict 375% contains the header properties (archive_header_property/2) as 376% well as the keys: 377% 378% - filters(Filters:list(atom)) 379% Filter list as obtained from archive_property/2 380% - name(Atom) 381% Name of the entry. 382% 383% Note that this predicate can handle a non-archive files as a 384% pseudo archive holding a single stream by using archive_open/3 385% with the options `[format(all), format(raw)]`. 386 387archive_data_stream(Archive, DataStream, Options) :- 388 option(meta_data(MetaData), Options, _), 389 archive_content(Archive, DataStream, MetaData, []). 390 391archive_content(Archive, Entry, [EntryMetadata|PipeMetadataTail], PipeMetadata2) :- 392 archive_property(Archive, filter(Filters)), 393 repeat, 394 ( archive_next_header(Archive, EntryName) 395 -> findall(EntryProperty, 396 archive_header_property(Archive, EntryProperty), 397 EntryProperties), 398 dict_create(EntryMetadata, archive_meta_data, 399 [ filters(Filters), 400 name(EntryName) 401 | EntryProperties 402 ]), 403 ( EntryMetadata.filetype == file 404 -> archive_open_entry(Archive, Entry0), 405 ( EntryName == data, 406 EntryMetadata.format == raw 407 -> % This is the last entry in this nested branch. 408 % We therefore close the choicepoint created by repeat/0. 409 % Not closing this choicepoint would cause 410 % archive_next_header/2 to throw an exception. 411 !, 412 PipeMetadataTail = PipeMetadata2, 413 Entry = Entry0 414 ; PipeMetadataTail = PipeMetadata1, 415 open_substream(Entry0, 416 Entry, 417 PipeMetadata1, 418 PipeMetadata2) 419 ) 420 ; fail 421 ) 422 ; !, 423 fail 424 ). 425 426open_substream(In, Entry, ArchiveMetadata, PipeTailMetadata) :- 427 setup_call_cleanup( 428 archive_open(stream(In), 429 Archive, 430 [ close_parent(true), 431 format(all), 432 format(raw) 433 ]), 434 archive_content(Archive, Entry, ArchiveMetadata, PipeTailMetadata), 435 archive_close(Archive)). 436 437 438%! archive_create(+OutputFile, +InputFiles, +Options) is det. 439% 440% Convenience predicate to create an archive in OutputFile with 441% data from a list of InputFiles and the given Options. 442% 443% Besides options supported by archive_open/4, the following 444% options are supported: 445% 446% * directory(+Directory) 447% Changes the directory before adding input files. If this is 448% specified, paths of input files must be relative to 449% Directory and archived files will not have Directory 450% as leading path. This is to simulate =|-C|= option of 451% the =tar= program. 452% 453% * format(+Format) 454% Write mode supports the following formats: `7zip`, `cpio`, 455% `gnutar`, `iso9660`, `xar` and `zip`. Note that a particular 456% installation may support only a subset of these, depending on 457% the configuration of `libarchive`. 458 459archive_create(OutputFile, InputFiles, Options) :- 460 must_be(list(text), InputFiles), 461 option(directory(BaseDir), Options, '.'), 462 setup_call_cleanup( 463 archive_open(OutputFile, write, Archive, Options), 464 archive_create_1(Archive, BaseDir, BaseDir, InputFiles, top), 465 archive_close(Archive)). 466 467archive_create_1(_, _, _, [], _) :- !. 468archive_create_1(Archive, Base, Current, ['.'|Files], sub) :- 469 !, 470 archive_create_1(Archive, Base, Current, Files, sub). 471archive_create_1(Archive, Base, Current, ['..'|Files], Where) :- 472 !, 473 archive_create_1(Archive, Base, Current, Files, Where). 474archive_create_1(Archive, Base, Current, [File|Files], Where) :- 475 directory_file_path(Current, File, Filename), 476 archive_create_2(Archive, Base, Filename), 477 archive_create_1(Archive, Base, Current, Files, Where). 478 479archive_create_2(Archive, Base, Directory) :- 480 exists_directory(Directory), 481 !, 482 entry_name(Base, Directory, Directory0), 483 archive_next_header(Archive, Directory0), 484 time_file(Directory, Time), 485 archive_set_header_property(Archive, mtime(Time)), 486 archive_set_header_property(Archive, filetype(directory)), 487 archive_open_entry(Archive, EntryStream), 488 close(EntryStream), 489 directory_files(Directory, Files), 490 archive_create_1(Archive, Base, Directory, Files, sub). 491archive_create_2(Archive, Base, Filename) :- 492 entry_name(Base, Filename, Filename0), 493 archive_next_header(Archive, Filename0), 494 size_file(Filename, Size), 495 time_file(Filename, Time), 496 archive_set_header_property(Archive, size(Size)), 497 archive_set_header_property(Archive, mtime(Time)), 498 setup_call_cleanup( 499 archive_open_entry(Archive, EntryStream), 500 setup_call_cleanup( 501 open(Filename, read, DataStream, [type(binary)]), 502 copy_stream_data(DataStream, EntryStream), 503 close(DataStream)), 504 close(EntryStream)). 505 506entry_name('.', Name, Name) :- !. 507entry_name(Base, Name, EntryName) :- 508 directory_file_path(Base, EntryName, Name).