Back to... Zip-Ada

Source file : zip.ads



   1  --   ________  ___   ______       ______      ___
   2  --  /___..._/  |.|   |.___.\     /. __ .\   __|.|   ____
   3  --     /../    |.|   |.____/     |.|__|.|  /....|  __\..\
   4  --   _/../___  |.|   |.|    ===  |..__..| |. = .| | = ..|
   5  --  /_______/  |_|  /__|        /__|  |_|  \__\_|  \__\_|
   6  
   7  --  Zip library
   8  ---------------
   9  --
  10  --  Library for manipulating archive files in the Zip format
  11  --
  12  --  Pure Ada 2012+ code, 100% portable: OS-, CPU- and compiler- independent.
  13  --
  14  --  Version / date / download info: see the version, reference, web strings
  15  --   defined at the end of the public part of this package.
  16  
  17  --  Legal licensing note:
  18  
  19  --  Copyright (c) 1999 .. 2025 Gautier de Montmollin
  20  --  SWITZERLAND
  21  
  22  --  Permission is hereby granted, free of charge, to any person obtaining a copy
  23  --  of this software and associated documentation files (the "Software"), to deal
  24  --  in the Software without restriction, including without limitation the rights
  25  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  26  --  copies of the Software, and to permit persons to whom the Software is
  27  --  furnished to do so, subject to the following conditions:
  28  
  29  --  The above copyright notice and this permission notice shall be included in
  30  --  all copies or substantial portions of the Software.
  31  
  32  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  33  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  34  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  35  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  36  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  37  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  38  --  THE SOFTWARE.
  39  
  40  --  NB: this is the MIT License, as found 12-Sep-2007 on the site
  41  --  http://www.opensource.org/licenses/mit-license.php
  42  
  43  with Zip_Streams;
  44  with Ada.Calendar, Ada.Finalization, Ada.Streams.Stream_IO, Ada.Text_IO;
  45  with Interfaces;
  46  with System;
  47  
  48  package Zip is
  49  
  50    -----------------------------------------------------------------
  51    --  Zip_Info                                                   --
  52    -----------------------------------------------------------------
  53    --  Zip_Info contains the Zip file name (if it is a file)      --
  54    --  or its input stream access, and the archive's directory.   --
  55    -----------------------------------------------------------------
  56  
  57    type Zip_Info is
  58      new Ada.Finalization.Controlled with private;
  59  
  60    -----------------------------------------------
  61    --  Load the whole .zip directory contained  --
  62    --  in archive (from) for quick searching.   --
  63    -----------------------------------------------
  64  
  65     type Duplicate_name_policy is
  66       (admit_duplicates,     --  two entries in the Zip archive may have the same full name
  67        error_on_duplicate);  --  raise exception on attempt to add twice the same entry name
  68  
  69    --  Load from a file
  70  
  71    procedure Load
  72      (info            : out Zip_Info;
  73       from            : in  String;  --  Zip file name
  74       case_sensitive  : in  Boolean := False;
  75       duplicate_names : in  Duplicate_name_policy := error_on_duplicate);
  76  
  77    --  Load from a stream
  78  
  79    procedure Load
  80      (info            :    out Zip_Info;
  81       from            : in out Zip_Streams.Root_Zipstream_Type'Class;
  82       case_sensitive  : in     Boolean := False;
  83       duplicate_names : in     Duplicate_name_policy := error_on_duplicate);
  84  
  85    Archive_corrupted,
  86    Archive_open_error,
  87    Duplicate_name : exception;
  88  
  89    Zip_file_open_error : exception renames Archive_open_error;  --  Archive is not always a file!
  90    pragma Obsolescent (Zip_file_open_error, "Better use the name: Archive_open_error");
  91  
  92    --  Zip_File_Error: exception renames Archive_corrupted;   --   Now really obsolete.
  93    --  pragma Obsolescent(Zip_File_Error);                    --   Now really obsolete.
  94  
  95    function Is_loaded (info : in Zip_Info) return Boolean;
  96  
  97    function Zip_Name (info : in Zip_Info) return String;
  98  
  99    function Zip_Comment (info : in Zip_Info) return String;
 100  
 101    function Zip_Stream (info : in Zip_Info) return Zip_Streams.Zipstream_Class_Access;
 102  
 103    function Entries (info : in Zip_Info) return Natural;
 104  
 105    procedure Delete (info : in out Zip_Info);
 106    pragma Obsolescent (Delete, "Delete happens automatically since v.56.");
 107  
 108    Forgot_to_load_zip_info : exception;
 109  
 110    --  Data sizes in archive
 111    subtype Zip_32_Data_Size_Type is Interfaces.Unsigned_32;
 112    subtype Zip_64_Data_Size_Type is Interfaces.Unsigned_64;
 113  
 114    ---------
 115  
 116    --  Compression "methods" - actually, *formats* - in the "official" PKWARE Zip format.
 117    --  Details in appnote.txt, part V.J
 118    --
 119    --     C : supported by Zip-Ada for compressing
 120    --     D : supported by Zip-Ada for decompressing
 121  
 122    type PKZip_method is
 123      (store,       --  C, D
 124       shrink,      --  C, D
 125       reduce_1,    --  C, D
 126       reduce_2,    --  C, D
 127       reduce_3,    --  C, D
 128       reduce_4,    --  C, D
 129       implode,     --     D
 130       tokenize,
 131       deflate,     --  C, D
 132       deflate_e,   --     D  -  "Enhanced deflate" or "Deflate64"
 133       bzip2_meth,  --  C, D
 134       lzma_meth,   --  C, D
 135       zstandard,
 136       mp3_recomp,
 137       xz_recomp,
 138       jpeg_recomp,
 139       wavpack,
 140       ppmd,
 141       unknown);
 142  
 143    subtype Reduce_Format is PKZip_method range reduce_1 .. reduce_4;
 144  
 145    --  Return a String image, nicer than the 'Image attribute.
 146    function Image (m : PKZip_method) return String;
 147  
 148    --  Technical: translates the method code as set in zip archives
 149    function Method_from_Code (x : Interfaces.Unsigned_16) return PKZip_method;
 150    function Method_from_Code (x : Natural) return PKZip_method;
 151  
 152    --  Internal time definition
 153    subtype Time is Zip_Streams.Time;
 154    function Convert (date : in Ada.Calendar.Time) return Time
 155      renames Zip_Streams.Calendar.Convert;
 156    function Convert (date : in Time) return Ada.Calendar.Time
 157      renames Zip_Streams.Calendar.Convert;
 158  
 159    --  Entry names within Zip archives are encoded either with
 160    --      * the IBM PC (the one with a monochrome screen, only text mode)'s
 161    --          character set: IBM 437
 162    --  or
 163    --      * Unicode UTF-8
 164    --
 165    --  Documentation: PKWARE's Appnote.txt, APPENDIX D - Language Encoding (EFS)
 166  
 167    type Zip_Name_Encoding is (IBM_437, UTF_8);
 168  
 169    --  Traverse a whole Zip_Info directory in sorted order, giving the
 170    --  name for each entry to an user-defined "Action" procedure.
 171    --  Concretely, you can process a whole Zip file that way, by extracting data
 172    --  with Extract, or open a reader stream with UnZip.Streams.
 173    --  See the Comp_Zip or Find_Zip tools as application examples.
 174    generic
 175      with procedure Action (name : String);  --  'name' is compressed entry's name
 176    procedure Traverse (z : Zip_Info);
 177  
 178    --  Same as Traverse, but Action gives also full name information.
 179    --  The pair (name, name_encoding) allows for an unambiguous Unicode
 180    --  name decoding. See the AZip project for an implementation.
 181    generic
 182      with procedure Action
 183        (name          : String;  --  'name' is compressed entry's name
 184         name_encoding : Zip_Name_Encoding);
 185    --
 186    procedure Traverse_Unicode (z : Zip_Info);
 187  
 188    --  Same as Traverse, but Action gives also full technical informations
 189    --  about the compressed entry.
 190    generic
 191      with procedure Action
 192        (name             : String;  --  'name' is compressed entry's name
 193         file_index       : Zip_Streams.ZS_Index_Type;
 194         comp_size        : Zip_64_Data_Size_Type;
 195         uncomp_size      : Zip_64_Data_Size_Type;
 196         crc_32           : Interfaces.Unsigned_32;
 197         date_time        : Time;
 198         method           : PKZip_method;
 199         name_encoding    : Zip_Name_Encoding;
 200         read_only        : Boolean;
 201         encrypted_2_x    : Boolean;  --  PKZip 2.x encryption
 202         user_code        : in out Integer);
 203    --
 204    procedure Traverse_verbose (z : Zip_Info);
 205  
 206    --  Academic: see how well the name tree is balanced
 207    procedure Tree_Stat
 208      (z         : in     Zip_Info;
 209       total     :    out Natural;
 210       max_depth :    out Natural;
 211       avg_depth :    out Float);
 212  
 213    --------------------------------------------------------------------------
 214    -- Offsets - various procedures giving 1-based indexes to local headers --
 215    --------------------------------------------------------------------------
 216  
 217    --  Find 1st offset in a Zip stream (i.e. the first's archived entry's offset)
 218  
 219    procedure Find_first_Offset
 220      (file           : in out Zip_Streams.Root_Zipstream_Type'Class;
 221       file_index     :    out Zip_Streams.ZS_Index_Type);
 222  
 223    --  If the archive is empty (the 22 byte .zip file), there is no first entry or offset.
 224    Archive_is_empty : exception;
 225  
 226    --  Find offset of a certain compressed file
 227    --  in a Zip file (file opened and kept open)
 228  
 229    procedure Find_Offset
 230      (file           : in out Zip_Streams.Root_Zipstream_Type'Class;
 231       name           : in     String;
 232       case_sensitive : in     Boolean;
 233       file_index     :    out Zip_Streams.ZS_Index_Type;
 234       comp_size      :    out Zip_64_Data_Size_Type;
 235       uncomp_size    :    out Zip_64_Data_Size_Type;
 236       crc_32         :    out Interfaces.Unsigned_32);
 237  
 238    --  Find offset of a certain compressed file in a pre-loaded Zip_Info data
 239  
 240    procedure Find_Offset
 241      (info           : in     Zip_Info;
 242       name           : in     String;
 243       name_encoding  :    out Zip_Name_Encoding;
 244       file_index     :    out Zip_Streams.ZS_Index_Type;
 245       comp_size      :    out Zip_64_Data_Size_Type;
 246       uncomp_size    :    out Zip_64_Data_Size_Type;
 247       crc_32         :    out Interfaces.Unsigned_32);
 248  
 249    --  Find offset of a certain compressed file in a pre-loaded Zip_Info data.
 250    --  This version scans the whole catalogue and returns the index of the first
 251    --  entry with a matching name, ignoring directory information.
 252    --  For instance, if the Zip archive contains "zip-ada/zip_lib/zip.ads",
 253    --  "zip.ads" will match - or even "ZIP.ads" if info has been loaded in case-insensitive mode.
 254    --  Caution: this may be much slower than the exact search with Find_offset.
 255  
 256    procedure Find_Offset_without_Directory
 257      (info           : in     Zip.Zip_Info;
 258       name           : in     String;
 259       name_encoding  :    out Zip.Zip_Name_Encoding;
 260       file_index     :    out Zip_Streams.ZS_Index_Type;
 261       comp_size      :    out Zip_64_Data_Size_Type;
 262       uncomp_size    :    out Zip_64_Data_Size_Type;
 263       crc_32         :    out Interfaces.Unsigned_32);
 264  
 265    Entry_name_not_found : exception;
 266    File_name_not_found : exception renames Entry_name_not_found;
 267    pragma Obsolescent (File_name_not_found, "Better use the name: Entry_name_not_found");
 268  
 269    function Exists (info : Zip_Info; name : String) return Boolean;
 270  
 271    --  User code: any information e.g. as a result of a string search,
 272    --  archive comparison, archive update, recompression,...
 273  
 274    procedure Set_User_Code (info : Zip_Info; name : String; code : Integer);
 275  
 276    function User_Code (info : Zip_Info; name : String) return Integer;
 277  
 278    procedure Get_Sizes
 279      (info           : in     Zip_Info;
 280       name           : in     String;
 281       comp_size      :    out Zip_64_Data_Size_Type;
 282       uncomp_size    :    out Zip_64_Data_Size_Type);
 283  
 284    --  User-defined procedure for feedback occuring during
 285    --  compression or decompression (entry_skipped meaningful
 286    --  only for the latter)
 287  
 288    type Feedback_Proc is access
 289      procedure
 290        (percents_done : in     Natural;   --  %'s completed
 291         entry_skipped : in     Boolean;   --  indicates one can show "skipped", no %'s
 292         user_abort    :    out Boolean);  --  e.g. transmit a "click on Cancel" here
 293  
 294    -------------------------------------------------------------------------
 295    -- Goodies - things used internally by Zip-Ada but are not bound to    --
 296    -- Zip archive purposes and that might be generally useful.            --
 297    -------------------------------------------------------------------------
 298  
 299    --  Block_Read: general-purpose procedure (nothing really specific to Zip /
 300    --  UnZip): reads either the whole buffer from a file, or if the end of
 301    --  the file lays inbetween, a part of the buffer.
 302    --
 303    --  The procedure's names and parameters corresponds to Borland / Turbo
 304    --  Pascal / Delphi's BlockRead's.
 305  
 306    subtype Byte is Interfaces.Unsigned_8;
 307    type Byte_Buffer is array (Integer range <>) of aliased Byte;
 308    type p_Byte_Buffer is access Byte_Buffer;
 309  
 310    procedure Block_Read
 311      (file          : in     Ada.Streams.Stream_IO.File_Type;
 312       buffer        :    out Byte_Buffer;
 313       actually_read :    out Natural);
 314       --  ^ = buffer'Length if no end of file occurred
 315       --      before last buffer element.
 316  
 317    --  Same for general streams
 318    --
 319    procedure Block_Read
 320      (stream        : in out Zip_Streams.Root_Zipstream_Type'Class;
 321       buffer        :    out Byte_Buffer;
 322       actually_read :    out Natural);
 323       --  ^ = buffer'Length if no end of stream occurred
 324       --      before last buffer element.
 325  
 326    --  Same, but instead of giving actually_read, raises End_Error if
 327    --  the buffer cannot be fully read.
 328    --  This mimics the 'Read stream attribute; can be a lot faster, depending
 329    --  on the compiler's run-time library.
 330    procedure Block_Read
 331      (stream : in out Zip_Streams.Root_Zipstream_Type'Class;
 332       buffer :    out Byte_Buffer);
 333  
 334    --  This mimics the 'Write stream attribute; can be a lot faster, depending
 335    --  on the compiler's run-time library.
 336    --  NB: here we can use the root stream type: no question of size, index,...
 337    procedure Block_Write
 338      (stream : in out Ada.Streams.Root_Stream_Type'Class;
 339       buffer : in     Byte_Buffer);
 340  
 341    --  Copy a chunk from a stream into another one, using a temporary buffer
 342    procedure Copy_Chunk
 343      (from        : in out Zip_Streams.Root_Zipstream_Type'Class;
 344       into        : in out Ada.Streams.Root_Stream_Type'Class;
 345       bytes       : Natural;
 346       buffer_size : Positive := 1024 * 1024;
 347       Feedback    : Feedback_Proc := null);
 348  
 349    --  Copy a whole file into a stream, using a temporary buffer
 350    procedure Copy_File
 351      (file_name   : String;
 352       into        : in out Ada.Streams.Root_Stream_Type'Class;
 353       buffer_size : Positive := 1024 * 1024);
 354  
 355    --  This does the same as Ada 2005's Ada.Directories.Exists
 356    --  Just there as helper for Ada 95 only systems
 357    --
 358    function Exists (file_name : String) return Boolean;
 359  
 360    --  Write a string containing line endings (possibly from another system)
 361    --  into a text file, with the "correct", native line endings.
 362    --  Works for displaying/saving correctly
 363    --  CR&LF (DOS/Win), LF (UNIX), CR (Mac OS < 9)
 364    --
 365    procedure Put_Multi_Line
 366      (out_file :        Ada.Text_IO.File_Type;
 367       text     :        String);
 368  
 369    procedure Write_as_Text
 370      (out_file  :        Ada.Text_IO.File_Type;
 371       buffer    :        Byte_Buffer;
 372       last_char : in out Character);  --  track line-ending characters between writes
 373  
 374    function Hexadecimal (x : Interfaces.Unsigned_32) return String;
 375  
 376    -----------------------------------------------------------------
 377    --  Information about this package - e.g., for an "about" box  --
 378    -----------------------------------------------------------------
 379  
 380    version   : constant String := "61";
 381    reference : constant String := "29-Mar-2025";
 382    --  Hopefully the latest version can be acquired from one of those URLs:
 383    web       : constant String := "https://unzip-ada.sourceforge.io/";
 384    web2      : constant String := "https://sourceforge.net/projects/unzip-ada/";
 385    web3      : constant String := "https://github.com/zertovitch/zip-ada";
 386    web4      : constant String := "https://alire.ada.dev/crates/zipada";
 387  
 388    ---------------------
 389    --  Private items  --
 390    ---------------------
 391  
 392  private
 393  
 394    --  Zip_Info, 23.VI.1999.
 395    --
 396    --  The PKZIP central directory is coded here as a binary tree
 397    --  to allow a fast retrieval of the searched offset in zip file.
 398    --  E.g. for a 1000-file archive, the offset will be found in less
 399    --  than 11 moves: 2**10=1024 (balanced case), without any read
 400    --  in the archive.
 401    --
 402    --  Notes on search dictionary
 403    ------------------------------
 404    --  19-Oct-2018: rev. 670 to 683 used a Vector and a Hashed Map
 405    --      from Ada.Containers. The loading of the dictionary was
 406    --      much faster (2x), but there were performance bottlenecks elsewhere,
 407    --      not solved by profiling. On an archive with 18000 small entries of
 408    --      around 1 KiB each, comp_zip ran 100x slower!
 409    --      Neither the restricted use of Unbounded_String, nor the replacement
 410    --      of the Vector by an array helped solving the performance issue.
 411    --  2022: second attempt with Vectors & Indefinite_Hashed_Maps (both a vector
 412    --      and a map are needed because a Zip archive may contain entries with
 413    --      duplicate keys; otherwise a map would be sufficient).
 414    --         - Test_Zip_Info_Timing: load time on many_65535.zip:
 415    --              0.75 seconds (binary tree) ->  0.44 seconds (vector & map)
 416    --         - But... comp_zip many_4096.zip many_4096.zip -q2:
 417    --              5.5  seconds (binary tree) -> 13.2  seconds (vector & map) !
 418  
 419    type Dir_node;
 420    type p_Dir_node is access Dir_node;
 421  
 422    type Dir_node (name_len : Natural) is record
 423      left, right      : p_Dir_node;
 424      dico_name        : String (1 .. name_len);  --  UPPER if case-insensitive search
 425      file_name        : String (1 .. name_len);
 426      file_index       : Zip_Streams.ZS_Index_Type;
 427      comp_size        : Zip_64_Data_Size_Type;
 428      uncomp_size      : Zip_64_Data_Size_Type;
 429      crc_32           : Interfaces.Unsigned_32;
 430      date_time        : Time;
 431      method           : PKZip_method;
 432      name_encoding    : Zip_Name_Encoding;
 433      read_only        : Boolean;  --  TBD: attributes of most supported systems
 434      encrypted_2_x    : Boolean;
 435      user_code        : Integer;
 436    end record;
 437  
 438    type Zip_archive_format_type is (Zip_32, Zip_64);
 439  
 440    type p_String is access String;
 441  
 442    type Zip_Info is new Ada.Finalization.Controlled with record
 443      loaded             : Boolean := False;
 444      case_sensitive     : Boolean;
 445      zip_file_name      : p_String;                            --  a file name...
 446      zip_input_stream   : Zip_Streams.Zipstream_Class_Access;  --  ...or an input stream
 447      --  ^ when not null, we use this, and not zip_file_name
 448      dir_binary_tree    : p_Dir_node;
 449      total_entries      : Natural;
 450      zip_file_comment   : p_String;
 451      zip_archive_format : Zip_archive_format_type := Zip_32;
 452    end record;
 453  
 454    --  After a copy, need to clone a few things.
 455    overriding procedure Adjust   (info : in out Zip_Info);
 456  
 457    --  Free heap-allocated memory.
 458    overriding procedure Finalize (info : in out Zip_Info);
 459  
 460    --  System.Word_Size: 13.3(8): A word is the largest amount of storage
 461    --  that can be conveniently and efficiently manipulated by the hardware,
 462    --  given the implementation's run-time model.
 463    --
 464    min_bits_32 : constant := Integer'Max (32, System.Word_Size);
 465    min_bits_16 : constant := Integer'Max (16, System.Word_Size);
 466  
 467    --  We define an Integer type which is at least 32 bits, but n bits
 468    --  on a native n (> 32) bits architecture.
 469    --  Integer_M16 is not needed: Integer already guarantees 16 bits
 470    --
 471    type Integer_M32 is range -2**(min_bits_32 - 1) .. 2**(min_bits_32 - 1) - 1;
 472    subtype Natural_M32  is Integer_M32 range 0 .. Integer_M32'Last;
 473    subtype Positive_M32 is Integer_M32 range 1 .. Integer_M32'Last;
 474  
 475    type Unsigned_M16 is mod 2**min_bits_16;
 476    type Unsigned_M32 is mod 2**min_bits_32;
 477  
 478    --  Codes for compression formats in Zip archives
 479    --  See PKWARE's Appnote, "4.4.5 compression method"
 480    --
 481    package Compression_format_code is
 482      store_code        : constant :=  0;
 483      shrink_code       : constant :=  1;
 484      reduce_code       : constant :=  2;
 485      implode_code      : constant :=  6;
 486      tokenize_code     : constant :=  7;
 487      deflate_code      : constant :=  8;
 488      deflate_e_code    : constant :=  9;
 489      bzip2_code        : constant := 12;
 490      lzma_code         : constant := 14;
 491      zstandard_code    : constant := 93;
 492      mp3_code          : constant := 94;
 493      xz_code           : constant := 95;
 494      jpeg_code         : constant := 96;
 495      wavpack_code      : constant := 97;
 496      ppmd_code         : constant := 98;
 497    end Compression_format_code;
 498  
 499  end Zip;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.