Back to... Zip-Ada

Source file : zip.ads


--   ________  ___   ______       ______      ___
--  /___..._/  |.|   |.___.\     /. __ .\   __|.|   ____
--     /../    |.|   |.____/     |.|__|.|  /....|  __\..\
--   _/../___  |.|   |.|    ===  |..__..| |. = .| | = ..|
--  /_______/  |_|  /__|        /__|  |_|  \__\_|  \__\_|

--  Zip library
---------------
--
--  Library for manipulating archive files in the Zip format
--
--  Pure Ada 2005+ code, 100% portable: OS-, CPU- and compiler- independent.
--
--  Version / date / download info: see the version, reference, web strings
--   defined at the end of the public part of this package.

--  Legal licensing note:

--  Copyright (c) 1999 .. 2023 Gautier de Montmollin
--  SWITZERLAND

--  Permission is hereby granted, free of charge, to any person obtaining a copy
--  of this software and associated documentation files (the "Software"), to deal
--  in the Software without restriction, including without limitation the rights
--  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
--  copies of the Software, and to permit persons to whom the Software is
--  furnished to do so, subject to the following conditions:

--  The above copyright notice and this permission notice shall be included in
--  all copies or substantial portions of the Software.

--  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
--  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
--  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
--  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
--  THE SOFTWARE.

--  NB: this is the MIT License, as found 12-Sep-2007 on the site
--  http://www.opensource.org/licenses/mit-license.php

with Zip_Streams;
with Ada.Calendar, Ada.Finalization, Ada.Streams.Stream_IO, Ada.Text_IO;
with Interfaces;
with System;

package Zip is

  -----------------------------------------------------------------
  --  Zip_info                                                   --
  -----------------------------------------------------------------
  --  Zip_info contains the Zip file name (if it is a file)      --
  --  or its input stream access, and the archive's directory.   --
  -----------------------------------------------------------------

  type Zip_info is
    new Ada.Finalization.Controlled with private;

  -----------------------------------------------
  --  Load the whole .zip directory contained  --
  --  in archive (from) for quick searching.   --
  -----------------------------------------------

   type Duplicate_name_policy is
     (admit_duplicates,    --  two entries in the Zip archive may have the same full name
      error_on_duplicate   --  raise exception on attempt to add twice the same entry name
     );

  --  Load from a file

  procedure Load (
    info            : out Zip_info;
    from            : in  String;  --  Zip file name
    case_sensitive  : in  Boolean := False;
    duplicate_names : in  Duplicate_name_policy := error_on_duplicate
  );

  --  Load from a stream

  procedure Load (
    info            :    out Zip_info;
    from            : in out Zip_Streams.Root_Zipstream_Type'Class;
    case_sensitive  : in     Boolean := False;
    duplicate_names : in     Duplicate_name_policy := error_on_duplicate
  );

  Archive_corrupted,
  Archive_open_error,
  Duplicate_name : exception;

  Zip_file_open_error : exception renames Archive_open_error;  --  Archive is not always a file!
  pragma Obsolescent (Zip_file_open_error, "Better use the name: Archive_open_error");

  --  Zip_file_Error: exception renames Archive_corrupted;   --   Now really obsolete.
  --  pragma Obsolescent(Zip_file_Error);                    --   Now really obsolete.

  function Is_loaded (info : in Zip_info) return Boolean;

  function Zip_name (info : in Zip_info) return String;

  function Zip_comment (info : in Zip_info) return String;

  function Zip_stream (info : in Zip_info) return Zip_Streams.Zipstream_Class_Access;

  function Entries (info : in Zip_info) return Natural;

  procedure Delete (info : in out Zip_info);
  pragma Obsolescent (Delete, "Delete happens automatically since v.56.");

  Forgot_to_load_zip_info : exception;

  --  Data sizes in archive
  subtype Zip_32_Data_Size_Type is Interfaces.Unsigned_32;
  subtype Zip_64_Data_Size_Type is Interfaces.Unsigned_64;

  ---------

  --  Compression "methods" - actually, *formats* - in the "official" PKWARE Zip format.
  --  Details in appnote.txt, part V.J
  --
  --     C : supported by Zip-Ada for compressing
  --     D : supported by Zip-Ada for decompressing

  type PKZip_method is
    (store,     --  C, D
     shrink,    --  C, D
     reduce_1,  --  C, D
     reduce_2,  --  C, D
     reduce_3,  --  C, D
     reduce_4,  --  C, D
     implode,   --     D
     tokenize,
     deflate,   --  C, D
     deflate_e, --     D  -  "Enhanced deflate" or "Deflate64"
     bzip2,     --     D
     lzma_meth, --  C, D
     zstandard,
     mp3_recomp,
     xz_recomp,
     jpeg_recomp,
     wavpack,
     ppmd,
     unknown
    );

  subtype Reduce_Format is PKZip_method range reduce_1 .. reduce_4;

  --  Return a String image, nicer than the 'Image attribute.
  function Image (m : PKZip_method) return String;

  --  Technical: translates the method code as set in zip archives
  function Method_from_code (x : Interfaces.Unsigned_16) return PKZip_method;
  function Method_from_code (x : Natural) return PKZip_method;

  --  Internal time definition
  subtype Time is Zip_Streams.Time;
  function Convert (date : in Ada.Calendar.Time) return Time
    renames Zip_Streams.Calendar.Convert;
  function Convert (date : in Time) return Ada.Calendar.Time
    renames Zip_Streams.Calendar.Convert;

  --  Entry names within Zip archives are encoded either with
  --      * the IBM PC (the one with a monochrome screen, only text mode)'s
  --          character set: IBM 437
  --  or
  --      * Unicode UTF-8
  --
  --  Documentation: PKWARE's Appnote.txt, APPENDIX D - Language Encoding (EFS)

  type Zip_name_encoding is (IBM_437, UTF_8);

  --  Traverse a whole Zip_info directory in sorted order, giving the
  --  name for each entry to an user-defined "Action" procedure.
  --  Concretely, you can process a whole Zip file that way, by extracting data
  --  with Extract, or open a reader stream with UnZip.Streams.
  --  See the Comp_Zip or Find_Zip tools as application examples.
  generic
    with procedure Action (name : String);  --  'name' is compressed entry's name
  procedure Traverse (z : Zip_info);

  --  Same as Traverse, but Action gives also full name information.
  --  The pair (name, name_encoding) allows for an unambiguous Unicode
  --  name decoding. See the AZip project for an implementation.
  generic
    with procedure Action (
      name          : String;  --  'name' is compressed entry's name
      name_encoding : Zip_name_encoding
    );
  procedure Traverse_Unicode (z : Zip_info);

  --  Same as Traverse, but Action gives also full technical informations
  --  about the compressed entry.
  generic
    with procedure Action (
      name             : String;  --  'name' is compressed entry's name
      file_index       : Zip_Streams.ZS_Index_Type;
      comp_size        : Zip_64_Data_Size_Type;
      uncomp_size      : Zip_64_Data_Size_Type;
      crc_32           : Interfaces.Unsigned_32;
      date_time        : Time;
      method           : PKZip_method;
      name_encoding    : Zip_name_encoding;
      read_only        : Boolean;
      encrypted_2_x    : Boolean;  --  PKZip 2.x encryption
      user_code        : in out Integer
    );
  procedure Traverse_verbose (z : Zip_info);

  --  Academic: see how well the name tree is balanced
  procedure Tree_stat (
    z         : in     Zip_info;
    total     :    out Natural;
    max_depth :    out Natural;
    avg_depth :    out Float
  );

  --------------------------------------------------------------------------
  -- Offsets - various procedures giving 1-based indexes to local headers --
  --------------------------------------------------------------------------

  --  Find 1st offset in a Zip stream (i.e. the first's archived entry's offset)

  procedure Find_first_offset (
    file           : in out Zip_Streams.Root_Zipstream_Type'Class;
    file_index     :    out Zip_Streams.ZS_Index_Type);

  --  If the archive is empty (the 22 byte .zip file), there is no first entry or offset.
  Archive_is_empty : exception;

  --  Find offset of a certain compressed file
  --  in a Zip file (file opened and kept open)

  procedure Find_offset (
    file           : in out Zip_Streams.Root_Zipstream_Type'Class;
    name           : in     String;
    case_sensitive : in     Boolean;
    file_index     :    out Zip_Streams.ZS_Index_Type;
    comp_size      :    out Zip_64_Data_Size_Type;
    uncomp_size    :    out Zip_64_Data_Size_Type;
    crc_32         :    out Interfaces.Unsigned_32
  );

  --  Find offset of a certain compressed file in a pre-loaded Zip_info data

  procedure Find_offset (
    info           : in     Zip_info;
    name           : in     String;
    name_encoding  :    out Zip_name_encoding;
    file_index     :    out Zip_Streams.ZS_Index_Type;
    comp_size      :    out Zip_64_Data_Size_Type;
    uncomp_size    :    out Zip_64_Data_Size_Type;
    crc_32         :    out Interfaces.Unsigned_32
  );

  --  Find offset of a certain compressed file in a pre-loaded Zip_info data.
  --  This version scans the whole catalogue and returns the index of the first
  --  entry with a matching name, ignoring directory information.
  --  For instance, if the Zip archive contains "zip-ada/zip_lib/zip.ads",
  --  "zip.ads" will match - or even "ZIP.ads" if info has been loaded in case-insensitive mode.
  --  Caution: this may be much slower than the exact search with Find_offset.

  procedure Find_offset_without_directory (
    info           : in     Zip.Zip_info;
    name           : in     String;
    name_encoding  :    out Zip.Zip_name_encoding;
    file_index     :    out Zip_Streams.ZS_Index_Type;
    comp_size      :    out Zip_64_Data_Size_Type;
    uncomp_size    :    out Zip_64_Data_Size_Type;
    crc_32         :    out Interfaces.Unsigned_32
  );

  Entry_name_not_found : exception;
  File_name_not_found : exception renames Entry_name_not_found;
  pragma Obsolescent (File_name_not_found, "Better use the name: Entry_name_not_found");

  function Exists (info : Zip_info; name : String) return Boolean;

  --  User code: any information e.g. as a result of a string search,
  --  archive comparison, archive update, recompression,...

  procedure Set_user_code (
    info           : in Zip_info;
    name           : in String;
    code           : in Integer
  );

  function User_code (
    info           : in Zip_info;
    name           : in String
  )
  return Integer;

  procedure Get_sizes (
    info           : in     Zip_info;
    name           : in     String;
    comp_size      :    out Zip_64_Data_Size_Type;
    uncomp_size    :    out Zip_64_Data_Size_Type
  );

  --  User-defined procedure for feedback occuring during
  --  compression or decompression (entry_skipped meaningful
  --  only for the latter)

  type Feedback_proc is access
    procedure (
      percents_done :  in Natural;  -- %'s completed
      entry_skipped :  in Boolean;  -- indicates one can show "skipped", no %'s
      user_abort    : out Boolean   -- e.g. transmit a "click on Cancel" here
    );

  -------------------------------------------------------------------------
  -- Goodies - things used internally by Zip-Ada but are not bound to    --
  -- Zip archive purposes and that might be generally useful.            --
  -------------------------------------------------------------------------

  --  Block_Read: general-purpose procedure (nothing really specific to Zip /
  --  UnZip): reads either the whole buffer from a file, or if the end of
  --  the file lays inbetween, a part of the buffer.
  --
  --  The procedure's names and parameters corresponds to Borland / Turbo
  --  Pascal / Delphi's BlockRead's.

  subtype Byte is Interfaces.Unsigned_8;
  type Byte_Buffer is array (Integer range <>) of aliased Byte;
  type p_Byte_Buffer is access Byte_Buffer;

  procedure Block_Read (
    file          : in     Ada.Streams.Stream_IO.File_Type;
    buffer        :    out Byte_Buffer;
    actually_read :    out Natural
    --  = buffer'Length if no end of file before last buffer element
  );

  --  Same for general streams
  --
  procedure Block_Read (
    stream        : in out Zip_Streams.Root_Zipstream_Type'Class;
    buffer        :    out Byte_Buffer;
    actually_read :    out Natural
    --  = buffer'Length if no end of stream before last buffer element
  );

  --  Same, but instead of giving actually_read, raises End_Error if
  --  the buffer cannot be fully read.
  --  This mimics the 'Read stream attribute; can be a lot faster, depending
  --  on the compiler's run-time library.
  procedure Block_Read (
    stream : in out Zip_Streams.Root_Zipstream_Type'Class;
    buffer :    out Byte_Buffer
  );

  --  This mimics the 'Write stream attribute; can be a lot faster, depending
  --  on the compiler's run-time library.
  --  NB: here we can use the root stream type: no question of size, index,...
  procedure Block_Write (
    stream : in out Ada.Streams.Root_Stream_Type'Class;
    buffer : in     Byte_Buffer
  );

  --  Copy a chunk from a stream into another one, using a temporary buffer
  procedure Copy_chunk (
    from        : in out Zip_Streams.Root_Zipstream_Type'Class;
    into        : in out Ada.Streams.Root_Stream_Type'Class;
    bytes       : Natural;
    buffer_size : Positive := 1024 * 1024;
    Feedback    : Feedback_proc := null
  );

  --  Copy a whole file into a stream, using a temporary buffer
  procedure Copy_file (
    file_name   : String;
    into        : in out Ada.Streams.Root_Stream_Type'Class;
    buffer_size : Positive := 1024 * 1024
  );

  --  This does the same as Ada 2005's Ada.Directories.Exists
  --  Just there as helper for Ada 95 only systems
  --
  function Exists (file_name : String) return Boolean;

  --  Write a string containing line endings (possibly from another system)
  --  into a text file, with the "correct", native line endings.
  --  Works for displaying/saving correctly
  --  CR&LF (DOS/Win), LF (UNIX), CR (Mac OS < 9)
  --
  procedure Put_Multi_Line (
    out_file :        Ada.Text_IO.File_Type;
    text     :        String
  );

  procedure Write_as_text (
    out_file  :        Ada.Text_IO.File_Type;
    buffer    :        Byte_Buffer;
    last_char : in out Character  --  track line-ending characters between writes
  );

  function Hexadecimal (x : Interfaces.Unsigned_32) return String;

  -----------------------------------------------------------------
  --  Information about this package - e.g., for an "about" box  --
  -----------------------------------------------------------------

  version   : constant String := "58";
  reference : constant String := "07-Apr-2023";
  --  Hopefully the latest version is at one of those URLs:
  web       : constant String := "https://unzip-ada.sourceforge.io/";
  web2      : constant String := "https://sourceforge.net/projects/unzip-ada/";
  web3      : constant String := "https://github.com/zertovitch/zip-ada";

  ---------------------
  --  Private items  --
  ---------------------

private

  --  Zip_info, 23.VI.1999.
  --
  --  The PKZIP central directory is coded here as a binary tree
  --  to allow a fast retrieval of the searched offset in zip file.
  --  E.g. for a 1000-file archive, the offset will be found in less
  --  than 11 moves: 2**10=1024 (balanced case), without any read
  --  in the archive.
  --
  --  Notes on search dictionary
  ------------------------------
  --  19-Oct-2018: rev. 670 to 683 used a Vector and a Hashed Map
  --      from Ada.Containers. The loading of the dictionary was
  --      much faster (2x), but there were performance bottlenecks elsewhere,
  --      not solved by profiling. On an archive with 18000 small entries of
  --      around 1 KiB each, comp_zip ran 100x slower!
  --      Neither the restricted use of Unbounded_String, nor the replacement
  --      of the Vector by an array helped solving the performance issue.
  --  2022: second attempt with Vectors & Indefinite_Hashed_Maps (both a vector
  --      and a map are needed because a Zip archive may contain entries with
  --      duplicate keys; otherwise a map would be sufficient).
  --         - Test_Zip_Info_Timing: load time on many_65535.zip:
  --              0.75 seconds (binary tree) ->  0.44 seconds (vector & map)
  --         - But... comp_zip many_4096.zip many_4096.zip -q2:
  --              5.5  seconds (binary tree) -> 13.2  seconds (vector & map) !

  type Dir_node;
  type p_Dir_node is access Dir_node;

  type Dir_node (name_len : Natural) is record
    left, right      : p_Dir_node;
    dico_name        : String (1 .. name_len);  --  UPPER if case-insensitive search
    file_name        : String (1 .. name_len);
    file_index       : Zip_Streams.ZS_Index_Type;
    comp_size        : Zip_64_Data_Size_Type;
    uncomp_size      : Zip_64_Data_Size_Type;
    crc_32           : Interfaces.Unsigned_32;
    date_time        : Time;
    method           : PKZip_method;
    name_encoding    : Zip_name_encoding;
    read_only        : Boolean;  --  TBD: attributes of most supported systems
    encrypted_2_x    : Boolean;
    user_code        : Integer;
  end record;

  type Zip_archive_format_type is (Zip_32, Zip_64);

  type p_String is access String;

  type Zip_info is new Ada.Finalization.Controlled with record
    loaded             : Boolean := False;
    case_sensitive     : Boolean;
    zip_file_name      : p_String;                            --  a file name...
    zip_input_stream   : Zip_Streams.Zipstream_Class_Access;  --  ...or an input stream
    --  ^ when not null, we use this, and not zip_file_name
    dir_binary_tree    : p_Dir_node;
    total_entries      : Natural;
    zip_file_comment   : p_String;
    zip_archive_format : Zip_archive_format_type := Zip_32;
  end record;

  --  After a copy, need to clone a few things.
  overriding procedure Adjust   (info : in out Zip_info);
  --  Free heap-allocated memory.
  overriding procedure Finalize (info : in out Zip_info);

  --  System.Word_Size: 13.3(8): A word is the largest amount of storage
  --  that can be conveniently and efficiently manipulated by the hardware,
  --  given the implementation's run-time model.
  --
  min_bits_32 : constant := Integer'Max (32, System.Word_Size);
  min_bits_16 : constant := Integer'Max (16, System.Word_Size);

  --  We define an Integer type which is at least 32 bits, but n bits
  --  on a native n (> 32) bits architecture.
  --  Integer_M16 is not needed: Integer already guarantees 16 bits
  --
  type Integer_M32 is range -2**(min_bits_32 - 1) .. 2**(min_bits_32 - 1) - 1;
  subtype Natural_M32  is Integer_M32 range 0 .. Integer_M32'Last;
  subtype Positive_M32 is Integer_M32 range 1 .. Integer_M32'Last;

  type Unsigned_M16 is mod 2**min_bits_16;
  type Unsigned_M32 is mod 2**min_bits_32;

  --  Codes for compression formats in Zip archives
  --  See PKWARE's Appnote, "4.4.5 compression method"
  --
  package Compression_format_code is
    store_code        : constant :=  0;
    shrink_code       : constant :=  1;
    reduce_code       : constant :=  2;
    implode_code      : constant :=  6;
    tokenize_code     : constant :=  7;
    deflate_code      : constant :=  8;
    deflate_e_code    : constant :=  9;
    bzip2_code        : constant := 12;
    lzma_code         : constant := 14;
    zstandard_code    : constant := 93;
    mp3_code          : constant := 94;
    xz_code           : constant := 95;
    jpeg_code         : constant := 96;
    wavpack_code      : constant := 97;
    ppmd_code         : constant := 98;
  end Compression_format_code;

end Zip;

Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other related Ada projects on Gautier's blog.