Back to... Zip-Ada

Source file : zip-headers.ads



   1  --   ________  ___   ______       ______      ___
   2  --  /___..._/  |.|   |.___.\     /. __ .\   __|.|   ____
   3  --     /../    |.|   |.____/     |.|__|.|  /....|  __\..\
   4  --   _/../___  |.|   |.|    ===  |..__..| |. = .| | = ..|
   5  --  /_______/  |_|  /__|        /__|  |_|  \__\_|  \__\_|
   6  
   7  --  Zip.Headers
   8  ---------------
   9  --
  10  --  This package provides:
  11  --
  12  --  * Definiton of PKZIP information structures (cf appnote.txt),
  13  --  * Reading a header from a data stream (Read_and_check),
  14  --  * Copying a header from a buffer (Copy_and_check)
  15  --  * Writing a header to a data stream (Write)
  16  
  17  --  Legal licensing note:
  18  
  19  --  Copyright (c) 2000 .. 2024 Gautier de Montmollin
  20  --  SWITZERLAND
  21  
  22  --  Permission is hereby granted, free of charge, to any person obtaining a copy
  23  --  of this software and associated documentation files (the "Software"), to deal
  24  --  in the Software without restriction, including without limitation the rights
  25  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  26  --  copies of the Software, and to permit persons to whom the Software is
  27  --  furnished to do so, subject to the following conditions:
  28  
  29  --  The above copyright notice and this permission notice shall be included in
  30  --  all copies or substantial portions of the Software.
  31  
  32  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  33  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  34  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  35  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  36  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  37  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  38  --  THE SOFTWARE.
  39  
  40  --  NB: this is the MIT License, as found on the site
  41  --  http://www.opensource.org/licenses/mit-license.php
  42  
  43    --  Some quick explanations about the Zip file structure - GdM 2001, 2012
  44    --
  45    --  The zip archive containing N entries can be roughly seen as
  46    --  a data stream with the following structure:
  47    --
  48    --  1) {local header, then compressed data} - that, N times
  49    --  2) central directory, with a summary of each of the N entries
  50    --  3) end-of-central-directory, with a summary of the central directory
  51    --
  52    --  Since N is not necessarily known before or during the phase 1,
  53    --  the central directory's size is also potentially unknown.
  54    --  Then obvious place for the central directory is *after* the data,
  55    --  it is why it appears on phase 2.
  56    --
  57    --  An advantage of that structure is that the .ZIP archive can be later
  58    --  appended to an .EXE, for self-extracting purposes, or to other
  59    --  kind of files.
  60    --
  61    --  So, the most general infos are at the end, and we crawl back
  62    --  for more precise infos:
  63    --
  64    --  1) end-of-central-directory
  65    --  2) central directory
  66    --  3) zipped data entries
  67  
  68  --  Change log:
  69  --  ==========
  70  --
  71  --  29-May-2022: GdM: Support for Zip64 extensions.
  72  --
  73  --  22-Nov-2012: GdM: End-of-central-directory loaded in a single stream Read
  74  --                      operation instead of up to ~1.4 million Read
  75  --                      operations (for a non Zip file with 65535 times
  76  --                      the letter 'P'). Execution flow simplified, without
  77  --                      use of exceptions. Massive speedup there, on files
  78  --                      that are either invalid Zip files, or Zip files with
  79  --                      a large comment.
  80  --
  81  --  30-Oct-2012: GdM: Removed all profiles using Zip_Streams' objects
  82  --                      with accesses (cf 25-Oct's modifications)
  83  --  25-Oct-2012: GdM: Some procedures using Zip_Streams' objects also with
  84  --                    pointer-free profiles (no more 'access' or access type)
  85  --  16-Nov-2009: GdM: Replaced Ada.Calendar.Time by Zip.Time in headers, due to
  86  --                   perf. issues in some run-times' Ada.Calendar.Time_Of
  87  
  88  package Zip.Headers is
  89  
  90    use Interfaces, Zip_Streams;
  91  
  92    ----------------------------------------------------------------------
  93    -- PKZIP data descriptor, put after streamed compressed data - PK78 --
  94    ----------------------------------------------------------------------
  95  
  96    type Data_Descriptor is record
  97      --  PK78                           --  1 .. 4
  98      crc_32             : Unsigned_32;  --  5 .. 8
  99      compressed_size,
 100      uncompressed_size  : Unsigned_64;
 101      --  ^ Stricto sensu, the data descriptor is 32 bit, but the actual
 102      --    size may be larger (complemented through a Zip64 extra field).
 103    end record;
 104  
 105    data_descriptor_length : constant := 16;
 106  
 107    --  This header needs to be read in continuation of
 108    --  the compressed data -> access to a buffer
 109    procedure Copy_and_Check
 110      (buffer        : in     Byte_Buffer;
 111       the_data_desc :    out Data_Descriptor);
 112  
 113    procedure Read_and_Check
 114      (stream        : in out Root_Zipstream_Type'Class;
 115       the_data_desc :    out Data_Descriptor);
 116  
 117    bad_data_descriptor : exception;
 118  
 119    procedure Write
 120      (stream        : in out Root_Zipstream_Type'Class;
 121       the_data_desc : in     Data_Descriptor);
 122  
 123    -----------------------------------------------------------------------
 124    -- PKZIP local file header, in front of every file in archive - PK34 --
 125    -----------------------------------------------------------------------
 126  
 127    --  Appnote: 4.4.4 general purpose bit flag: (2 bytes)
 128    --
 129    --  Bit 0:  If set, indicates that the file is encrypted.
 130    Encryption_Flag_Bit        : constant := 2 **  0;
 131    --  Bit 1:  If set, indicates an EOS marker is used.
 132    LZMA_EOS_Flag_Bit          : constant := 2 **  1;
 133    --  Bit 11: Language encoding flag (EFS). If this bit is set, the filename and
 134    --          comment fields for this file MUST be encoded using UTF-8.
 135    Language_Encoding_Flag_Bit : constant := 2 ** 11;
 136  
 137    type Local_File_Header is record
 138      --  PK34                                --  1 .. 4
 139      needed_extract_version : Unsigned_16;   --  5 .. 6
 140      bit_flag               : Unsigned_16;   --  Appnote: 4.4.4 general purpose bit flag
 141      zip_type               : Unsigned_16;
 142      file_timedate          : Time;
 143      dd                     : Data_Descriptor;
 144      filename_length,
 145      extra_field_length     : Unsigned_16;
 146    end record;
 147  
 148    local_header_length : constant := 30;
 149  
 150    procedure Read_and_Check
 151      (stream : in out Root_Zipstream_Type'Class;
 152       header :    out Local_File_Header);
 153  
 154    bad_local_header : exception;
 155  
 156    type Extra_Field_Policy_Kind is
 157      (from_header,  --  This policy is for preserving an entry from another Zip
 158                     --  file. The extra field could be a Zip_64 or another kind.
 159       force_empty,
 160       force_zip_64);
 161  
 162    procedure Write
 163      (stream             : in out Root_Zipstream_Type'Class;
 164       header             : in     Local_File_Header;
 165       extra_field_policy : in     Extra_Field_Policy_Kind);
 166  
 167    ---------------------------------------------
 168    -- PKZIP local file header Zip64 extension --
 169    ---------------------------------------------
 170  
 171    --  4.5.3 Zip64 Extended Information Extra Field
 172  
 173    type Values_64 is array (1 .. 3) of Unsigned_64;
 174  
 175    type Local_File_Header_Extension is record
 176      tag      : Unsigned_16;
 177      size     : Unsigned_16;
 178      value_64 : Values_64;
 179    end record;
 180  
 181    local_header_extension_length : constant := 28;
 182  
 183    --  Shorter length (witout the offset field) for the local header
 184    --  extension is not properly documented in appnote.txt but is
 185    --  required by WinZip and 7z (otherwise you get: WARNINGS: Headers Error).
 186    --
 187    --  Fortunately someone published a nice blog post with
 188    --  a Zip64 example, as simple as possible...
 189    --  https://blog.yaakov.online/zip64-go-big-or-go-home/
 190    --
 191    local_header_extension_short_length : constant := 20;
 192  
 193    local_header_extension_tag : constant := 1;
 194  
 195    procedure Read_and_Check
 196      (stream : in out Root_Zipstream_Type'Class;
 197       header :    out Local_File_Header_Extension);
 198  
 199    --  Depending on its size (1, 2, >=3 values) we may
 200    --  need (or not) to change the corresponding variables.
 201    --  E.g. 7z may set only the first item, which is correct
 202    --  but is contrary to the requirement of appnote.txt (4.5.3)
 203    --  to have at least the first two items.
 204    --
 205    procedure Interpret
 206      (header            : in     Local_File_Header_Extension;
 207       uncompressed_size : in out Unsigned_64;
 208       compressed_size   : in out Unsigned_64;
 209       offset            : in out Unsigned_64);
 210  
 211    procedure Write
 212      (stream : in out Root_Zipstream_Type'Class;
 213       header : in     Local_File_Header_Extension;
 214       short  : in     Boolean);
 215  
 216    ---------------------------------------------------------
 217    --  PKZIP file header, as in central directory - PK12  --
 218    ---------------------------------------------------------
 219    --  NB: a central header contains a local header in the middle
 220  
 221    type Central_File_Header is record
 222      made_by_version     : Unsigned_16;
 223      short_info          : Local_File_Header;
 224      comment_length      : Unsigned_16;
 225      disk_number_start   : Unsigned_16;
 226      internal_attributes : Unsigned_16;  --  internal properties of data
 227      external_attributes : Unsigned_32;  --  1st byte if MS-DOS: see below
 228      local_header_offset : Unsigned_64;
 229    end record;
 230  
 231    --  MS-DOS external attributes:
 232    --
 233    --   Bit 0     Read-Only
 234    --   Bit 1     Hidden
 235    --   Bit 2     System
 236    --   Bit 3     Volume Label
 237    --   Bit 4     Directory
 238    --   Bit 5     Archive
 239  
 240    central_header_length : constant := 46;
 241  
 242    procedure Read_and_Check
 243      (stream : in out Root_Zipstream_Type'Class;
 244       header :    out Central_File_Header);
 245  
 246    bad_central_header : exception;
 247  
 248    procedure Write
 249      (stream : in out Root_Zipstream_Type'Class;
 250       header : in     Central_File_Header);
 251  
 252    function Needs_Local_Zip_64_Header_Extension
 253      (header : Local_File_Header;
 254       offset : Unsigned_64)  --  Not part of the Zip32 header but of the Zip64 one...
 255    return Boolean;
 256  
 257    -------------------------------------------
 258    -- PKZIP end-of-central-directory - PK56 --
 259    -------------------------------------------
 260  
 261    type End_of_Central_Dir is record
 262      disknum             : Unsigned_32;
 263      disknum_with_start  : Unsigned_32;
 264      disk_total_entries  : Unsigned_64;
 265      total_entries       : Unsigned_64;
 266      central_dir_size    : Unsigned_64;
 267      central_dir_offset  : Unsigned_64;
 268      main_comment_length : Unsigned_16;
 269      --  The Zip archive may be appended to another file (for instance an
 270      --  executable for self-extracting purposes) of size N.
 271      --  Then, all offsets need to be shifted by N.
 272      --  N=0 if the Zip archive is on its own.
 273      --  The real offset of the end-of-central-dir
 274      --  will be N + central_dir_size + central_dir_offset.
 275      --  This way, we have an unique chance to determine N when reading the
 276      --  end-of-central-dir. N is stored in the field hereafter:
 277      offset_shifting    : ZS_Size_Type;  --  NB: type is at least 32 bits.
 278    end record;
 279  
 280    end_of_central_dir_length : constant := 22;
 281  
 282    --  The End-of-Central-Dir header is followed by a comment of
 283    --  unkown size and hence needs to be searched in special ways (see Load).
 284  
 285    --  Copy_and_check and Read_and_check assume a buffer or a stream
 286    --  pointing to the End-of-Central-Dir signature.
 287    procedure Copy_and_Check
 288      (buffer  : in     Byte_Buffer;
 289       the_end :    out End_of_Central_Dir);
 290  
 291    procedure Read_and_Check
 292      (stream  : in out Root_Zipstream_Type'Class;
 293       the_end :    out End_of_Central_Dir);
 294  
 295    bad_end : exception;
 296  
 297    procedure Write
 298      (stream  : in out Root_Zipstream_Type'Class;
 299       the_end : in     End_of_Central_Dir);
 300  
 301    --  A bit more elaborated variant of Read:
 302    --  find the End-of-Central-Dir and load it.
 303    --  It includes the processing of an eventual Zip64
 304    --  End-of-Central-Dir.
 305  
 306    procedure Load
 307      (stream  : in out Root_Zipstream_Type'Class;
 308       the_end :    out End_of_Central_Dir);
 309  
 310    ------------------------------------------------------------
 311    --  Zip64 extensions for end-of-central directory stuff.  --
 312    ------------------------------------------------------------
 313  
 314    --  References are from PKWare's appnote.txt.
 315  
 316    --  The Zip64 flavor of the end-of-central directory structure appears in
 317    --  the following order. The three records are read in reverse order.
 318    --
 319  
 320    --  4.3.14   Zip64 end of central directory record (variable size)
 321    --  4.3.15   Zip64 end of central directory locator (fixed size)
 322    --  4.3.16   End of central directory record (variable size), disabled with FFFF's
 323  
 324    type Zip64_End_of_Central_Dir is record
 325      size                                                          : Unsigned_64;  --  Size of the remaining record
 326      version_made_by                                               : Unsigned_16;
 327      version_needed_to_extract                                     : Unsigned_16;
 328      number_of_this_disk                                           : Unsigned_32;
 329      number_of_the_disk_with_the_start_of_the_central_directory    : Unsigned_32;
 330      total_number_of_entries_in_the_central_directory_on_this_disk : Unsigned_64;
 331      total_number_of_entries_in_the_central_directory              : Unsigned_64;
 332      size_of_the_central_directory                                 : Unsigned_64;
 333      offset_of_start_of_central_directory                          : Unsigned_64;
 334      --  ^ offset: with respect to the starting disk number
 335      --  zip64_extensible_data_sector (variable_size)
 336    end record;
 337  
 338    zip_64_end_of_central_dir_length : constant := 56;
 339  
 340    procedure Read_and_Check
 341      (stream     : in out Root_Zipstream_Type'Class;
 342       the_end_64 :    out Zip64_End_of_Central_Dir);
 343  
 344    procedure Write
 345      (stream     : in out Root_Zipstream_Type'Class;
 346       the_end_64 : in     Zip64_End_of_Central_Dir);
 347  
 348    type Zip64_End_of_Central_Dir_Locator is record
 349      number_of_the_disk_with_the_start_of_the_zip64_end_of_central_dir : Unsigned_32;
 350      relative_offset_of_the_zip64_end_of_central_dir_record            : Unsigned_64;
 351      total_number_of_disks                                             : Unsigned_32;
 352    end record;
 353  
 354    zip_64_end_of_central_dir_locator_length : constant := 20;
 355  
 356    procedure Read_and_Check
 357      (stream         : in out Root_Zipstream_Type'Class;
 358       the_end_64_loc :    out Zip64_End_of_Central_Dir_Locator);
 359  
 360    procedure Write
 361      (stream         : in out Root_Zipstream_Type'Class;
 362       the_end_64_loc : in     Zip64_End_of_Central_Dir_Locator);
 363  
 364  end Zip.Headers;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.