Back to... Zip-Ada

Source file : zip-compress-shrink.adb



   1  --  Legal licensing note:
   2  
   3  --  Copyright (c) 2006 .. 2022 Gautier de Montmollin (see spec. for credits)
   4  --  SWITZERLAND
   5  
   6  --  Permission is hereby granted, free of charge, to any person obtaining a copy
   7  --  of this software and associated documentation files (the "Software"), to deal
   8  --  in the Software without restriction, including without limitation the rights
   9  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  --  copies of the Software, and to permit persons to whom the Software is
  11  --  furnished to do so, subject to the following conditions:
  12  
  13  --  The above copyright notice and this permission notice shall be included in
  14  --  all copies or substantial portions of the Software.
  15  
  16  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  --  THE SOFTWARE.
  23  
  24  --  NB: this is the MIT License, as found on the site
  25  --  http://www.opensource.org/licenses/mit-license.php
  26  
  27  with Ada.Unchecked_Deallocation;
  28  
  29  procedure Zip.Compress.Shrink
  30    (input,
  31     output           : in out Zip_Streams.Root_Zipstream_Type'Class;
  32     input_size_known :        Boolean;
  33     input_size       :        Zip_64_Data_Size_Type;  --  ignored if unknown
  34     feedback         :        Feedback_Proc;
  35     CRC              : in out Interfaces.Unsigned_32;  --  only updated here
  36     crypto           : in out CRC_Crypto.Crypto_pack;
  37     output_size      :    out Zip_64_Data_Size_Type;
  38     compression_ok   :    out Boolean)  --  indicates compressed < uncompressed
  39  is
  40    use Interfaces;
  41  
  42    ------------------
  43    -- Buffered I/O --
  44    ------------------
  45  
  46    IO_buffers : IO_Buffers_Type;
  47  
  48    procedure Put_byte (B : Unsigned_8) is
  49    begin
  50      IO_buffers.OutBuf (IO_buffers.OutBufIdx) := B;
  51      IO_buffers.OutBufIdx := IO_buffers.OutBufIdx + 1;
  52      if IO_buffers.OutBufIdx > IO_buffers.OutBuf.all'Last then
  53        Write_Block (IO_buffers, input_size_known, input_size, output, output_size, crypto);
  54      end if;
  55    end Put_byte;
  56  
  57    procedure Flush_output is
  58    begin
  59      if IO_buffers.OutBufIdx > 1 then
  60        Write_Block (IO_buffers, input_size_known, input_size, output, output_size, crypto);
  61      end if;
  62    end Flush_output;
  63  
  64    --------------------------------------------------------------------------
  65  
  66    ------------------------------------------------------
  67    --  Bit code buffer, for sending data at bit level  --
  68    ------------------------------------------------------
  69  
  70    --  Output buffer. Bits are inserted starting at the right (least
  71    --  significant bits). The width of bit_buffer must be at least 16 bits.
  72    subtype U32 is Unsigned_32;
  73    bit_buffer : U32 := 0;
  74    --  Number of valid bits in bit_buffer.  All bits above the last valid bit are always zero.
  75    valid_bits : Integer := 0;
  76  
  77    procedure Flush_bit_buffer is
  78    begin
  79      while valid_bits > 0 loop
  80        Put_byte (Byte (bit_buffer and 16#FF#));
  81        bit_buffer := Shift_Right (bit_buffer, 8);
  82        valid_bits := Integer'Max (0, valid_bits - 8);
  83      end loop;
  84      bit_buffer := 0;
  85    end Flush_bit_buffer;
  86  
  87    Min_bits : constant := 9;    --  Starting code size of 9 bits
  88    Max_bits : constant := 13;   --  Maximum code size of 13 bits
  89  
  90    subtype Code_size_type is Integer range 1 .. Max_bits;
  91    code_size : Code_size_type;     --  Size of codes (in bits) currently being written
  92  
  93    --  Send a value on a given number of bits.
  94    procedure Put_code (code : Natural) is
  95    pragma Inline (Put_code);
  96    begin
  97      --  Put bits from code at the left of existing ones. They might be shifted away
  98      --  partially on the left side (or even entirely if valid_bits is already = 32).
  99      bit_buffer := bit_buffer or Shift_Left (U32 (code), valid_bits);
 100      valid_bits := valid_bits + code_size;
 101      if valid_bits > 32 then
 102        --  Flush 32 bits to output as 4 bytes
 103        Put_byte (Byte (bit_buffer and 16#FF#));
 104        Put_byte (Byte (Shift_Right (bit_buffer,  8) and 16#FF#));
 105        Put_byte (Byte (Shift_Right (bit_buffer, 16) and 16#FF#));
 106        Put_byte (Byte (Shift_Right (bit_buffer, 24) and 16#FF#));
 107        valid_bits := valid_bits - 32;
 108        --  Empty buffer and put on it the rest of the code
 109        bit_buffer := Shift_Right (U32 (code), code_size - valid_bits);
 110      end if;
 111    end Put_code;
 112  
 113    Table_full : Boolean; -- Flag indicating a full symbol table
 114  
 115    --  Define data types needed to implement a code table for LZW compression
 116    type CodeRec is record  --  Code Table record format...
 117      Child   : Integer;       --  Index of 1st suffix for this prefix
 118      Sibling : Integer;       --  Index of next suffix in chain
 119      Suffix  : Natural;       --  Suffix
 120    end record;
 121  
 122    TABLESIZE : constant := 8191;  --  We'll need 4K entries in table
 123  
 124    --  PKZip's Shrink is a variant of the LZW algorithm in that the
 125    --  compressor controls the code increase and the table clearing.
 126    --  See appnote.txt, section 5.1.
 127    Special_Code : constant := 256;
 128    Code_for_increasing_code_size : constant := 1;
 129    Code_for_clearing_table       : constant := 2;
 130  
 131    FIRSTENTRY : constant := 257;  --  First available table entry
 132    UNUSED : constant := -1;       --  Prefix indicating an unused code table entry
 133  
 134    type Code_array is array (0 .. TABLESIZE) of CodeRec;
 135    --  Define the code table
 136  
 137    type Table_access is access Code_array;
 138    procedure Dispose is new Ada.Unchecked_Deallocation (Code_array, Table_access);
 139  
 140    Code_table : Table_access := null;  --  Points to code table for LZW compression
 141  
 142    --  Define data types needed to implement a free node list
 143    type Free_list_array is array (FIRSTENTRY .. TABLESIZE) of Natural;
 144    type Free_list_access is access Free_list_array;
 145  
 146    procedure Dispose is
 147      new Ada.Unchecked_Deallocation (Free_list_array, Free_list_access);
 148  
 149    Free_list : Free_list_access := null;  --  Table of free code table entries
 150    Next_free : Integer;                   --  Index into free list table
 151  
 152    ----------------------------------------------------------------------------
 153    --  The following routines are used to allocate, initialize, and de-allocate
 154    --  various dynamic memory structures used by the LZW compression algorithm
 155    ----------------------------------------------------------------------------
 156  
 157    procedure Build_Data_Structures is
 158    begin
 159      Code_table := new Code_array;
 160      Free_list  := new Free_list_array;
 161    end Build_Data_Structures;
 162  
 163    ---------------------------------------------------------------------------
 164    procedure Destroy_Data_Structures is
 165    begin
 166      Dispose (Code_table);
 167      Dispose (Free_list);
 168    end Destroy_Data_Structures;
 169  
 170    ---------------------------------------------------------------------------
 171  
 172    procedure Initialize_Data_Structures is
 173    begin
 174      for I in 0 .. TABLESIZE loop
 175        Code_table (I).Child   := UNUSED;
 176        Code_table (I).Sibling := UNUSED;
 177        if I <= 255 then
 178          Code_table (I).Suffix := I;
 179        end if;
 180        if I >= 257 then
 181          Free_list (I) := I;
 182        end if;
 183      end loop;
 184      Next_free := FIRSTENTRY;
 185      Table_full := False;
 186    end Initialize_Data_Structures;
 187  
 188    ---------------------------------------------------------------------------
 189    --  The following routines handle manipulation of the LZW Code Table
 190    ---------------------------------------------------------------------------
 191  
 192    ClearList : array (0 .. 1023) of Unsigned_8;
 193    --  Bit mapped structure used in during adaptive resets
 194  
 195    procedure Prune (Parent : Integer) is
 196      --  Prune leaves from a subtree - Note: this is a recursive procedure
 197      CurrChild : Integer;
 198      NextSibling : Integer;
 199    begin
 200      CurrChild := Code_table (Parent).Child;
 201      --  Find first Child that has descendants .. clear any that don't
 202  
 203      while CurrChild /= UNUSED and then
 204            Code_table (CurrChild).Child = UNUSED
 205      loop
 206        Code_table (Parent).Child := Code_table (CurrChild).Sibling;
 207        Code_table (CurrChild).Sibling := UNUSED;
 208        --  Turn on ClearList bit to indicate a cleared entry
 209        ClearList (CurrChild / 8) :=
 210            ClearList (CurrChild / 8)  or
 211            (Shift_Left (1, CurrChild  mod  8));
 212        CurrChild := Code_table (Parent).Child;
 213      end loop;
 214  
 215      if CurrChild /= UNUSED then    --  If there are any children left ...
 216        Prune (CurrChild);
 217        NextSibling := Code_table (CurrChild).Sibling;
 218        while NextSibling /= UNUSED loop
 219          if  Code_table (NextSibling).Child = UNUSED then
 220            Code_table (CurrChild).Sibling :=
 221              Code_table (NextSibling).Sibling;
 222            Code_table (NextSibling).Sibling := UNUSED;
 223            --  Turn on ClearList bit to indicate a cleared entry
 224  
 225            ClearList (NextSibling / 8) :=
 226              ClearList (NextSibling / 8)  or
 227              (Shift_Left (1, NextSibling  mod  8));
 228            NextSibling := Code_table (CurrChild).Sibling;
 229          else
 230            CurrChild := NextSibling;
 231            Prune (CurrChild);
 232            NextSibling := Code_table (CurrChild).Sibling;
 233          end if;
 234        end loop;
 235      end if;
 236    end Prune;
 237  
 238    ---------------------------------------------------------------------------
 239  
 240    procedure Clear_Table is
 241    begin
 242      ClearList := (others => 0);
 243      --  Remove all leaf nodes by recursively pruning subtrees
 244      for Node in  0 .. 255 loop
 245        Prune (Node);
 246      end loop;
 247      --  Next, re-initialize our list of free table entries
 248      Next_free := TABLESIZE + 1;
 249      for Node in reverse FIRSTENTRY .. TABLESIZE loop
 250        if (ClearList (Node / 8)  and  (Shift_Left (1, Node  mod  8))) /= 0 then
 251          Next_free := Next_free - 1;
 252          Free_list (Next_free) := Node;
 253        end if;
 254      end loop;
 255      --
 256      Table_full := Next_free > TABLESIZE;
 257    end Clear_Table;
 258  
 259    ---------------------------------------------------------------------------
 260  
 261    procedure Table_Add (Prefix_0 : Natural; Suffix : Natural) is
 262      FreeNode : Natural;
 263      Prefix : Natural := Prefix_0;
 264    begin
 265      if Next_free <= TABLESIZE then
 266        FreeNode := Free_list (Next_free);
 267        Next_free := Next_free + 1;
 268        Code_table (FreeNode).Child := UNUSED;
 269        Code_table (FreeNode).Sibling := UNUSED;
 270        Code_table (FreeNode).Suffix := Suffix;
 271        if Code_table (Prefix).Child = UNUSED then
 272          Code_table (Prefix).Child := FreeNode;
 273        else
 274          Prefix := Code_table (Prefix).Child;
 275          while Code_table (Prefix).Sibling /= UNUSED loop
 276            Prefix := Code_table (Prefix).Sibling;
 277          end loop;
 278          Code_table (Prefix).Sibling := FreeNode;
 279        end if;
 280      end if;
 281      --
 282      Table_full := Next_free > TABLESIZE;
 283    end Table_Add;
 284  
 285    ---------------------------------------------------------------------------
 286  
 287    ---------------------------------------------------------------------------
 288    --  Search for a Prefix:Suffix pair in our Symbol table. If found, return
 289    --  the index value where found.  If not found, return False and set
 290    --  Found_at to UNUSED.
 291    ---------------------------------------------------------------------------
 292    procedure Table_Lookup (
 293        TargetPrefix : Integer;
 294        TargetSuffix : Natural;
 295        Found_at     : out Integer;
 296        Found        : out Boolean
 297    )
 298    is
 299      --  Was in 16-bit ASM
 300      idx : Natural := TargetPrefix;
 301    begin
 302      --  Lookup an entry in the Hash Table. If found, return TRUE and set
 303      --  parameter Found_at with the index of the entry at which the match
 304      --  was found. If not found, return False and plug an UNUSED into Found_at.
 305      if Code_table (idx).Child = UNUSED then
 306        Found_at := UNUSED;
 307        Found := False;
 308      else
 309        idx := Code_table (idx).Child;
 310        loop
 311          if Code_table (idx).Suffix = TargetSuffix then
 312            Found_at := idx;
 313            Found := True;
 314            return;
 315          elsif Code_table (idx).Sibling = UNUSED then
 316            Found_at := UNUSED;
 317            Found := False;
 318            return;
 319          else
 320            idx := Code_table (idx).Sibling;
 321          end if;
 322        end loop;
 323      end if;
 324    end Table_Lookup;
 325  
 326    ---------------------------------------------------------------------------
 327    --  The actual Crunching algorithm
 328    ---------------------------------------------------------------------------
 329  
 330    Last_code : Integer := 0;
 331    First_atom : Boolean;  --  Flag indicating the START of a shrink operation
 332    Max_code : Natural;    --  Largest code that can be written in Code_size bits
 333  
 334    procedure Shrink_Atom (Suffix : Integer) is
 335      WhereFound : Integer;
 336      lookup_ok : Boolean;
 337    begin
 338      if First_atom then            --  If just getting started ...
 339        bit_buffer := 0;
 340        valid_bits := 0;
 341        code_size := Min_bits;    --    Initialize code size to minimum
 342        Max_code  := 2 ** code_size - 1;
 343        Last_code := Suffix;      --    get first character from input,
 344        First_atom  := False;       --    and reset the first char flag.
 345      elsif Suffix = UNUSED then  --  Nothing to crunch... must be EOF on input
 346        Put_code (Last_code);         --  Write last prefix code
 347        Flush_bit_buffer;
 348        Flush_output;                 --  Flush our output buffer
 349      elsif Table_full then
 350        Put_code (Last_code);
 351        --  NB: PKZip does not necessarily clear the table when
 352        --  it is full. Hence the need for the special code below.
 353        Put_code (Special_Code);
 354        Put_code (Code_for_clearing_table);
 355        Clear_Table;
 356        Table_Add (Last_code, Suffix);
 357        Last_code := Suffix;
 358      else
 359        Table_Lookup (Last_code, Suffix, WhereFound, lookup_ok);
 360        if lookup_ok then
 361          --  If Last_code:Suffix pair is found in the code table, then ...
 362          --  ... set Last_code to the entry where the pair is located
 363          Last_code := WhereFound;
 364        else
 365          --  Not in table
 366          Put_code (Last_code);           --  Write current Last_code code
 367          Table_Add (Last_code, Suffix);  --  Attempt to add to code table
 368          Last_code := Suffix;            --  Reset Last_code code for new char
 369          if (
 370               code_size < Max_bits and
 371               not Table_full
 372               --  12-Dec-2007: the Pascal code had an out-of-range access
 373               --    with Free_list(Next_free) below when the table was full!
 374               --    NB: according to tests, and surely it can be proven,
 375               --    the case (Code_size < Max_bits and Table_Full) never happens,
 376               --    so that
 377               --      "Code_size < Max_bits and then Free_list(Next_free) > Max_code"
 378               --    could be sufficient. But until it is proven, I prefer to
 379               --    keep the "and not Table_Full"
 380             )
 381             and then
 382             Free_list (Next_free) > Max_code
 383          then
 384            --  Time to increase the code size and change the max. code
 385            Put_code (Special_Code);
 386            Put_code (Code_for_increasing_code_size);
 387            code_size := code_size + 1;
 388            Max_code := 2 **  code_size - 1;
 389          end if;
 390        end if;
 391      end if;
 392    end Shrink_Atom;
 393  
 394    feedback_milestone,
 395    Bytes_in   : Zip_Streams.ZS_Size_Type := 0;   --  Count of input file bytes processed
 396  
 397    procedure Process_Input (Source : Byte_Buffer) is
 398      PctDone : Natural;
 399      user_aborting : Boolean;
 400      Last_processed : Integer := Source'First - 1;
 401      use Zip_Streams;
 402    begin
 403      if Source'Length < 1 then
 404        Shrink_Atom (UNUSED);
 405      else
 406        for I in Source'Range loop
 407          Bytes_in := Bytes_in + 1;
 408          if feedback /= null then
 409            if Bytes_in = 1 then
 410              feedback (0, False, user_aborting);
 411            end if;
 412            if feedback_milestone > 0 and then --  Bugfix GdM 23-Dec-2002
 413               ((Bytes_in - 1) mod feedback_milestone = 0
 414                or Bytes_in = ZS_Size_Type (input_size))
 415            then
 416              if input_size_known then
 417                PctDone := Integer ((100.0 * Float (Bytes_in)) / Float (input_size));
 418                feedback (PctDone, False, user_aborting);
 419              else
 420                feedback (0, False, user_aborting);
 421              end if;
 422              if user_aborting then
 423                raise User_abort;
 424              end if;
 425            end if;
 426          end if;
 427          Shrink_Atom (Integer (Source (I)));
 428          Last_processed := I;
 429          if input_size_known and then Bytes_in >= ZS_Size_Type (input_size) then
 430            --  The job is done, even though there are more in the buffer
 431            IO_buffers.InputEoF := True;
 432            exit;
 433          end if;
 434        end loop;
 435        Zip.CRC_Crypto.Update (CRC, Source (Source'First .. Last_processed));
 436      end if;
 437    end Process_Input;
 438  
 439    procedure Deallocation is
 440    begin
 441      Destroy_Data_Structures;
 442      Deallocate_Buffers (IO_buffers);
 443    end Deallocation;
 444  
 445    Remaining : Natural;
 446  
 447  begin
 448    Allocate_Buffers (IO_buffers, input_size_known, input_size);
 449    Build_Data_Structures;
 450    Initialize_Data_Structures;
 451    output_size := 0;
 452    --
 453    begin
 454      Read_Block (IO_buffers, input);                --  Prime the input buffer
 455      First_atom   := True;         --  1st character flag for Crunch procedure
 456      if input_size_known then
 457        feedback_milestone := Zip_Streams.ZS_Size_Type (input_size / feedback_steps);
 458      end if;
 459      while not IO_buffers.InputEoF loop
 460        Remaining := IO_buffers.MaxInBufIdx - IO_buffers.InBufIdx + 1;
 461        if Remaining = 0 then
 462          Read_Block (IO_buffers, input);
 463        else
 464          Process_Input (IO_buffers.InBuf (IO_buffers.InBufIdx .. IO_buffers.InBufIdx + Remaining - 1));
 465          IO_buffers.InBufIdx := IO_buffers.InBufIdx + Remaining;
 466        end if;
 467      end loop;
 468      Process_Input (IO_buffers.InBuf (1 .. 0));  --  This forces EOF processing
 469      compression_ok := Bytes_in > 0;
 470    exception
 471      when Compression_inefficient =>
 472        compression_ok := False;
 473    end;
 474    --
 475    Deallocation;
 476  exception
 477    when others =>
 478      Deallocation;
 479      raise;
 480  end Zip.Compress.Shrink;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.