Back to... Zip-Ada

Source file : zip-compress.adb



   1  --  Legal licensing note:
   2  
   3  --  Copyright (c) 2007 .. 2024 Gautier de Montmollin
   4  --  SWITZERLAND
   5  
   6  --  Permission is hereby granted, free of charge, to any person obtaining a copy
   7  --  of this software and associated documentation files (the "Software"), to deal
   8  --  in the Software without restriction, including without limitation the rights
   9  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  --  copies of the Software, and to permit persons to whom the Software is
  11  --  furnished to do so, subject to the following conditions:
  12  
  13  --  The above copyright notice and this permission notice shall be included in
  14  --  all copies or substantial portions of the Software.
  15  
  16  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  --  THE SOFTWARE.
  23  
  24  --  NB: this is the MIT License, as found on the site
  25  --  http://www.opensource.org/licenses/mit-license.php
  26  
  27  with Zip.Create,
  28       Zip.Compress.Shrink,
  29       Zip.Compress.Reduce,
  30       Zip.Compress.Deflate,
  31       Zip.Compress.BZip2_E,
  32       Zip.Compress.LZMA_E;
  33  
  34  with Ada.Characters.Handling,
  35       Ada.Numerics.Discrete_Random,
  36       Ada.Strings.Fixed,
  37       Ada.Unchecked_Deallocation;
  38  
  39  package body Zip.Compress is
  40  
  41    use Zip_Streams, Zip.CRC_Crypto;
  42  
  43    --  The following procedure's purpose is to detect size overflows
  44    --  for Zip data. Even when the input size is known, we can have
  45    --  the situation where data is random and the compressed output size
  46    --  overflows.
  47  
  48    procedure Increment
  49      (out_size : in out Zip_64_Data_Size_Type;
  50       by       : in     Natural)
  51    is
  52      temp_by : constant ZS_Size_Type := ZS_Size_Type (by);
  53      use type Zip_64_Data_Size_Type, ZS_Size_Type;
  54    begin
  55      if temp_by > Create.max_size then
  56        raise Create.Zip_Capacity_Exceeded with
  57          "Compressed data too large: size is 2 EiB (Exbibytes) or more.";
  58      end if;
  59      out_size := out_size + Zip_64_Data_Size_Type (by);
  60    end Increment;
  61  
  62    default_byte_IO_buffer_size : constant := 1024 * 1024;  --  1 MiB
  63  
  64    -------------------
  65    -- Compress_data --
  66    -------------------
  67  
  68    procedure Compress_Data
  69      (input,
  70       output           : in out Zip_Streams.Root_Zipstream_Type'Class;
  71       input_size_known : in     Boolean;
  72       input_size       : in     Zip_64_Data_Size_Type;  --  ignored if input_size_known = False
  73       method           : in     Compression_Method;
  74       feedback         : in     Feedback_Proc;
  75       password         : in     String;
  76       content_hint     : in     Data_Content_Type;
  77       CRC              :    out Interfaces.Unsigned_32;
  78       output_size      :    out Zip_64_Data_Size_Type;
  79       zip_type         :    out Interfaces.Unsigned_16)
  80    is
  81      use Interfaces;
  82      user_aborting : Boolean;
  83      idx_in :  constant ZS_Index_Type := Index (input);
  84      idx_out : constant ZS_Index_Type := Index (output);
  85      compression_ok : Boolean;
  86      first_feedback : Boolean := True;
  87      --
  88      is_encrypted : constant Boolean := password /= "";
  89      encrypt_pack, mem_encrypt_pack : Crypto_pack;
  90      encrypt_header : Byte_Buffer (1 .. 12);
  91      package Byte_soup is new Ada.Numerics.Discrete_Random (Byte);
  92      use Byte_soup;
  93      cg : Byte_soup.Generator;
  94      --
  95      --  Store data as is, or, if do_write = False, just compute CRC (this is for encryption).
  96      --
  97      procedure Store_data (do_write : Boolean) is
  98        Buffer      : Byte_Buffer (1 .. default_byte_IO_buffer_size);
  99        Last_Read   : Natural;
 100        counted     : Zip_64_Data_Size_Type := 0;
 101      begin
 102        zip_type := Compression_format_code.store_code;
 103        while not End_Of_Stream (input) loop
 104          if input_size_known and then counted >= input_size then
 105            exit;
 106          end if;
 107          --  Copy data
 108          Block_Read (input, Buffer, Last_Read);
 109          Increment (counted, Last_Read);
 110          Update (CRC, Buffer (1 .. Last_Read));
 111          if do_write then
 112            Encode (encrypt_pack, Buffer (1 .. Last_Read));
 113            Block_Write (output, Buffer (1 .. Last_Read));
 114          end if;
 115          --  Feedback
 116          if feedback /= null and then
 117            (first_feedback or (counted mod (2**16) = 0) or
 118            (input_size_known and then counted = input_size))
 119          then
 120            if input_size_known then
 121              feedback (
 122                percents_done =>
 123                  Natural ((100.0 * Float (counted)) / Float (input_size)),
 124                entry_skipped => False,
 125                user_abort    => user_aborting);
 126            else
 127              feedback (
 128                percents_done => 0,
 129                entry_skipped => False,
 130                user_abort    => user_aborting);
 131            end if;
 132            first_feedback := False;
 133            if user_aborting then
 134              raise User_abort;
 135            end if;
 136          end if;
 137        end loop;
 138        output_size := counted;
 139        compression_ok := True;
 140      end Store_data;
 141      --
 142      procedure Compress_data_single_method (actual_method : Single_Method) is
 143      begin
 144        Init (CRC);
 145        if is_encrypted then
 146          Init_Keys (encrypt_pack, password);
 147          Set_Mode (encrypt_pack, encrypted);
 148          --  A bit dumb from Zip spec: we need to know the final CRC in order to set up
 149          --  the last byte of the encryption header, that allows for detecting if a password
 150          --  is OK - this, with 255/256 probability of correct detection of a wrong password!
 151          --  Result: 1st scan of the whole input stream with CRC calculation:
 152          Store_data (do_write => False);
 153          Reset (cg);
 154          for i in 1 .. 11 loop
 155            encrypt_header (i) := Random (cg);
 156          end loop;
 157          encrypt_header (12) := Byte (Shift_Right (Final (CRC), 24));
 158          Set_Index (input, idx_in);
 159          Init (CRC);
 160          Encode (encrypt_pack, encrypt_header);
 161          Block_Write (output, encrypt_header);
 162          --
 163          --  We need to remember at this point the encryption keys in case we need
 164          --  to rewrite from here (compression failed, store data).
 165          --
 166          mem_encrypt_pack := encrypt_pack;
 167        else
 168          Set_Mode (encrypt_pack, clear);
 169        end if;
 170        --
 171        --  Dispatch the work to child procedures doing the stream compression
 172        --  in different formats, depending on the actual compression method.
 173        --  For example, for methods LZMA_for_JPEG, LZMA_for_WAV, or LZMA_3, we
 174        --  logically call Zip.Compress.LZMA_E for the job.
 175        --
 176        case actual_method is
 177  
 178          when Store =>
 179            Store_data (do_write => True);
 180  
 181          when Shrink =>
 182            Zip.Compress.Shrink
 183              (input, output, input_size_known, input_size, feedback,
 184               CRC, encrypt_pack, output_size, compression_ok);
 185            zip_type := Compression_format_code.shrink_code;
 186  
 187          when Reduction_Method =>
 188            Zip.Compress.Reduce
 189              (input, output, input_size_known, input_size, feedback,
 190               actual_method,
 191               CRC, encrypt_pack, output_size, compression_ok);
 192            zip_type := Compression_format_code.reduce_code +
 193              Unsigned_16
 194                (Compression_Method'Pos (actual_method) 
 195                 Compression_Method'Pos (Reduce_1));
 196  
 197          when Deflation_Method =>
 198            Zip.Compress.Deflate
 199              (input, output, input_size_known, input_size, feedback,
 200               actual_method,
 201               CRC, encrypt_pack, output_size, compression_ok);
 202            zip_type := Compression_format_code.deflate_code;
 203  
 204          when BZip2_Method =>
 205            Zip.Compress.BZip2_E
 206              (input, output, input_size_known, input_size, feedback,
 207               actual_method,
 208               CRC, encrypt_pack, output_size, compression_ok);
 209            zip_type := Compression_format_code.bzip2_code;
 210  
 211          when LZMA_Method =>
 212            Zip.Compress.LZMA_E
 213              (input, output, input_size_known, input_size, feedback,
 214               actual_method,
 215               CRC, encrypt_pack, output_size, compression_ok);
 216            zip_type := Compression_format_code.lzma_code;
 217        end case;
 218        CRC := Final (CRC);
 219        --
 220        --  Handle case where compression has been unefficient:
 221        --  data to be compressed is too "random"; then compressed data
 222        --  happen to be larger than uncompressed data
 223        --
 224        if not compression_ok then
 225          --  Go back to the beginning and just store the data
 226          Set_Index (input, idx_in);
 227          if is_encrypted then
 228            Set_Index (output, idx_out + 12);
 229            --  Restore the encryption keys to their state just after the encryption header:
 230            encrypt_pack := mem_encrypt_pack;
 231          else
 232            Set_Index (output, idx_out);
 233          end if;
 234          Init (CRC);
 235          Store_data (do_write => True);
 236          CRC := Final (CRC);
 237        end if;
 238        if is_encrypted then
 239          output_size := output_size + 12;
 240        end if;
 241      end Compress_data_single_method;
 242  
 243      fast_presel_threshold : constant := 10_000;
 244      bzip2_threshold       : constant := 15_000;
 245  
 246      fast_presel : constant Boolean :=
 247        method = Preselection_1 or (input_size_known and then input_size < fast_presel_threshold);
 248  
 249      data_type_to_LZMA_method : constant array (Data_Content_Type) of LZMA_Method :=
 250        (JPEG    => LZMA_for_JPEG,
 251         ARW_RW2 => LZMA_for_ARW,
 252         ORF_CR2 => LZMA_for_ORF,
 253         MP3     => LZMA_for_MP3,
 254         MP4     => LZMA_for_MP4,
 255         PGM     => LZMA_for_PGM,
 256         PPM     => LZMA_for_PPM,
 257         PNG     => LZMA_for_PNG,
 258         WAV     => LZMA_for_WAV,
 259         AU      => LZMA_for_AU,
 260         others  => LZMA_1);  --  Fake, should be unused as such.
 261  
 262    begin
 263      case method is
 264        --
 265        when Single_Method =>
 266          Compress_data_single_method (method);
 267        --
 268        when Preselection_Method =>
 269          case content_hint is
 270            when neutral | text_data =>
 271              if input_size_known and then input_size < 9_000 then
 272                Compress_data_single_method (Deflate_3);  --  Deflate
 273              elsif fast_presel then
 274                --  See: Optimum, LZ77 sheet in za_work.xls
 275                --       or l2_vs_l3.xls with a larger data set.
 276                Compress_data_single_method (LZMA_2);                 --  LZMA with IZ_10 match finder
 277              else
 278                Compress_data_single_method (LZMA_3);                 --  LZMA with BT4 match finder
 279              end if;
 280  
 281            when ARW_RW2 | ORF_CR2 | MP3 | MP4 | JPEG | PGM | PPM | PNG | WAV | AU =>
 282              if input_size_known and then input_size < 2_250 then
 283                Compress_data_single_method (Deflate_3);  --  Deflate
 284              else
 285                Compress_data_single_method (data_type_to_LZMA_method (content_hint));
 286              end if;
 287  
 288            when GIF =>
 289              if input_size_known and then input_size < 350 then
 290                Compress_data_single_method (Deflate_1);
 291              else
 292                Compress_data_single_method (LZMA_for_GIF);
 293              end if;
 294  
 295            when Zip_in_Zip =>
 296              if input_size_known and then input_size < 1_000 then
 297                Compress_data_single_method (Deflate_3);  --  Deflate
 298              elsif fast_presel then
 299                Compress_data_single_method (LZMA_2_for_Zip_in_Zip);
 300              else
 301                Compress_data_single_method (LZMA_3_for_Zip_in_Zip);
 302              end if;
 303  
 304            when source_code =>
 305              if input_size_known and then input_size < 8_000 then
 306                Compress_data_single_method (Deflate_3);  --  Deflate
 307              elsif fast_presel then
 308                Compress_data_single_method (LZMA_2_for_Source);
 309              elsif input_size_known and then input_size < bzip2_threshold then
 310                Compress_data_single_method (LZMA_3_for_Source);
 311              else
 312                Compress_data_single_method (BZip2_Method'Last);
 313              end if;
 314  
 315            when text_formatted_text_or_dna =>
 316              if input_size_known and then input_size < 9_000 then
 317                Compress_data_single_method (Deflate_3);
 318              elsif fast_presel then
 319                Compress_data_single_method (LZMA_2);
 320              elsif input_size_known and then input_size < bzip2_threshold then
 321                Compress_data_single_method (LZMA_3);
 322              else
 323                Compress_data_single_method (BZip2_Method'Last);
 324              end if;
 325  
 326          end case;
 327      end case;
 328    end Compress_Data;
 329  
 330    function Guess_Type_from_Name (name : String) return Data_Content_Type is
 331      use Ada.Characters.Handling, Ada.Strings, Ada.Strings.Fixed;
 332      up : constant String := To_Upper (name);
 333      dot : constant Natural := Index (up, ".", Backward);
 334    begin
 335      if dot = 0 then
 336        return neutral;
 337      end if;
 338      declare
 339        ext : constant String := up (dot + 1 .. up'Last);
 340      begin
 341        if ext in "JPG" | "JPEG" then
 342          return JPEG;
 343        end if;
 344        if ext in
 345          "A"    | "ADA"  | "ADS" | "ADB" |     --  Ada
 346          "PRC"  | "PKG"  | "HAC" | "GPR" |
 347          "F"    | "FOR"  |                     --  Fortran
 348          "C"    | "H"    | "CPP" | "HPP" |     --  C/C++
 349          "DEF"  | "ASM"  |                     --  Assembler
 350          "JAVA" | "CS"   |
 351          "PAS"  | "INC"  | "LPR" | "PP" |      --  Pascal
 352          "M"    |                              --  Matlab
 353          "M4"   | "MAK"  | "IN"  |             --  Macro assembler
 354          "SH"   | "BAT"  | "CMD" |             --  Operating System Script
 355          "PO"   |                              --  GNU PO
 356          "XML"  | "XSL"  |
 357          "SGML" |
 358          "AUP"  |                              --  Audacity project (XML)
 359          "HTM"  | "HTML" |
 360          "JS"   | "LSP"  | "SCM" |
 361          "SQL"  | "PDB"  | "PL"
 362        then
 363          return source_code;
 364        end if;
 365        if ext in "CFG" | "INI" | "LOG" | "CSV" | "SVG" | "JSON" then
 366          return text_data;
 367        end if;
 368        if ext in "TXT" | "RTF" | "HTM" | "HTML" | "GB" | "FASTA" then
 369          return text_formatted_text_or_dna;
 370        end if;
 371        --  Zip archives happen to be zipped...
 372        if ext in
 373          "EPUB" |  --  EPUB: e-book reader format
 374          "ZIP"  |
 375          "JAR"  |
 376          "ODB"  | "ODS"  | "ODT" | "OTR" | "OTS" | "OTT" |
 377          "CRX"  | "NTH"  |
 378          "DOCX" | "PPTX" | "XLSX" | "XLSB" | "XLSM"
 379        then
 380          return Zip_in_Zip;
 381        end if;
 382        --  Some raw camera picture data
 383        if ext in "ORF" |  --  Raw Olympus
 384                  "CR2" |  --  Raw Canon
 385                  "RAF" |  --  Raw Fujifilm
 386                  "SRW"    --  Raw Samsung
 387        then
 388          return ORF_CR2;
 389        end if;
 390        if ext in "ARW" |  --  Raw Sony
 391                  "RW2" |  --  Raw Panasonic
 392                  "NEF" |  --  Raw Nikon
 393                  "DNG" |  --  Raw Leica, Pentax
 394                  "X3F"    --  Raw Sigma
 395        then
 396          return ARW_RW2;
 397        end if;
 398        if ext = "PGM" then
 399          return PGM;
 400        end if;
 401        if ext = "PPM" then
 402          return PPM;
 403        end if;
 404        if ext = "MP3" then
 405          return MP3;
 406        end if;
 407        if ext in "MTS" | "MP4" | "M4A" | "M4P" then
 408          return MP4;
 409        end if;
 410        if ext = "PNG" then
 411          return PNG;
 412        end if;
 413        if ext = "GIF" then
 414          return GIF;
 415        end if;
 416        if ext in "WAV" | "UAX" then
 417          return WAV;
 418        end if;
 419        if ext = "AU" then  --  Audacity raw data
 420          return AU;
 421        end if;
 422      end;
 423      return neutral;
 424    end Guess_Type_from_Name;
 425  
 426    -----------------------------------
 427    --  I/O buffers for compression  --
 428    -----------------------------------
 429  
 430    procedure Allocate_Buffers
 431      (b                : in out IO_Buffers_Type;
 432       input_size_known :        Boolean;
 433       input_size       :        Zip_64_Data_Size_Type)
 434    is
 435      calibration : Zip_64_Data_Size_Type := default_byte_IO_buffer_size;
 436    begin
 437      if input_size_known then
 438        calibration :=
 439          Zip_64_Data_Size_Type'Min
 440            (default_byte_IO_buffer_size,
 441             Zip_64_Data_Size_Type'Max (8, input_size));
 442      end if;
 443      b.InBuf  := new Byte_Buffer (1 .. Integer (calibration));
 444      b.OutBuf := new Byte_Buffer (1 .. default_byte_IO_buffer_size);
 445    end Allocate_Buffers;
 446  
 447    procedure Deallocate_Buffers (b : in out IO_Buffers_Type) is
 448      procedure Dispose_Buffer is
 449        new Ada.Unchecked_Deallocation (Byte_Buffer, p_Byte_Buffer);
 450    begin
 451      Dispose_Buffer (b.InBuf);
 452      Dispose_Buffer (b.OutBuf);
 453    end Deallocate_Buffers;
 454  
 455    procedure Read_Block
 456      (b     : in out IO_Buffers_Type;
 457       input : in out Zip_Streams.Root_Zipstream_Type'Class)
 458    is
 459    begin
 460      Zip.Block_Read
 461        (stream        => input,
 462         buffer        => b.InBuf.all,
 463         actually_read => b.MaxInBufIdx);
 464      b.InputEoF := b.MaxInBufIdx = 0;
 465      b.InBufIdx := 1;
 466    end Read_Block;
 467  
 468    procedure Write_Block
 469      (b                : in out IO_Buffers_Type;
 470       input_size_known :        Boolean;
 471       input_size       :        Zip_64_Data_Size_Type;
 472       output           : in out Zip_Streams.Root_Zipstream_Type'Class;
 473       output_size      : in out Zip_64_Data_Size_Type;
 474       crypto           : in out Zip.CRC_Crypto.Crypto_pack)
 475    is
 476      amount : constant Integer := b.OutBufIdx - 1;
 477      use type Zip_64_Data_Size_Type;
 478    begin
 479      Increment (output_size, Integer'Max (0, amount));
 480      if input_size_known and then output_size >= input_size then
 481        --  The compression so far is obviously inefficient for that file.
 482        --  Useless to go further.
 483        --  Stop immediately before growing the file more than the
 484        --  uncompressed size.
 485        raise Compression_inefficient;
 486      end if;
 487      Encode (crypto, b.OutBuf (1 .. amount));
 488      Zip.Block_Write (output, b.OutBuf (1 .. amount));
 489      b.OutBufIdx := 1;
 490    end Write_Block;
 491  
 492  end Zip.Compress;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.