Back to... Zip-Ada

Source file : rezip_lib.adb


------------------------------------------------------------------------------
--  File:            Rezip_Prc.adb
--  Description:     Recompression tool to make archives smaller.
--                   Core moved from Rezip (main)
--  Author:          Gautier de Montmollin
------------------------------------------------------------------------------
--
-- To do:
--  * In order to facilitate customization, ReZip could have a config file (
--    http://sf.net/projects/ini-files/ ) to store external packer program
--    names. See ZipMax as an example...
--
-- External programs used (feel free to customize/add/remove):
--   7-Zip, KZip, Zip (info-zip), AdvZip, DeflOpt
--   Web URL's: see Zipper_specification below or run ReZip without arguments.

with Zip.Headers, Zip.Compress, UnZip;
with Zip.Create;                        use Zip.Create;
with Zip_Streams;                       use Zip_Streams;

with My_feedback, Flexible_temp_files;

with Ada.Calendar;                      use Ada.Calendar;
with Ada.Directories;                   use Ada.Directories;
with Ada.Text_IO;                       use Ada.Text_IO;
with Dual_IO;                           use Dual_IO;
-- NB about 'use': no worry, Ada detects all conflicts
-- between Dual_IO and Text_IO
with Ada.Integer_Text_IO;               use Ada.Integer_Text_IO;
with Ada.Float_Text_IO;                 use Ada.Float_Text_IO;
with Ada.Streams.Stream_IO;             use Ada.Streams.Stream_IO;
with Ada.Strings.Fixed;                 use Ada.Strings.Fixed, Ada.Strings;
with Ada.Strings.Unbounded;             use Ada.Strings.Unbounded;
with Ada.Characters.Handling;           use Ada.Characters.Handling;
with Ada.Unchecked_Deallocation;
with Ada.Numerics.Discrete_Random;

with Interfaces;                        use Interfaces;

with GNAT.OS_Lib;

package body Rezip_lib is

  function S (Source : Ada.Strings.Unbounded.Unbounded_String) return String
    renames Ada.Strings.Unbounded.To_String;
  function U (Source : String) return Ada.Strings.Unbounded.Unbounded_String
    renames Ada.Strings.Unbounded.To_Unbounded_String;

  -- This might be better read from a config file...
  --
  type Zipper_specification is record
    name, title, URL, options: Unbounded_String;
    expanded_options    : Unbounded_String;
    -- options with dynamically expanded tokens
    made_by_version     : Unsigned_16;
    pkzm                : Zip.PKZip_method;
    limit               : Zip.File_size_type;
    -- Compression is considered too slow or unefficient beyond limit
    -- E.g., kzip's algorithm might be O(N^2) or worse; on large files,
    --   deflate_e or other methods are better anyway
    randomized          : Boolean;
  end record;

  NN: constant Unbounded_String:= Null_Unbounded_String;

  --  Give up recompression above a certain data size for some external packers like KZip
  --  or Zopfli.
  --
  kzip_zopfli_limit: constant:= 2_000_000;

  type Approach is (
    original,
    shrink,
    reduce_4,
    deflate_3,
    lzma_2, lzma_3,
    presel_1, presel_2,
    external_1, external_2, external_3, external_4,
    external_5, external_6, external_7, external_8,
    external_9, external_10, external_11, external_12,
    external_13
  );

  subtype Internal is Approach
    range Approach'Succ(Approach'First) .. Approach'Pred(external_1);
  subtype External is Approach
    range external_1 .. Approach'Last;

  ext: array(External) of Zipper_specification:=
    ( -- Zip 2.32 or later:
      (U("zip"), U("Zip"), U("http://info-zip.org/"),
         U("-9"), NN, 20, Zip.deflate, 0, False),
      -- 7-Zip 4.64 or later; Deflate:
      (U("7z"),
         U("7-Zip"), U("http://7-zip.org/"),
         U("a -tzip -mm=deflate -mfb=258 -mpass=#RAND#(7,15) -mmc=10000"),
         NN, 20, Zip.deflate, 0, True),
      (U("7z"),
         U("7-Zip"), NN,
         U("a -tzip -mm=deflate64 -mfb=257 -mpass=15 -mmc=10000"),
         NN, 21, Zip.deflate_e, 0, False),
      -- KZip:
      (U("kzip"),U("KZIP"),U("http://www.advsys.net/ken/utils.htm"),
         U("/rn /b0"), NN, 20, Zip.deflate, kzip_zopfli_limit, True),
      (U("kzip"),U("KZIP"),NN,
         U("/rn /b#RAND#(0,128)"), NN, 20, Zip.deflate, kzip_zopfli_limit, True),
      (U("kzip"),U("KZIP"),NN,
         U("/rn /b#RAND#(128,2048)"), NN, 20, Zip.deflate, kzip_zopfli_limit, True),
      -- Zip 3.0 or later; BZip2:
      (U("zip"), U("Zip"), U("http://info-zip.org/"),
         U("-#RAND#(1,9) -Z bzip2"), NN, 46, Zip.bzip2, 0, True),
      -- 7-Zip 9.20 or later; LZMA:
      (U("7z"), U("7-Zip"), NN,
         U("a -tzip -mm=LZMA -mx=9"), NN, 63, Zip.lzma_meth, 0, False),
      (U("7z"), U("7-Zip"), NN, -- dictionary size 2**19 = 512 KB
         U("a -tzip -mm=LZMA:a=2:d=19:mf=bt3:fb=128:lc=0:lp=2"), NN, 63, Zip.lzma_meth, 0, False),
      (U("7z"), U("7-Zip"), NN,
         U("a -tzip -mm=LZMA:a=2:d=#RAND#(3200,3700)k:mf=bt4:fb=#RAND#(255,273):lc=2:lp0:pb0"),
         NN, 63, Zip.lzma_meth, 0, True),
      (U("7z"), U("7-Zip"), NN, -- dictionary size 2**25 = 32 MB
         U("a -tzip -mm=LZMA:a=2:d=25:mf=bt3:fb=255:lc=7"), NN, 63, Zip.lzma_meth, 0, False),
      (U("7z"), U("7-Zip"), NN, -- dictionary size 2**26 = 64 MB
         U("a -tzip -mm=LZMA:a=2:d=26:mf=bt3:fb=222:lc=8:lp0:pb1"), NN, 63, Zip.lzma_meth, 0, False),
      -- AdvZip: advancecomp v1.19+ interesting for the Zopfli algorithm
      (U("advzip"), U("AdvZip"), U("http://advancemame.sf.net/comp-readme.html"),
         U("-a -4"), NN, 20, Zip.deflate, kzip_zopfli_limit, False)
    );

  defl_opt: constant Zipper_specification:=
    (U("deflopt"), U("DeflOpt"), U("http://www.walbeehm.com/download/"),
     NN, NN, 0, Zip.deflate, 0, False);

  procedure Rezip(
    from_zip_file      : String;
    to_zip_file        : String;
    format_choice      : Zip_format_set := all_formats;
    touch              : Boolean        := False;      --  set time stamps to now
    lower              : Boolean        := False;      --  set full file names to lower case
    delete_comment     : Boolean        := False;      --  delete zip comment
    randomized_stable  : Positive       := 1;
    log_file           : String         := "";
    html_report        : String         := "")
  is

    package DFIO is new Dual_IO.Float_IO(Float);

    procedure Rip_data(
      archive      : Zip.Zip_info; -- from this archive...
      input        : in out Root_Zipstream_Type'Class;
      data_name    : String;       -- extract this data
      rip_rename   : String;       -- to this file (compressed)
      unzip_rename : String;       -- and this one (uncompressed)
      header       : out Zip.Headers.Local_File_Header
    )
    is
      file_index     : Zip_Streams.ZS_Index_Type;
      comp_size      : Zip.File_size_type;
      uncomp_size    : Zip.File_size_type;
      file_out       : Ada.Streams.Stream_IO.File_Type;
      dummy_encoding : Zip.Zip_name_encoding;
      dummy_crc      : Unsigned_32;
      use UnZip;
    begin
      Zip.Find_offset(
        info           => archive,
        name           => data_name,
        name_encoding  => dummy_encoding,
        file_index     => file_index,
        comp_size      => comp_size,
        uncomp_size    => uncomp_size,
        crc_32         => dummy_crc
      );
      Set_Index(input, file_index);
      Zip.Headers.Read_and_check(input, header);
      -- Skip name and extra field
      Set_Index(input,
        Index(input) +
          Zip_Streams.ZS_Size_Type
           (header.extra_field_length +
            header.filename_length)
      );
      -- * Get the data, compressed
      Ada.Streams.Stream_IO.Create(file_out, Out_File, rip_rename);
      Zip.Copy_chunk(input, Stream(file_out).all, Integer(comp_size));
      Close(file_out);
      if unzip_rename /= "" then
        -- * Get the data, uncompressed
        Extract(
          from    => archive,
          what    => data_name,
          rename  => unzip_rename,
          options =>
          (   test_only => False,
              junk_directories => False,
              case_sensitive_match => True,
              extract_as_text => False
          )
        );
      end if;
    end Rip_data;

    Approach_to_Method: constant array(Internal) of Zip.Compress.Compression_Method:=
      (shrink    => Zip.Compress.Shrink,
       reduce_4  => Zip.Compress.Reduce_4,
       deflate_3 => Zip.Compress.Deflate_3,
       lzma_2    => Zip.Compress.LZMA_2,
       lzma_3    => Zip.Compress.LZMA_3,
       presel_1  => Zip.Compress.Preselection_1,
       presel_2  => Zip.Compress.Preselection_2
      );

    type Packer_info is record
      size            : Zip.File_size_type;
      zfm             : Unsigned_16;
      count           : Natural;
      saved           : Integer_64;
      -- can be negative if -defl chosen: suboptimal recompression,
      -- but compatible method
      uncomp_size     : Unsigned_64;
      -- summed uncompressed sizes might be more than 2**32
      expanded_options: Unbounded_String;
      iter            : Positive; -- iterations needed
      LZMA_EOS        : Boolean;
    end record;

    type Packer_info_array is array(Approach) of Packer_info;

    type Dir_entry;
    type p_Dir_entry is access Dir_entry;
    --
    type Dir_entry is record
      head: Zip.Headers.Central_File_Header;
      name: Unbounded_String;
      next: p_Dir_entry:= null;
      chosen_approach: Approach:= original;
      info: Packer_info_array;
    end record;

    function Temp_name(
      compressed: Boolean;
      appr      : Approach
    )
      return String
    is
      initial: constant array(Boolean) of Character:= ('u','c');
    begin
      return
        Flexible_temp_files.Radix &
        "_!" & initial(compressed) &
        '!' & Trim(Integer'Image(Approach'Pos(appr)), Left) &
        "!_.tmp";
    end Temp_name;

    function Img(a: Approach; html: Boolean) return String is
      function Repl(s: String) return String is
        t: String:= s;
      begin
        for i in t'Range loop
          if html and t(i) = ':' then t(i):= ' '; end if; -- break too long lines
        end loop;
        return t;
      end Repl;
    begin
      if a in External then
        return "External: " & S(ext(a).title) & ", " & Repl(S(ext(a).expanded_options));
      else
        declare
          s: constant String:= Approach'Image(a);
        begin
          return s(s'First) & To_Lower(s(s'First+1..s'Last) & (Approach'Width-s'Length+1) * ' ');
        end;
      end if;
    end Img;

    --  From AZip_Common...
    function Image_1000(r: Zip.File_size_type; separator: Character:= ''') return String is
      s: constant String:= Zip.File_size_type'Image(r);
      t: String(s'First..s'First+(s'Length*4)/3);
      j, c: Natural;
    begin
      --  For signed integers
      --  if r < 0 then
      --    return '-' & Image_1000(abs r, separator);
      --  end if;
      --
      --  We build result string t from right to left
      j:= t'Last + 1;
      c:= 0;
      for i in reverse s'First..s'Last loop
        exit when s(i) = ' ' or s(i) = '-';
        if c > 0 and then c mod 3 = 0 then
          j:= j - 1;
          t(j):= separator;
        end if;
        j:= j - 1;
        t(j):= s(i);
        c:= c + 1;
      end loop;
      return t(j..t'Last);
    end Image_1000;

    function Image_1000(r: Integer_64; separator: Character:= ''') return String is
      s: constant String:= Integer_64'Image(r);
      t: String(s'First..s'First+(s'Length*4)/3);
      j, c: Natural;
    begin
      --  For signed integers
      if r < 0 then
        return '-' & Image_1000(abs r, separator);
      end if;
      --  We build result string t from right to left
      j:= t'Last + 1;
      c:= 0;
      for i in reverse s'First..s'Last loop
        exit when s(i) = ' ' or s(i) = '-';
        if c > 0 and then c mod 3 = 0 then
          j:= j - 1;
          t(j):= separator;
        end if;
        j:= j - 1;
        t(j):= s(i);
        c:= c + 1;
      end loop;
      return t(j..t'Last);
    end Image_1000;

    procedure Call_external(
      packer:        String;
      args  :        String
    )
    is
      use GNAT.OS_Lib;
      procedure Dispose is
        new Ada.Unchecked_Deallocation(Argument_List, Argument_List_Access);
      list: Argument_List_Access;
      ok: Boolean;
    begin
      Dual_IO.Put_Line(packer & " [" & args & ']');
      list:= Argument_String_To_List(args);
      GNAT.OS_Lib.Spawn(packer, list.all, ok);
      Dispose(list);
      if not ok then
        Dual_IO.Put_Line(
          "Warning: cannot call " & packer &
          ". Is it callable through the ""path"" ?"
        );
      end if;
    end Call_external;

    seed_iterator: Natural;

    procedure Call_external_expanded(
      packer    :        String;
      options   :        String;
      other_args:        String;
      expand    : in out Unbounded_String -- expanded arguments
    )
    is
      type Token is (rand);
    begin
      expand:= U(options);
      for t in Token loop
        loop
          declare
            tok: constant String:= '#' & Token'Image(t) & '#';
            idx: constant Natural:= Index(expand, tok);
            par: constant Natural:= Index(expand, ")");
            replace: Unbounded_String;
          begin
            exit when idx = 0;  --  No more of token t to replace
            declare
              opt: constant String:= S(expand); -- partially processed option string
              curr: constant String:= opt(idx+1..opt'Last); -- current option
              par_a: constant Natural:= Index(curr, "(");
              par_z: constant Natural:= Index(curr, ")");
              comma: constant Natural:= Index(curr, ",");
              n1, n2, n: Integer;
            begin
              case t is
                when rand =>
                  n1:= Integer'Value(curr(par_a+1..comma-1));
                  n2:= Integer'Value(curr(comma+1..par_z-1));
                  declare
                    subtype rng is Integer range n1..n2;
                    package Rnd is new Ada.Numerics.Discrete_Random(rng);
                    gen: Rnd.Generator;
                  begin
                    Rnd.Reset(gen, seed_iterator);
                    -- On GNAT the clock-based Reset is too coarse
                    -- (gives many times the same seed when called with small
                    -- time intervals).
                    seed_iterator:= seed_iterator + 1;
                    n:= Rnd.Random(gen);
                  end;
                  replace:= U(Trim(Integer'Image(n),Left));
              end case;
              Replace_Slice(expand, idx, par, S(replace));
            end;
          end;
        end loop;
      end loop;
      Call_external(packer, S(expand) & ' ' & other_args);
    end Call_external_expanded;

    procedure Process_External(
      packer  : String;
      options : String;
      out_name: String;
      is_rand : Boolean;
      info    : out Packer_info
    )
    is
      temp_zip: constant String:= Simple_Name(Flexible_temp_files.Radix) & "_$temp$.zip";
      rand_win: constant String:= Simple_Name(Flexible_temp_files.Radix) & "_$rand$.tmp";
      options_winner: Unbounded_String;
      data_name: constant String:= Simple_Name(Temp_name(False,original));
      zi_ext: Zip.Zip_info;
      header: Zip.Headers.Local_File_Header;
      MyStream   : aliased File_Zipstream;
      cur_dir: constant String:= Current_Directory;
      size_memory: array(1..randomized_stable) of Zip.File_size_type:= (others => 0);
      size: Zip.File_size_type:= 0;
      zfm: Unsigned_16;
      attempt: Positive:= 1;
      dummy_exp_opt: Unbounded_String;
    begin
      -- We jump into the TEMP directory, to avoid putting pathes into the
      -- temporary zip file.
      Set_Directory(Containing_Directory(Flexible_temp_files.Radix));
      loop
        if Exists(temp_zip) then -- remove (eventually broken) zip
          Delete_File(temp_zip);
        end if;
        Call_external_expanded(
          packer,
          options,
          temp_zip & ' ' & data_name,
          info.expanded_options
        );
        if (not Exists(temp_zip)) and then Ada.Directories.Size(data_name) = 0 then
          -- ADVZip 1.19 doesn't create a zip file for an empty entry...
          Call_external_expanded("zip", "", temp_zip & ' ' & data_name, dummy_exp_opt);
        end if;
        -- Post processing of "deflated" entries with DeflOpt:
        Call_external(S(defl_opt.name), temp_zip);
        -- Now, rip
        Set_Name (MyStream, temp_zip);
        Open (MyStream, In_File);
        Zip.Load( zi_ext, MyStream, True );
        Rip_data(
          archive      => zi_ext,
          input        => MyStream,
          data_name    => data_name,
          rip_rename   => out_name,
          unzip_rename => "",
          header       => header
        );
        Close(MyStream);
        Delete_File(temp_zip);
        Zip.Delete(zi_ext);
        --
        if randomized_stable = 1 or not is_rand then -- normal behaviour (1 attempts)
          size:= header.dd.compressed_size;
          zfm := header.zip_type;
          info.iter:= 1;
          exit;
        end if;
        --
        -- Here, we process the cases where compressed sizes need
        -- to be reduced and we expect a stable size over n=randomized_stable
        -- attempts.
        --
        if attempt = 1 or else
          header.dd.compressed_size < size -- better size
        then
          size:= header.dd.compressed_size;
          zfm := header.zip_type;
          if Exists(rand_win) then
            Delete_File(rand_win);
          end if;
          Rename(out_name, rand_win);
          options_winner:= info.expanded_options;
        end if;
        --
        -- Manage the array of last n=randomized_stable sizes
        --
        if attempt > size_memory'Last then
          for i in size_memory'First+1..size_memory'Last loop
            size_memory(i-1):= size_memory(i);
          end loop;
          size_memory(size_memory'Last):= size;
        else
          size_memory(attempt):= size;
        end if;
        --
        -- Check stability over n=randomized_stable attempts
        --
        if attempt >= randomized_stable then
          if size_memory(randomized_stable) = size_memory(1) then
            if Exists(out_name) then
              Delete_File(out_name);
            end if;
            Rename(rand_win, out_name);
            info.expanded_options:= options_winner;
            info.iter:= attempt;
            exit;
          end if;
        end if;
        attempt:= attempt + 1;
      end loop;
      info.size       := size;
      info.uncomp_size:= Unsigned_64(header.dd.uncompressed_size);
      -- uncomp_size should not matter (always the same).
      info.zfm        := zfm;
      info.LZMA_EOS   := (zfm = 14) and (header.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0;
      -- We jump back to the startup directory.
      Set_Directory(cur_dir);
    end Process_External;

    --  Compress data as raw compressed data
    procedure Process_Internal_Raw(a: Approach; e: in out Dir_entry) is
      File_in       : aliased File_Zipstream;
      File_out      : aliased File_Zipstream;
    begin
      Set_Name (File_in, Temp_name(False,original));
      Open (File_in, In_File);
      Set_Name (File_out, Temp_name(True,a));
      Create (File_out, Out_File);
      Zip.Compress.Compress_data
      (
        input            => File_in,
        output           => File_out,
        input_size_known => True,
        input_size       => e.head.short_info.dd.uncompressed_size,
        method           => Approach_to_Method(a),
        feedback         => My_feedback'Access,
        password         => "",
        content_hint     => Zip.Compress.Guess_type_from_name(S(e.name)),
        CRC              => e.head.short_info.dd.crc_32,
        -- we take the occasion to compute the CRC if not
        -- yet available (e.g. JAR)
        output_size      => e.info(a).size,
        zip_type         => e.info(a).zfm
      );
      e.info(a).LZMA_EOS := e.info(a).zfm = 14;
      Close(File_in);
      Close(File_out);
    end Process_Internal_Raw;

    --  Compress data as a Zip archive (like external methods), then call post-processing
    procedure Process_Internal_as_Zip(a: Approach; e: in out Dir_entry) is
      zip_file : aliased File_Zipstream;
      archive : Zip_Create_info;
      temp_zip: constant String:= Simple_Name(Flexible_temp_files.Radix) & "_$temp$.zip";
      data_name: constant String:= Simple_Name(Temp_name(False,original));
      zi_ext: Zip.Zip_info;
      header: Zip.Headers.Local_File_Header;
      MyStream   : aliased File_Zipstream;
      cur_dir: constant String:= Current_Directory;
    begin
      Set_Directory(Containing_Directory(Flexible_temp_files.Radix));
      Create (archive, zip_file'Unchecked_Access, temp_zip);
      Set(archive, Approach_to_Method(a));
      Add_File(archive, data_name);
      Finish (archive);
      -- Post processing of "deflated" entries with DeflOpt:
      Call_external(S(defl_opt.name), temp_zip);
      -- Now, rip
      Set_Name (MyStream, temp_zip);
      Open (MyStream, In_File);
      Zip.Load( zi_ext, MyStream, True );
      Rip_data(
        archive      => zi_ext,
        input        => MyStream,
        data_name    => data_name,
        rip_rename   => Temp_name(True,a),
        unzip_rename => "",
        header       => header
      );
      e.info(a).size:= header.dd.compressed_size;
      e.info(a).zfm := header.zip_type;
      e.info(a).LZMA_EOS :=
        (header.zip_type = 14) and (header.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0;
      Close(MyStream);
      Delete_File(temp_zip);
      Zip.Delete(zi_ext);
      Set_Directory(cur_dir);
    end Process_Internal_as_Zip;

    time_0      : constant Ada.Calendar.Time:= Clock;

    procedure Repack_contents(orig_name, repacked_name, html_report_name: String)
    is
      use type Zip.PKZip_method;
      zi: Zip.Zip_info;
      MyStream   : aliased File_Zipstream;

      list, e, curr: p_Dir_entry:= null;
      repacked_zip_file   : aliased File_Zipstream;
      null_packer_info: constant Packer_info := (0,0,0,0,0,NN,1,False);
      total: Packer_info_array:= (others => null_packer_info);
      -- total(a).count counts the files where approach 'a' was optimal
      -- total(a).saved counts the saved bytes when approach 'a' was optimal
      total_choice: Packer_info:= null_packer_info;
      summary: Ada.Text_IO.File_Type;
      T0, T1 : Ada.Calendar.Time;
      seconds: Duration;
      --
      type Approach_Filtering is array(Approach) of Boolean;
      consider_a_priori: Approach_Filtering;
      --
      lightred: constant String:= "#f43048";

      procedure Process_one(unique_name: String) is
        comp_size  :  Zip.File_size_type;
        uncomp_size:  Zip.File_size_type;
        choice: Approach:= original;
        deco: constant String:= "-->-->-->" & (20+unique_name'Length) * '-';
        mth: Zip.PKZip_method;
        consider: Approach_Filtering;
        gain: Integer_64;
        --
        procedure Winner_color is
        begin
          if e.info(choice).size < e.info(original).size then
            Put(summary,"<td bgcolor=lightgreen><b>");
            -- We were able to reduce the size
          elsif e.info(choice).size = e.info(original).size then
            Put(summary,"<td><b>");
            -- Original was the best, alas...
          else
            Put(summary,"<td bgcolor=" & lightred & "><b>");
            -- Forced method with less efficient compression
          end if;
        end Winner_color;
        --
      begin
        consider:= consider_a_priori;
        if unique_name = "" or else
          (   unique_name(unique_name'Last)='\'
           or unique_name(unique_name'Last)='/'
          )
        then
          return; -- directories are useless entries!
        end if;
        total_choice.count:= total_choice.count + 1;
        Dual_IO.Close_and_Append_Log; -- have an up to date copy on file system
        Dual_IO.Put_Line(deco);
        Dual_IO.Put_Line(
          ' ' &
          Integer'Image((100 * total_choice.count) / Zip.Entries(zi)) &
          "% - Processing " &
          unique_name & ',' &
          Integer'Image(total_choice.count) &
          " of" &
          Integer'Image(Zip.Entries(zi))
        );
        Dual_IO.Put_Line(deco);
        Dual_IO.New_Line;
        --
        e:= new Dir_entry;
        if curr = null then
          curr:= e;
          list:= e;
        else
          curr.next:= e;
          curr:= e;
        end if;
        e.name:= U(unique_name);
        e.head.made_by_version     := 20; -- version 2.0
        e.head.comment_length      := 0;
        e.head.disk_number_start   := 0;
        e.head.internal_attributes := 0; -- 0: seems binary; 1, text
        e.head.external_attributes := 0;
        --
        Dual_IO.Put("    Phase 1:  dump & unzip -");
        Rip_data(
          archive      => zi,
          input        => MyStream,
          data_name    => unique_name,
          rip_rename   => Temp_name(True,original),
          unzip_rename => Temp_name(False,original),
          header       => e.head.short_info
        );
        --
        if touch then
          e.head.short_info.file_timedate:= Zip.Convert(time_0);
        end if;
        if lower then
          e.name:= U(To_Lower(S(e.name)));
        end if;
        -- Get reliable data from zi
        Zip.Get_sizes(
          info           => zi,
          name           => unique_name,
          comp_size      => comp_size,
          uncomp_size    => uncomp_size
        );
        Dual_IO.Put_Line(" done");
        --
        --  Apply limitations: skip some methods if certain conditions are met.
        --  For instance:
        --    Shrink may in rare cases be better for tiny files;
        --    KZip and Zopfli are excellent but really too slow on large files
        --
        for a in Approach loop
          case a is
            when original =>
              null;
            when shrink =>
              consider(a):= consider(a) and uncomp_size <= 6000;
            when reduce_4 =>
              consider(a):= consider(a) and uncomp_size <= 9000;
            when External =>
              consider(a):= consider(a) and (ext(a).limit = 0 or uncomp_size <= ext(a).limit);
            when others =>
              null;
          end case;
        end loop;
        Dual_IO.Put_Line("    Phase 2:  try different tactics...");
        --
        Try_all_approaches:
        --
        for a in Approach loop
          if consider(a) then
            Dual_IO.Put("              -o-> " & Img(a, html => False));
            e.info(a).iter:= 1;
            case a is
              --
              when original =>
                -- This is from the original .zip - just record size and method
                e.info(a).size:= comp_size;
                e.info(a).zfm := e.head.short_info.zip_type;
                e.info(a).LZMA_EOS :=
                  (e.info(a).zfm = 14) and
                  (e.head.short_info.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0;
                mth:= Zip.Method_from_code(e.info(a).zfm);
                --
              when Internal =>
                if Approach_to_Method(a) in Zip.Compress.Deflation_Method then
                  --  We post-process with DeflOpt.
                  Process_Internal_as_Zip(a, e.all);
                else
                  Process_Internal_Raw(a, e.all);
                end if;
              when External =>
                Dual_IO.New_Line;
                Process_External(
                  S(ext(a).name),
                  S(ext(a).options),
                  Temp_name(True,a),
                  ext(a).randomized,
                  e.info(a)
                );
                e.head.made_by_version:= ext(a).made_by_version;
                ext(a).expanded_options:= e.info(a).expanded_options;
                --
            end case;
            total(a).size:= total(a).size + e.info(a).size;
            if e.info(a).size < e.info(choice).size then
              -- Hurra, we found a smaller size!
              choice:= a;
            end if;
            if choice = original and not format_choice(mth) then
              --  This occurs if we want to make an archive with only a certain set of formats,
              --  for instance deflate_or_store, which is the most compatible.
              --  Since approach _a_ uses a format in the desired set, the choice will be
              --  forced out of original, even with a worse size.
              choice:= a;
            end if;
            Dual_IO.New_Line;
          end if;
        end loop Try_all_approaches;
        --
        total_choice.size:= total_choice.size + e.info(choice).size;
        total(choice).count:= total(choice).count + 1;
        total_choice.uncomp_size:=
          total_choice.uncomp_size + Unsigned_64(uncomp_size);
        gain:= Integer_64(e.info(original).size) - Integer_64(e.info(choice).size);
        total(choice).saved:= total(choice).saved + gain;
        total_choice.saved:= total_choice.saved + gain;
        --
        Dual_IO.New_Line;
        Dual_IO.Put(
          "    Phase 3:  Winner is " & Img(choice, html => False) &
          "; gain in bytes:" & Integer_64'Image(gain) &
          "; writing data -"
        );
        -- * Summary outputs
        Put(summary,
          "<tr><td>" &
          Trim(Integer'Image(total_choice.count),Left) &
          -- '/' &
          -- Trim(Integer'Image(Zip.Entries(zi)),Left) &
          "</td>" &
          "<td bgcolor=lightgrey><tt>" & unique_name & "</tt>, " &
          Image_1000(uncomp_size) & "</td>");
        for a in Approach loop
          if consider_a_priori(a) then
            if not consider(a) then
              Put(summary,"<td bgcolor=lightgray>skipped");
            elsif a = choice then
              Winner_color;
            elsif e.info(a).size = e.info(choice).size then -- ex aequo
              Put(summary,"<td bgcolor=lightblue><b>");
            else
              Put(summary,"<td>");
            end if;
            if consider(a) then
              Put(summary, Image_1000(e.info(a).size));
            end if;
            if choice = a then
              Put(summary,"</b>");
            end if;
            Put(summary,"</td>");
          end if;
        end loop;
        -- Recall winner approach, method and size:
        Put(summary,"<td>" & Img(choice, html => True) & "</td>");
        Put(summary,
          "<td bgcolor=#fafa64>" &
          Zip.Image(Zip.Method_from_code(e.info(choice).zfm)) &
          "</td>"
        );
        Put(summary,
          "<td>" &
          Zip.Image(Zip.Method_from_code(e.info(original).zfm)) &
          "</td>"
        );
        Winner_color;
        Put(summary, Image_1000(e.info(choice).size));
        Put(summary,"</b></td><td>");
        if e.info(original).size > 0 then
          Put(
            summary,
            100.0 * Float(e.info(choice).size) / Float(e.info(original).size),
            3,2,0
          );
          Put(summary,"%");
        end if;
        Put(summary,"</td><td>");
        if uncomp_size > 0 then
          Put(
            summary,
            100.0 * Float(e.info(choice).size) / Float(uncomp_size),
            3,2,0
          );
          Put(summary,"%");
        end if;
        Put(summary,"</td><td>");
        Put(summary,Integer'Image(e.info(choice).iter));
        Put_Line(summary,"</td></tr>");
        --
        -- Write winning data:
        --
        e.head.short_info.extra_field_length:= 0; -- We choose to ignore it...
        -- No data descriptor after data (bit 3); no EOS for LZMA (bit 1):
        e.head.short_info.bit_flag:=
          e.head.short_info.bit_flag and (2#1111_1111_1111_0101#);
        --  Set the LZMA EOS flag if present in winner entry (checked by 7-Zip v.17.01):
        if e.info(choice).LZMA_EOS then
          e.head.short_info.bit_flag:= e.head.short_info.bit_flag or Zip.Headers.LZMA_EOS_Flag_Bit;
        end if;
        -- Set or adjust the pre-data data descriptor:
        -- NB: even if missing pre-data, CRC will have been computed
        --     at least with one internal method
        e.head.short_info.dd.uncompressed_size:= uncomp_size;
        -- Put the winning size and method
        e.head.short_info.dd.compressed_size:= e.info(choice).size;
        e.head.short_info.zip_type:= e.info(choice).zfm;
        e.head.local_header_offset:= Unsigned_32(Index(repacked_zip_file))-1;
        Zip.Headers.Write(repacked_zip_file, e.head.short_info);
        String'Write(repacked_zip_file'Access, S(e.name));
        -- Copy the compressed data
        Zip.Copy_file( Temp_name(True,choice), repacked_zip_file );
        Dual_IO.Put_Line(" done");
        Dual_IO.New_Line;
      end Process_one;

      procedure Process_all is new Zip.Traverse(Process_one);

      ed: Zip.Headers.End_of_Central_Dir;

      function Webcolor(a: Approach) return String is
        v: Float;
        sr,sg,sb: String(1..10);
      begin
        if total_choice.saved > 0 and
          -- with options like -defl ot -fast_dec, we may have
          -- negative values or other strange things:
           total(a).saved >= 0
        then
          v:= Float(total(a).saved) / Float(total_choice.saved);
          -- ^ contribution of approach 'a'
        else
          v:= 0.0;
        end if;
        Put(sr, 512 + Integer(144.0 + 111.0 * (1.0 - v)), 16);
        sb:= sr;
        Put(sg, 512 + Integer(238.0 + 17.0 * (1.0 - v)), 16);
        return
          sr(sr'Last-2..sr'Last-1) &
          sg(sg'Last-2..sg'Last-1) &
          sb(sb'Last-2..sb'Last-1);
      end Webcolor;

      meth: Zip.Compress.Compression_Method;

    begin -- Repack_contents
      T0:= Clock;
      for a in Approach loop
        case a is
          when original =>
            consider_a_priori(a):= True;
          when Internal =>
            meth:= Approach_to_Method(a);
            case meth is
              when Zip.Compress.Single_Method =>
                consider_a_priori(a):= format_choice(Zip.Compress.Method_to_Format(meth));
              when Zip.Compress.Multi_Method =>
                --  This doesn't actually happen: the strong methods are
                --  already applied and compared as single methods.
                consider_a_priori(a):= format_choice = all_formats;
            end case;
          when External =>
            consider_a_priori(a):= format_choice(ext(a).pkzm);
        end case;
      end loop;
      Set_Name (MyStream, orig_name);
      Open (MyStream, In_File);
      Zip.Load( zi, MyStream, True );

      Set_Name (repacked_zip_file, repacked_name);
      Create(repacked_zip_file, Out_File);
      Create(summary, Out_File, html_report_name);
      Put_Line(summary,
        "<html><head><title>ReZip summary for file "
         & orig_name & "</title></head>"
      );
      Put_Line(summary, "<style>.container { overflow-y: auto; height: 87%; }");
      Put_Line(summary, "td_approach { width:115px; }");
      Put_Line(summary, "</style><body>");
      Put_Line(summary, "<font face=""Calibri, Arial, Tahoma""> <!-- Set font for the whole page !-->");
      Put_Line(summary,
        "<h2><a target=_blank href=" & Zip.web &
        ">ReZip</a> summary for file " & orig_name & "</h2>"
      );
      Put_Line(summary,
        "ReZip - Zip-Ada Library version " & Zip.version & " dated " & Zip.reference
      );
      if format_choice /= all_formats then
        Put_Line(summary,
          "<br><table border=0 cellpadding=0 cellspacing=0>" &
          "<tr bgcolor=" & lightred &
          "><td><b>An option that filters methods is on, " &
          "result(s) may be sub-optimal - see details at bottom.</b></td></tr></table><br>"
        );
      end if;
      Put_Line(summary, "<div class=""container""><table border=1 cellpadding=1 cellspacing=1>");
      Put(summary,
        "<tr bgcolor=lightyellow><td></td>"&
        "<td align=right valign=top><b>Approach:</b></td>"
      );
      for a in Approach loop
        if consider_a_priori(a) then
          if a in External then
            ext(a).expanded_options:= ext(a).options;
          end if;
          Put(summary, "<td valign=top class=""td_approach"">" & Img(a, html => True) & "</td>");
        end if;
      end loop;
      Put_Line(summary, "</tr>");
      Put(summary,
        "<tr bgcolor=lightyellow><td></td>"&
        "<td bgcolor=lightgrey valign=bottom><b>File name, uncompressed size:</b></td>"
      );
      for a in Approach loop
        if consider_a_priori(a) then
          case a is
            when original =>
              Put(summary, "<td align=right bgcolor=#dddd00 class=""td_approach"">Approach's<br>format &rarr;</td>");
            when Internal =>
              Put(summary, "<td bgcolor=#fafa64>" &
                Zip.Image(Zip.Compress.Method_to_Format(Approach_to_Method(a))) & "</td>");
              -- better: the Zip.PKZip_method, in case 2 Compression_Method's produce the same sub-format
            when External =>
              Put(summary, "<td bgcolor=#fafa64>" & Zip.Image(ext(a).pkzm) & "</td>");
          end case;
        end if;
      end loop;
      Put_Line(summary,
        "<td><b>Choice</b></td>"&
        "<td bgcolor=#dddd00>Choice's<br>method/<br>format</td>"&
        "<td>Original<br>method/<br>format</td>"&
        "<td>Smallest<br>size</td>" &
        "<td>% of<br>original</td><td>% of<br>uncompressed</td><td>Iterations</td></tr>"
      );
      --
      -- 1/ Recompress each file into the new archive:
      --
      Process_all(zi);
      --
      -- 2/ Almost done - write Central Directory:
      --
      ed.central_dir_offset:= Unsigned_32(Index(repacked_zip_file))-1;
      ed.total_entries:= 0;
      ed.central_dir_size:= 0;
      ed.main_comment_length:= 0;
      declare
        comment: constant String:= Zip.Zip_comment(zi);
      begin
        if not delete_comment then
          ed.main_comment_length:= comment'Length;
        end if;
        -- Restart at the beginning of the list
        e:= list;
        while e /= null loop
          ed.total_entries:= ed.total_entries + 1;
          Zip.Headers.Write(repacked_zip_file, e.head);
          String'Write(repacked_zip_file'Access, S(e.name));
          ed.central_dir_size:=
            ed.central_dir_size +
            Zip.Headers.central_header_length +
            Unsigned_32(e.head.short_info.filename_length);
          e:= e.next;
        end loop;
        ed.disknum:= 0;
        ed.disknum_with_start:= 0;
        ed.disk_total_entries:= ed.total_entries;
        Zip.Headers.Write(repacked_zip_file, ed);
        if not delete_comment then
          String'Write(repacked_zip_file'Access, comment);
        end if;
      end;
      Close(repacked_zip_file);
      Close(MyStream);
      --
      -- Cleanup
      --
      for a in Approach loop
        if consider_a_priori(a) then
          if Exists(Temp_name(True,a)) then
            Delete_File( Temp_name(True,a) );
          end if;
          if a = original then -- also an uncompressed data file to delete
            Delete_File( Temp_name(False,a) );
          end if;
        end if;
      end loop;
      -- Report total bytes
      Put(summary,"<tr><td></td><td><b>T<small>OTAL BYTES</small></b></td>");
      for a in Approach loop
        if consider_a_priori(a) then
          Put(summary,
            "<td bgcolor=#" & Webcolor(a) & ">" &
            Image_1000(total(a).size) & "</td>"
          );
        end if;
      end loop;
      Put(summary,
        "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & Image_1000(total_choice.size) &
        "</b></td><td>"
      );
      if total(original).size > 0 then
        Put(summary,
          100.0 * Float(total_choice.size) / Float(total(original).size),
          3,2,0
        );
        Put(summary,"%");
      end if;
      Put(summary, "</td><td>");
      if total_choice.uncomp_size > 0 then
        Put(summary,
          100.0 * Float(total_choice.size) / Float(total_choice.uncomp_size),
          3,2,0
        );
        Put(summary,"%");
      end if;
      Put_Line(summary, "</td></tr>");
      -- Report total files per approach
      Put(summary,"<tr><td></td><td><b>T<small>OTAL FILES (when optimal)</small></b></td>");
      for a in Approach loop
        if consider_a_priori(a) then
          Put(summary,
            "<td bgcolor=#" & Webcolor(a) & ">" &
            Integer'Image(total(a).count) & "</td>"
          );
        end if;
      end loop;
      Put(summary,
        "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & Integer'Image(total_choice.count) &
        "</b></td>" &
        "<td>"
      );
      Put_Line(summary, "</td></tr>");
      -- Report total saved bytes per approach
      Put(summary,"<tr><td></td><td><b>T<small>OTAL SAVED BYTES (when optimal)</small></b></td>");
      for a in Approach loop
        if consider_a_priori(a) then
          Put(summary, "<td bgcolor=#" & Webcolor(a) & ">" & Image_1000(total(a).saved) & "</td>");
        end if;
      end loop;
      Put(summary,
        "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" &
        Image_1000(total_choice.saved) & "</b></td>" &
        "<td>"
      );
      if total(original).size > 0 then
        Put(summary,
          100.0 * Float(total_choice.saved) / Float(total(original).size),
          3,2,0
        );
        Put(summary,"%");
      end if;
      Put(summary, "</td><td>");
      if total_choice.uncomp_size > 0 then
        Put(summary,
          100.0 * Float(total_choice.saved) / Float(total_choice.uncomp_size),
          3,2,0
        );
        Put(summary,"%");
      end if;
      Put_Line(summary, "</td></tr></table></div><br><br>");
      Put_Line(summary, "<dt>Options used for ReZip</dt>");
      Put_Line(summary, "<dd>Randomized_stable =" & Integer'Image(randomized_stable) & "<br>");
      Put_Line(summary, "    Formats allowed:<br><table border=1 cellpadding=1 cellspacing=1>");
      for f in format_choice'Range loop
        Put_Line(summary,
          "      <tr><td>" & Zip.Image(f) & "</td><td>" &
          Boolean'Image(format_choice(f)) & "</td></tr>");
      end loop;
      Put_Line(summary, "    </table>");
      Put_Line(summary, "</dd>");
      T1:= Clock;
      seconds:= T1-T0;
      Put(summary, "Time elapsed : ");
      Put(summary,  Float( seconds ), 4, 2, 0 );
      Put(summary,  " seconds, or");
      Put(summary,  Float( seconds ) / 60.0, 4, 2, 0 );
      Put(summary,  " minutes, or");
      Put(summary,  Float( seconds ) / 3600.0, 4, 2, 0 );
      Put_Line(summary,  " hours.</font></body></html>");
      Close(summary);
      Dual_IO.Put("Time elapsed : ");
      DFIO.Put( Float( seconds ), 4, 2, 0 );
      Dual_IO.Put_Line( " sec");
      Dual_IO.Put_Line("All details for " & orig_name & " in " & html_report_name);
    end Repack_contents;

    -- This is for randomizing the above seed_iterator.
    subtype Seed_Range is Integer range 1..1_000_000;
    package Rnd_seed is new Ada.Numerics.Discrete_Random(Seed_Range);
    gen_seed: Rnd_seed.Generator;

  begin
    Rnd_seed.Reset(gen_seed); -- 1x clock-based randomization
    seed_iterator:= Rnd_seed.Random(gen_seed);
    Flexible_temp_files.Initialize;
    Dual_IO.Create_Log(log_file);
    Repack_contents(from_zip_file, to_zip_file, html_report);
    Dual_IO.Close_Log;
    Flexible_temp_files.Finalize;
  end Rezip;

  procedure Show_external_packer_list is
    procedure Display(p: Zipper_specification) is
      fix: String(1..8):= (others => ' ');
    begin
      Insert(fix,fix'First, S(p.title));
      Ada.Text_IO.Put("  " & fix);
      fix:= (others => ' ');
      Insert(fix,fix'First, S(p.name));
      Ada.Text_IO.Put_Line(" Executable: " & fix & " URL: " & S(p.URL));
    end Display;
    name_is_new: Boolean;
  begin
    for e in External loop
      name_is_new:= True;
      for ee in External'First .. External'Pred(e) loop
        name_is_new:= name_is_new and ext(e).name /= ext(ee).name;
      end loop;
      if name_is_new then
        Display(ext(e));
      end if;
    end loop;
    Display(defl_opt);
  end Show_external_packer_list;

end Rezip_lib;

Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other related Ada projects on Gautier's blog.