Back to... Zip-Ada

Source file : lz77.adb



--  There are four LZ77 encoders at choice in this package:
--
--    1/  LZ77_using_LZHuf, based on LZHuf by Haruhiko Okumura and Haruyasu Yoshizaki.
--
--    2/  LZ77_using_IZ, based on Info-Zip's Zip's deflate.c by Jean-Loup Gailly.
--          deflate.c is actually the LZ77 part of Info-Zip's compression.
--
--    3/  LZ77_using_BT4, based on LZMA SDK's BT4 algorithm by Igor Pavlov.
--
--    4/  LZ77_by_Rich, based on PROG2.C by Rich Geldreich, Jr.
--
--  Variant 1/, LZ77_using_LZHuf, is working since 2009. Two problems: it is slow
--     and not well adapted to the Deflate format (mediocre compression).
--
--  Variant 2/, LZ77_using_IZ, is much faster, and better suited for Deflate.
--     It has been added on 05-Mar-2016.
--     The code is tailored and optimized for a single set of
--     the String_buffer_size, Look_Ahead, Threshold LZ77 parameters - those for Deflate.
--
--  Variant 3/, LZ77_using_BT4, was added on 06-Sep-2016.
--     The seems to be the best match finder for LZMA on data of the >= 1 MiB scale.

--  To do:
--
--  2/
--    - LZ77 / IZ: similar to the test with TOO_FAR, try to cluster distances around
--        values needing less extra bits (may not work at all...)
--    - LZ77 / IZ: tune TOO_FAR (max: 32767), see http://optipng.sf.net/pngtech/too_far.html
--        "TOO_FAR in zlib Is Not Too Far" for discussion

--  Legal licensing note:

--  Copyright (c) 2016 .. 2020 Gautier de Montmollin (maintainer of the Ada version)
--  SWITZERLAND

--  Permission is hereby granted, free of charge, to any person obtaining a copy
--  of this software and associated documentation files (the "Software"), to deal
--  in the Software without restriction, including without limitation the rights
--  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
--  copies of the Software, and to permit persons to whom the Software is
--  furnished to do so, subject to the following conditions:

--  The above copyright notice and this permission notice shall be included in
--  all copies or substantial portions of the Software.

--  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
--  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
--  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
--  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
--  THE SOFTWARE.

--  NB: this is the MIT License, as found 21-Aug-2016 on the site
--  http://www.opensource.org/licenses/mit-license.php

with Ada.Text_IO, Ada.Integer_Text_IO;
with Ada.Unchecked_Deallocation;
with Interfaces; use Interfaces;
with System;

package body LZ77 is

  --  System.Word_Size: 13.3(8): A word is the largest amount of storage
  --  that can be conveniently and efficiently manipulated by the hardware,
  --  given the implementation's run-time model.
  --
  min_bits_32 : constant := Integer'Max (32, System.Word_Size);
  min_bits_16 : constant := Integer'Max (16, System.Word_Size);

  --  We define an Integer type which is at least 32 bits, but n bits
  --  on a native n (> 32) bits architecture (no performance hit on 64+
  --  bits architectures).
  --  Integer_M16 not needed: Integer already guarantees 16 bits
  --
  type Integer_M32 is range -2**(min_bits_32 - 1) .. 2**(min_bits_32 - 1) - 1;
  subtype Natural_M32  is Integer_M32 range 0 .. Integer_M32'Last;
  --  subtype Positive_M32 is Integer_M32 range 1 .. Integer_M32'Last;

  type Unsigned_M16 is mod 2**min_bits_16;
  type Unsigned_M32 is mod 2**min_bits_32;

  function Are_Matches_Sorted (m : Matches_Type) return Boolean is
  begin
    for i in 2 .. m.count loop
      if m.dl (i).length < m.dl (i - 1).length then
        return False;
      end if;
    end loop;
    return True;
  end Are_Matches_Sorted;

  procedure Encode is

    -----------------------
    --  LZHuf algorithm  --
    -----------------------

    procedure LZ77_using_LZHuf is
      --  Based on LZHUF by OKUMURA & YOSHIZAKI.
      --  Here the adaptive Huffman coding is thrown away:
      --  algorithm is used only to find matching patterns.

      N_Char    : constant Integer := 256 - Threshold + Look_Ahead;
      --  Character code (= 0..N_CHAR-1)
      Max_Table     : constant Integer := N_Char * 2 - 1;

      type Text_Buffer is array (0 .. String_buffer_size + Look_Ahead - 1) of Byte;
      empty_buffer : constant Text_Buffer := (others => 32);  --  ' '

      --  > The Huffman frequency handling is made generic so we have
      --    one copy of the tree and of the frequency table for Encode
      --    and one for Decode

      generic
      package Huffman is
        --- Pointing parent nodes.
        --- Area [Max_Table..(Max_Table + N_CHAR - 1)] are pointers for leaves
        Parent :  array (0 .. Max_Table + N_Char - 1) of Natural;
        --- Pointing children nodes (son[], son[] + 1)
        Son    :  array (0 .. Max_Table - 1)  of Natural;

        Root_Position : constant Integer := Max_Table - 1;  --  (can be always Son'last ?)

        procedure Start;
        procedure Update_Freq_Tree (C0 : Natural);
      end Huffman;

      package body Huffman is

        Freq : array (0 .. Max_Table) of Natural;  --  Cumulative freq table

        Max_Freq : constant := 16#8000#;
        --  ^-- update when cumulative frequency reaches to this value

        procedure Start is
          I : Natural;
        begin
          for J in  0 .. N_Char - 1  loop
            Freq (J) := 1;
            Son (J) := J + Max_Table;
            Parent (J + Max_Table) := J;
          end loop;

          I := 0;
          for J in N_Char .. Root_Position  loop
            Freq (J) := Freq (I) + Freq (I + 1);
            Son (J) := I;
            Parent (I) := J;
            Parent (I + 1) := J;
            I := I + 2;
          end loop;

          Freq (Freq'Last) := 16#FFFF#;  --  ( Max_Table )
          Parent (Root_Position) := 0;
        end Start;

        procedure Update_Freq_Tree (C0 : Natural) is

          procedure Reconstruct_Freq_Tree is
            I, J, K, F, L : Natural;
          begin
            --  Halven cumulative freq for leaf nodes
            J := 0;
            for I in 0 .. Root_Position loop
              if Son (I) >= Max_Table then
                Freq (J) := (Freq (I) + 1) / 2;
                Son (J) := Son (I);
                J := J + 1;
              end if;
            end loop;

            --  Make a tree : first, connect children nodes
            I := 0;
            for J in N_Char .. Root_Position loop  --  J : free nodes
              K := I + 1;
              F := Freq (I) + Freq (K); -- new frequency
              Freq (J) := F;
              K := J - 1;
              while F < Freq (K) loop
                K := K - 1;
              end loop;

              K := K + 1;
              L := J - K;  --  2007: fix: was L:= (J-K)*2, memcopy parameter remain

              Freq (K + 1 .. K + L) := Freq (K .. K + L - 1);  --  shift by one cell right
              Freq (K) := F;
              Son (K + 1 .. K + L) := Son (K .. K + L - 1);  --  shift by one cell right
              Son (K) := I;
              I := I + 2;
            end loop;

            --  Connect parent nodes
            for I in 0 .. Max_Table - 1 loop
              K := Son (I);
              Parent (K) := I;
              if K < Max_Table then
                Parent (K + 1) := I;
              end if;
            end loop;

          end Reconstruct_Freq_Tree;

          C, I, J, K, L : Natural;

        begin  --  Update_Freq_Tree;
          if Freq (Root_Position) = Max_Freq then
            Reconstruct_Freq_Tree;
          end if;
          C := Parent (C0 + Max_Table);
          loop
            Freq (C) := Freq (C) + 1;
            K := Freq (C);
            --  Swap nodes to keep the tree freq-ordered
            L := C + 1;
            if  K > Freq (L) then
              while K > Freq (L + 1) loop
                L := L + 1;
              end loop;

              Freq (C) := Freq (L);
              Freq (L) := K;

              I := Son (C);
              Parent (I) := L;
              if I < Max_Table then
                Parent (I + 1) := L;
              end if;

              J := Son (L);
              Son (L) := I;

              Parent (J) := C;
              if J < Max_Table then
                Parent (J + 1) := C;
              end if;
              Son (C) := J;

              C := L;
            end if;
            C := Parent (C);
            exit when C = 0;
          end loop;        -- do it until reaching the root
        end Update_Freq_Tree;

      end Huffman;

      Node_Nil : constant Integer := String_buffer_size;    --  End of tree's node

      Lson, Dad :  array (0 .. String_buffer_size) of Natural;
      Rson :       array (0 .. String_buffer_size + 256) of Natural;

      procedure Init_Tree is
      begin
        for I in String_buffer_size + 1 .. Rson'Last loop
          Rson (I) := Node_Nil;
        end loop;  --  root
        for I in 0 .. String_buffer_size - 1 loop
          Dad (I) := Node_Nil;
        end loop;  --  node
      end Init_Tree;

      Match_Position : Natural;
      Match_Length   : Natural;

      Text_Buf : Text_Buffer := empty_buffer;

      procedure Insert_Node (R : Integer) is
        I, P : Integer;
        Geq  : Boolean := True;
        C    : Natural;
      begin
        P := String_buffer_size + 1 + Integer (Text_Buf (R));
        Rson (R) := Node_Nil;
        Lson (R) := Node_Nil;
        Match_Length := 0;
        loop
          if Geq then
            if Rson (P) = Node_Nil then
              Rson (P) := R;
              Dad (R) := P;
              return;
            end if;
            P := Rson (P);
          else
            if Lson (P) = Node_Nil then
              Lson (P) := R;
              Dad (R) := P;
              return;
            end if;
            P := Lson (P);
          end if;
          I := 1;
          while I < Look_Ahead and then Text_Buf (R + I) = Text_Buf (P + I)  loop
            I := I + 1;
          end loop;

          Geq := Text_Buf (R + I) >= Text_Buf (P + I) or I = Look_Ahead;

          if I > Threshold then
            if I > Match_Length then
              Match_Position := (R - P) mod String_buffer_size - 1;
              Match_Length := I;
              exit when Match_Length >= Look_Ahead;
            end if;
            if I = Match_Length then
              C := (R - P) mod String_buffer_size - 1;
              if C < Match_Position then
                Match_Position := C;
              end if;
            end if;
          end if;
        end loop;

        Dad (R)  := Dad (P);
        Lson (R) := Lson (P);
        Rson (R) := Rson (P);
        Dad (Lson (P)) := R;
        Dad (Rson (P)) := R;
        if Rson (Dad (P)) = P then
          Rson (Dad (P)) := R;
        else
          Lson (Dad (P)) := R;
        end if;
        Dad (P) := Node_Nil;  --  remove P
      end Insert_Node;

      procedure Delete_Node (P : Natural) is
        Q : Natural;
      begin
        if Dad (P) = Node_Nil then  --  unregistered
          return;
        end if;
        if Rson (P) = Node_Nil then
          Q := Lson (P);
        elsif Lson (P) = Node_Nil then
          Q := Rson (P);
        else
          Q := Lson (P);
          if Rson (Q) /= Node_Nil then
            loop
              Q := Rson (Q);
              exit when Rson (Q) = Node_Nil;
            end loop;
            Rson (Dad (Q)) := Lson (Q);
            Dad (Lson (Q)) := Dad (Q);
            Lson (Q) := Lson (P);
            Dad (Lson (P)) := Q;
          end if;
          Rson (Q) := Rson (P);
          Dad (Rson (P)) := Q;
        end if;
        Dad (Q) := Dad (P);
        if Rson (Dad (P)) = P then
          Rson (Dad (P)) := Q;
        else
          Lson (Dad (P)) := Q;
        end if;
        Dad (P) := Node_Nil;
      end Delete_Node;

      package Huffman_E is new Huffman;

      I, R, S, Last_Match_Length : Natural;
      Len : Integer;
      C : Byte;
    begin
      if not More_Bytes then
        return;
      end if;
      Huffman_E.Start;
      Init_Tree;
      S := 0;
      R := String_buffer_size - Look_Ahead;
      Len := 0;
      while Len < Look_Ahead and More_Bytes loop
        Text_Buf (R + Len) := Read_Byte;
        Len := Len + 1;
      end loop;

      --  Seems: fill dictionary with default value
      --
      --  for I in 1.. Look_Ahead loop
      --    Insert_Node(R - I);
      --  end loop;

      Insert_Node (R);

      loop
        if Match_Length > Len then
          Match_Length := Len;
        end if;
        if Match_Length <= Threshold then
          Match_Length := 1;
          Huffman_E.Update_Freq_Tree (Natural (Text_Buf (R)));
          Write_Literal (Text_Buf (R));
        else
          Write_DL_Code (Match_Position + 1, Match_Length);
        end if;
        Last_Match_Length := Match_Length;
        I := 0;
        while I < Last_Match_Length and More_Bytes loop
          I := I + 1;
          Delete_Node (S);
          C := Read_Byte;
          Text_Buf (S) := C;
          if  S < Look_Ahead - 1 then
            Text_Buf (S + String_buffer_size) := C;
          end if;
          S := (S + 1) mod String_buffer_size;
          R := (R + 1) mod String_buffer_size;
          Insert_Node (R);
        end loop;

        while I < Last_Match_Length loop
          I := I + 1;
          Delete_Node (S);
          S := (S + 1) mod String_buffer_size;
          R := (R + 1) mod String_buffer_size;
          Len := Len - 1;
          if Len > 0 then
            Insert_Node (R);
          end if;
        end loop;

        exit when Len = 0;
      end loop;
    end LZ77_using_LZHuf;

    --------------------------
    --  Info-Zip algorithm  --
    --------------------------

    --  LZ77_using_IZ: based on deflate.c by Jean-Loup Gailly.
    --  Core description of the algorithm:
    --
    --     The most straightforward technique turns out to be the fastest for
    --     most input files: try all possible matches and select the longest.
    --     The key feature of this algorithm is that insertions into the string
    --     dictionary are very simple and thus fast, and deletions are avoided
    --     completely. Insertions are performed at each input character, whereas
    --     string matches are performed only when the previous match ends. So it
    --     is preferable to spend more time in matches to allow very fast string
    --     insertions and avoid deletions. The matching algorithm for small
    --     strings is inspired from that of Rabin & Karp [1]. A brute force approach
    --     is used to find longer strings when a small match has been found.
    --
    --     The idea of lazy evaluation of matches is due to Jan-Mark Wams.
    --
    --     [1] A description of the Rabin and Karp algorithm is given in the book
    --         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
    --
    --  About hashing: chapter 6.4 of The Art of Computer Programming, Volume 3, D.E. Knuth
    --  Rabin and Karp algorithm: http://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm

    --  Compression level: 0: store, 1: best speed, 9: best compression, 10: variant of level 9
    --  Ada code: only levels 4 to 10 are supported.

    procedure LZ77_using_IZ (level : Natural) is
      HASH_BITS : constant := 15;  --  13..15
      HASH_SIZE : constant := 2 ** HASH_BITS;
      HASH_MASK : constant := HASH_SIZE - 1;
      WSIZE     : constant Integer_M32 := Integer_M32 (String_buffer_size);
      WMASK     : constant Unsigned_M16 := Unsigned_M16 (WSIZE - 1);
      --  HASH_SIZE and WSIZE must be powers of two
      NIL      : constant := 0;     --  Tail of hash chains
      TOO_FAR  : constant := 4096;  --  Matches of length 3 are discarded if their distance exceeds TOO_FAR
      --
      subtype ulg is Unsigned_M32;
      subtype unsigned is Unsigned_M16;
      subtype ush is Unsigned_M16;
      --  subtype long is Integer_M32;
      --  subtype int is Integer;
      subtype Pos is Unsigned_M32;  --  must be at least 32 bits
      --  subtype IPos is unsigned;
      --  A Pos is an index in the character window. IPos is used only for parameter passing.
      window : array (0 .. 2 * WSIZE - 1) of Byte;
      --  Sliding window. Input bytes are read into the second half of the window,
      --  and move to the first half later to keep a dictionary of at least WSIZE
      --  bytes. With this organization, matches are limited to a distance of
      --  WSIZE-MAX_MATCH bytes, but this ensures that IO is always
      --  performed with a length multiple of the block size.
      prev : array (0 .. unsigned (WSIZE - 1)) of Pos;
      --  Link to older string with same hash index.
      --  This link is maintained only for the last 32K strings.
      --  An index in this array is thus a window index modulo 32K.
      head : array (0 .. unsigned (HASH_SIZE - 1)) of Pos;
      --  Heads of the hash chains or NIL.
      window_size : ulg;
      --  window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
      --  input file length plus MIN_LOOKAHEAD.
      sliding : Boolean;  --  Set to False when the input file is already in memory  [was: int]
      ins_h : unsigned;   --  hash index of string to be inserted
      MIN_MATCH : constant Integer_M32 := Integer_M32 (Threshold) + 1;    --  Deflate: 3
      MAX_MATCH : constant Integer_M32 := Integer_M32 (Look_Ahead);       --  Deflate: 258
      --  Minimum amount of lookahead, except at the end of the input file.
      MIN_LOOKAHEAD : constant Integer_M32 := MAX_MATCH + MIN_MATCH + 1;  --  Deflate: 262
      --  This LZ77 compression doesn't use the full possible distance range: 32507..32768 unused!
      MAX_DIST : constant Integer_M32 := WSIZE - MIN_LOOKAHEAD;  --  Deflate: 32506
      H_SHIFT : constant Integer := Integer ((HASH_BITS + MIN_MATCH - 1) / MIN_MATCH);
      --  Number of bits by which ins_h and del_h must be shifted at each
      --  input step. It must be such that after MIN_MATCH steps, the oldest
      --  byte no longer takes part in the hash key, that is:
      --  H_SHIFT * MIN_MATCH >= HASH_BITS
      prev_length : Natural_M32; --  [was: unsigned]
      --  Length of the best match at previous step. Matches not greater than this
      --  are discarded. This is used in the lazy match evaluation.
      strstart    : Natural_M32;   --  start of string to insert [was: unsigned]
      match_start : Natural_M32;   --  start of matching string [was: unsigned]
      eofile      : Boolean;       --  flag set at end of input file [was: int]
      lookahead   : Natural_M32;   --  number of valid bytes ahead in window  [was: unsigned]
      max_chain_length : unsigned;
      --  To speed up deflation, hash chains are never searched beyond this length.
      --  A higher limit improves compression ratio but degrades the speed.
      max_lazy_match : Natural_M32;  --  [was: unsigned]
      --  Attempt to find a better match only when the current match is strictly
      --  smaller than this value. This mechanism is used only for compression
      --  levels >= 4.
      good_match : Natural_M32;  --  [was: unsigned]
      --  Use a faster search when the previous match is longer than this
      nice_match : Integer_M32;  --  Stop searching when current match exceeds this
      --  Values for max_lazy_match, good_match, nice_match and max_chain_length,
      --  depending on the desired pack level (0..9). The values given below have
      --  been tuned to exclude worst case performance for pathological files.
      --  Better values may be found for specific files.
      type config is record
        good_length  : Natural_M32;  --  reduce lazy search above this match length [was: ush]
        max_lazy     : Natural_M32;  --  do not perform lazy search above this match length
        nice_length  : Integer_M32;  --  quit search above this match length
        max_chain    : ush;
      end record;

      configuration_table : constant array (0 .. 10) of config := (
      --  good lazy nice chain
          (0,    0,  0,    0),    --  0: store only
          (4,    4,  8,    4),    --  1: maximum speed, no lazy matches
          (4,    5, 16,    8),
          (4,    6, 32,   32),
          (4,    4, 16,   16),    --  4: lazy matches
          (8,   16, 32,   32),
          (8,   16, 128, 128),
          (8,   32, 128, 256),
          (32, 128, 258, 1024),
          (32, 258, 258, 4096),   --  9: maximum compression
          (34, 258, 258, 4096));  --  "secret" variant of level 9

      --  Update a hash value with the given input byte
      --  IN  assertion: all calls to to UPDATE_HASH are made with consecutive
      --     input characters, so that a running hash key can be computed from the
      --     previous key instead of complete recalculation each time.

      procedure UPDATE_HASH (h : in out unsigned; c : Byte) is
      pragma Inline (UPDATE_HASH);
      begin
        h := (unsigned (Shift_Left (Unsigned_32 (h), H_SHIFT)) xor unsigned (c)) and HASH_MASK;
      end UPDATE_HASH;

      --  Insert string starting at s in the dictionary and set match_head to the previous head
      --  of the hash chain (the most recent string with same hash key). Return
      --  the previous length of the hash chain.
      --  IN  assertion: all calls to to INSERT_STRING are made with consecutive
      --     input characters and the first MIN_MATCH bytes of s are valid
      --     (except for the last MIN_MATCH-1 bytes of the input file).

      procedure INSERT_STRING (s : Integer_M32; match_head : out Natural_M32) is
      pragma Inline (INSERT_STRING);
      begin
        UPDATE_HASH (ins_h, window (s + MIN_MATCH - 1));
        match_head := Natural_M32 (head (ins_h));
        prev (unsigned (s) and WMASK) := Pos (match_head);
        head (ins_h) := Pos (s);
      end INSERT_STRING;

      procedure Read_buf (from : Integer_M32; amount : unsigned; actual : out Integer_M32) is
        need : unsigned := amount;
      begin
        --  put_line("Read buffer: from:" & from'img & ";  amount:" & amount'img);
        actual := 0;
        while need > 0 and then More_Bytes loop
          window (from + actual) := Read_Byte;
          actual := actual + 1;
          need := need - 1;
        end loop;
        --  put_line("Read buffer: actual:" & actual'img);
      end Read_buf;

      --  Fill the window when the lookahead becomes insufficient.
      --  Updates strstart and lookahead, and sets eofile if end of input file.
      --
      --  IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
      --  OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
      --     At least one byte has been read, or eofile is set; file reads are
      --     performed for at least two bytes (required for the translate_eol option).

      procedure Fill_window is
        more : unsigned;
        m : Pos;
        n : Natural_M32;
      begin
        loop
          more := unsigned (window_size - ulg (lookahead) - ulg (strstart));
          if False then  --  C: "if (more == (unsigned)EOF) {" ?... GdM: seems a 16-bit code for EOF
            --  Very unlikely, but possible on 16 bit machine if strstart == 0
            --  and lookahead == 1 (input done one byte at time)
            more := more - 1;
          elsif strstart >= WSIZE + MAX_DIST and then sliding then
            --  By the IN assertion, the window is not empty so we can't confuse
            --  more == 0 with more == 64K on a 16 bit machine.
            window (0 .. WSIZE - 1) := window (WSIZE .. 2 * WSIZE - 1);
            --  GdM: in rare cases (e.g. level 9 on test file "enwik8"), match_start happens
            --  to be < WSIZE. We do as in the original 16-bit C code: mod 2**16, such that the
            --  index is the window's range.
            --  This assumes WSIZE = 2**15, which is checked at startup of LZ77_using_IZ.
            --  Very likely, match_start is garbage anyway - see http://sf.net/p/infozip/bugs/49/
            match_start := Natural_M32 (Unsigned_16 (match_start) - Unsigned_16 (WSIZE mod (2**16)));
            strstart    := strstart - WSIZE; -- we now have strstart >= MAX_DIST:
            for nn in 0 .. unsigned'(HASH_SIZE - 1) loop
              m := head (nn);
              if m >= Pos (WSIZE) then
                head (nn) := m - Pos (WSIZE);
              else
                head (nn) := NIL;
              end if;
            end loop;
            --
            for nn in 0 .. unsigned (WSIZE - 1) loop
              m := prev (nn);
              if m >= Pos (WSIZE) then
                prev (nn) := m - Pos (WSIZE);
              else
                prev (nn) := NIL;
              end if;
              --  If n is not on any hash chain, prev[n] is garbage but its value will never be used.
            end loop;
            more := more + unsigned (WSIZE);
          end if;
          exit when eofile;
          --  If there was no sliding:
          --     strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
          --     more == window_size - lookahead - strstart
          --  => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
          --  => more >= window_size - 2*WSIZE + 2
          --  In the MMAP or BIG_MEM case (not yet supported in gzip),
          --    window_size == input_size + MIN_LOOKAHEAD  &&
          --    strstart + lookahead <= input_size => more >= MIN_LOOKAHEAD.
          --  Otherwise, window_size == 2*WSIZE so more >= 2.
          --  If there was sliding, more >= WSIZE. So in all cases, more >= 2.
          --
          pragma Assert (more >= 2, "more < 2");
          --
          Read_buf (strstart + lookahead, more, n);
          if n = 0 then
            eofile := True;
          else
            lookahead := lookahead + n;
          end if;
          exit when lookahead >= MIN_LOOKAHEAD or eofile;
        end loop;
        --  put_line("Fill done - eofile = " & eofile'img);
      end Fill_window;

      --  Initialize the "longest match" routines for a new file
      --
      --  IN assertion: window_size is > 0 if the input file is already read or
      --     mapped in the window array, 0 otherwise. In the first case,
      --     window_size is sufficient to contain the whole input file plus
      --     MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
      --     of window when looking for matches towards the end).

      procedure LM_Init (pack_level : Natural) is
      begin
        --  Do not slide the window if the whole input is already in memory (window_size > 0)
        sliding := False;
        if window_size = 0 then
          sliding := True;
          window_size := 2 * ulg (WSIZE);
        end if;
        --  Initialize the hash table.
        --  prev will be initialized on the fly.
        head := (others => NIL);
        --  Set the default configuration parameters:
        max_lazy_match   := configuration_table (pack_level).max_lazy;
        good_match       := configuration_table (pack_level).good_length;
        nice_match       := configuration_table (pack_level).nice_length;
        max_chain_length := configuration_table (pack_level).max_chain;
        --  Info-Zip comment: ??? reduce max_chain_length for binary files
        strstart := 0;
        Read_buf (0, unsigned (WSIZE), lookahead);
        if lookahead = 0 then
          eofile := True;
          return;
        end if;
        eofile := False;
        --  Make sure that we always have enough lookahead. This is important
        --  if input comes from a device such as a tty.
        if lookahead < MIN_LOOKAHEAD then
          Fill_window;
        end if;
        ins_h := 0;
        for j in 0 .. Natural_M32 (MIN_MATCH) - 2 loop
          UPDATE_HASH (ins_h, window (j));
        end loop;
        --  If lookahead < MIN_MATCH, ins_h is garbage, but this is
        --  not important since only literal bytes will be emitted.
      end LM_Init;

      --  Set match_start to the longest match starting at the given string and
      --  return its length. Matches shorter or equal to prev_length are discarded,
      --  in which case the result is equal to prev_length and match_start is
      --  garbage.
      --  IN assertions: current_match is the head of the hash chain for the current
      --    string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1

      procedure Longest_Match (current_match : in out Integer_M32; longest : out Integer_M32) is
        chain_length : unsigned := max_chain_length;  --  max hash chain length
        scan         : Integer_M32 := strstart;       --  current string
        match        : Integer_M32;                   --  matched string
        len          : Integer_M32;                   --  length of current match
        best_len     : Integer_M32 := prev_length;    --  best match length so far
        limit        : Natural_M32;  --  [was: IPos]
        strend       : constant Integer_M32 := strstart + MAX_MATCH;
        scan_end     : Integer_M32 := scan + best_len;
      begin
        --  Stop when current_match becomes <= limit. To simplify the code,
        --  we prevent matches with the string of window index 0.
        if strstart > MAX_DIST then
          limit := strstart - MAX_DIST;
        else
          limit := NIL;
        end if;
        --  Do not waste too much time if we already have a good match:
        if prev_length >= good_match then
          chain_length := chain_length / 4;
        end if;
        pragma Assert
          (strstart <= Integer_M32 (window_size) - MIN_LOOKAHEAD,
           "insufficient lookahead");  --  In deflate.c
        loop
          if current_match >= strstart then
            --  Added 2020-11-07. The file test/sample.jpg bombs the assertion a few lines later.
            longest := MIN_MATCH - 1;
            return;
          end if;
          pragma Assert (current_match < strstart, "no future");  --  In deflate.c
          match := current_match;
          --  Skip to next match if the match length cannot increase
          --  or if the match length is less than 2:
          --
          --  NB: this is the Not-UNALIGNED_OK variant in the C code.
          --      Translation of the UNALIGNED_OK variant is left as an exercise ;-).
          --      (!! worth a try: GNAT optimizes window(match..match+1[3]) to 16[32] bit)
          --
          if window (match + best_len)     /= window (scan_end) or else
             window (match + best_len - 1) /= window (scan_end - 1) or else
             window (match)                /= window (scan) or else
             window (match + 1)            /= window (scan + 1)
          then
            match := match + 1;  --  C: continue
          else
            --  The check at best_len - 1 can be removed because it will be made
            --  again later. (This heuristic is not always a win.)
            --
            --  It is not necessary to compare window(scan + 2) and window(match + 2) since they
            --  are always equal when the other bytes match, given that
            --  the hash keys are equal and that HASH_BITS >= 8.
            scan := scan + 2;
            match := match + 2;
            --  C: The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
            --     It is easy to get rid of this optimization if necessary.
            --  Ada: see the "else" part below.
            if MAX_MATCH = 258 then
              --  We check for insufficient lookahead only every 8th comparison;
              --  the 256th check will be made at strstart + 258.
              loop
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match);
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match) or else scan >= strend;
              end loop;
            else
              --  We check for insufficient lookahead after every comparison.
              loop
                scan := scan + 1;
                match := match + 1;
                exit when window (scan) /= window (match) or else scan >= strend;
              end loop;
            end if;
            --  Assert(scan <= window+(unsigned)(window_size-1), "wild scan");  ??
            len := MAX_MATCH - (strend - scan);
            scan := strend - MAX_MATCH;
            if len > best_len then
              match_start := current_match;
              best_len := len;
              exit when len >= nice_match;
              scan_end  := scan + best_len;
            end if;
          end if;
          current_match := Integer_M32 (prev (unsigned (current_match) and WMASK));
          exit when current_match <= limit;
          chain_length := chain_length - 1;
          exit when chain_length = 0;
        end loop;
        longest := best_len;
      end Longest_Match;

      procedure LZ77_part_of_IZ_Deflate is
        hash_head  : Natural_M32 := NIL;              --  head of hash chain
        prev_match : Natural_M32;                     --  previous match  [was: IPos]
        match_available : Boolean := False;           --  set if previous match exists
        match_length : Natural_M32 := MIN_MATCH - 1;  --  length of best match
        max_insert : Natural_M32;
      begin
        match_start := 0;  --  NB: no initialization in deflate.c
        --  NB: level <= 3 would call deflate_fast;
        --
        --  Process the input block.
        while lookahead /= 0 loop
          --  Insert the string window(strstart .. strstart + 2) in the
          --  dictionary, and set hash_head to the head of the hash chain:
          if lookahead >= MIN_MATCH then
            INSERT_STRING (strstart, hash_head);
          end if;
          --  Find the longest match, discarding those <= prev_length.
          prev_length  := match_length;
          prev_match   := match_start;
          match_length := MIN_MATCH - 1;
          if hash_head /= NIL and then
             prev_length < max_lazy_match and then
             strstart - hash_head <= MAX_DIST
          then
            --  To simplify the code, we prevent matches with the string
            --  of window index 0 (in particular we have to avoid a match
            --  of the string with itself at the start of the input file).
            --
            --  Do not look for matches beyond the end of the input.
            --  This is necessary to make deflate deterministic.
            if nice_match > lookahead then
              nice_match := lookahead;
            end if;
            Longest_Match (hash_head, match_length);
            --  Longest_Match sets match_start
            if match_length > lookahead then
              match_length := lookahead;
            end if;
            --  Ignore a length 3 match if it is too distant:
            if match_length = MIN_MATCH and then strstart - match_start > TOO_FAR then
              --  If prev_match is also MIN_MATCH, match_start is garbage
              --  but we will ignore the current match anyway.
              match_length := MIN_MATCH - 1;
            end if;
          end if;
          --  If there was a match at the previous step and the current
          --  match is not better, output the previous match:
          if prev_length >= MIN_MATCH and then match_length <= prev_length then
            max_insert := strstart + lookahead - MIN_MATCH;
            --  C: in DEBUG mode: check_match(strstart-1, prev_match, prev_length);
            --
            ------------------------------------
            --  Output a Distance-Length code --
            ------------------------------------
            Write_DL_Code (Positive (strstart - 1 - prev_match), Positive (prev_length));
            --  Insert in hash table all strings up to the end of the match.
            --  strstart-1 and strstart are already inserted.
            lookahead := lookahead - (prev_length - 1);
            prev_length := prev_length - 2;
            loop
              strstart := strstart + 1;
              if strstart <= max_insert then
                INSERT_STRING (strstart, hash_head);
                --  strstart never exceeds WSIZE - MAX_MATCH, so there
                --  are always MIN_MATCH bytes ahead.
              end if;
              prev_length := prev_length - 1;
              exit when prev_length = 0;
            end loop;
            strstart := strstart + 1;
            match_available := False;
            match_length := MIN_MATCH - 1;
          elsif match_available then
            --  If there was no match at the previous position, output a
            --  single literal. If there was a match but the current match
            --  is longer, truncate the previous match to a single literal.
            --
            ------------------------
            --  Output a literal  --
            ------------------------
            Write_Literal (window (strstart - 1));
            strstart := strstart + 1;
            lookahead := lookahead - 1;
          else
            --  There is no previous match to compare with, wait for the next step to decide.
            match_available := True;
            strstart := strstart + 1;
            lookahead := lookahead - 1;
          end if;
          --  Assert(strstart <= isize && lookahead <= isize, "a bit too far");
          --
          --  Make sure that we always have enough lookahead, except
          --  at the end of the input file. We need MAX_MATCH bytes
          --  for the next match, plus MIN_MATCH bytes to insert the
          --  string following the next match.
          if lookahead < MIN_LOOKAHEAD then
            Fill_window;
          end if;
        end loop;
        -----------------------------------
        --  Output last literal, if any  --
        -----------------------------------
        if match_available then
          Write_Literal (window (strstart - 1));
        end if;
      end LZ77_part_of_IZ_Deflate;

      Code_too_clever : exception;
    begin
      if Look_Ahead /= 258 or String_buffer_size /= 2 ** 15 or Threshold /= 2 then
        raise Code_too_clever;  --  was optimized for these parameters
      end if;
      window_size := 0;
      LM_Init (level);
      LZ77_part_of_IZ_Deflate;
    end LZ77_using_IZ;

    ---------------------------------------------------------------------
    --  BT4  -  Binary tree of match positions selected with           --
    --          the leading 2 to 4 bytes of each possible match.       --
    ---------------------------------------------------------------------

    --  Based on BT4.java and LZMAEncoderFast.java by Lasse Collin,
    --  itself based on LzFind.c by Igor Pavlov.

    procedure LZ77_using_BT4 is
      MATCH_LEN_MIN : constant Integer := Threshold + 1;
      --
      readPos     : Integer := -1;
      cur_literal : Byte;
      readLimit   : Integer := -1;
      finishing   : constant Boolean := False;
      writePos    : Integer :=  0;
      pendingSize : Integer :=  0;
      --
      OPTS : constant := 4096;
      EXTRA_SIZE_BEFORE : constant :=  OPTS;
      EXTRA_SIZE_AFTER  : constant :=  OPTS;

      keepSizeBefore : constant Integer := EXTRA_SIZE_BEFORE + String_buffer_size;
      keepSizeAfter  : constant Integer := EXTRA_SIZE_AFTER  + Look_Ahead;
      reserveSize : constant Integer :=
        Integer'Min (
          String_buffer_size / 2 +
          256 * (2 ** 10),  --  256 KiB
          512 * (2 ** 20)   --  512 MiB
        );
      getBufSize : constant Integer := keepSizeBefore + keepSizeAfter + reserveSize;

      type Int_array is array (Natural range <>) of Integer;
      type p_Int_array is access Int_array;
      procedure Dispose is new Ada.Unchecked_Deallocation (Int_array, p_Int_array);

      procedure Normalize (positions : in out Int_array; normalizationOffset : Integer) is
      begin
        for i in 0 .. positions'Length - 1 loop
          if positions (i) <= normalizationOffset then
            positions (i) := 0;
          else
            positions (i) := positions (i) - normalizationOffset;
          end if;
        end loop;
      end Normalize;

      function Get_Available return Integer is
      pragma Inline (Get_Available);
      begin
         --  Compared to the Java version: - 1 shift for getting readPos
         --  in buf'Range upon: cur_literal := buf (readPos);
        return writePos - readPos - 1;
      end Get_Available;

      function Move_Pos (requiredForFlushing, requiredForFinishing : Integer) return Integer is
        --  Java name: movePos.
        avail : Integer;
      begin
        pragma Assert (requiredForFlushing >= requiredForFinishing);
        readPos := readPos + 1;
        avail   := Get_Available;
        if avail < requiredForFlushing then
          if avail < requiredForFinishing or else not finishing
          then
            pendingSize := pendingSize + 1;
            --  GdM: This causes cyclicPos and lzpos not being in sync with readPos.
            --       The pendingSize value is there for catching up.
            avail := 0;
          end if;
        end if;
        return avail;
      end Move_Pos;

      function getHash4Size return Integer is
        h : Unsigned_32 := Unsigned_32 (String_buffer_size - 1);
      begin
        h := h or Shift_Right (h, 1);
        h := h or Shift_Right (h, 2);
        h := h or Shift_Right (h, 4);
        h := h or Shift_Right (h, 8);
        h := Shift_Right (h, 1);
        h := h or 16#FFFF#;  --  LzFind.c: "don't change it! It's required for Deflate"
        if h > 2 ** 24 then
          h := Shift_Right (h, 1);
        end if;
        return Integer (h + 1);
      end getHash4Size;

      type p_Byte_Array is access Byte_Array;
      procedure Dispose is new Ada.Unchecked_Deallocation (Byte_Array, p_Byte_Array);

      package Hash234 is
        HASH_2_SIZE : constant := 2 ** 10;
        HASH_2_MASK : constant := HASH_2_SIZE - 1;
        HASH_3_SIZE : constant := 2 ** 16;
        HASH_3_MASK : constant := HASH_3_SIZE - 1;
        hash_4_size : constant Integer := getHash4Size;
        hash_4_mask : constant Unsigned_32 := Unsigned_32 (hash_4_size) - 1;
        --
        hash2Table : Int_array (0 .. HASH_2_SIZE - 1) := (others => 0);  --  [Initialization added]
        hash3Table : Int_array (0 .. HASH_3_SIZE - 1) := (others => 0);  --  [Initialization added]
        hash4Table : p_Int_array;
        --
        hash2Value, hash3Value, hash4Value : Unsigned_32 := 0;
        --
        procedure calcHashes (buf : Byte_Array; off : Integer);
        procedure updateTables (pos : Integer);
        procedure Normalize (normalizeOffset : Integer);
      end Hash234;

      package body Hash234 is

        crcTable : array (Byte) of Unsigned_32;
        CRC32_POLY : constant := 16#EDB8_8320#;

        procedure calcHashes (buf : Byte_Array; off : Integer) is
          temp : Unsigned_32 := crcTable (buf (off)) xor Unsigned_32 (buf (off + 1));
        begin
          hash2Value := temp and HASH_2_MASK;
          temp := temp xor Shift_Left (Unsigned_32 (buf (off + 2)), 8);
          hash3Value := temp and HASH_3_MASK;
          temp := temp xor Shift_Left (crcTable (buf (off + 3)), 5);
          hash4Value := temp and hash_4_mask;
        end calcHashes;

        procedure updateTables (pos : Integer) is
        begin
          hash2Table (Integer (hash2Value)) := pos;
          hash3Table (Integer (hash3Value)) := pos;
          hash4Table (Integer (hash4Value)) := pos;
        end updateTables;

        procedure Normalize (normalizeOffset : Integer) is
        begin
          Normalize (hash2Table, normalizeOffset);
          Normalize (hash3Table, normalizeOffset);
          Normalize (hash4Table.all, normalizeOffset);
        end Normalize;

        r : Unsigned_32;
      begin
        --  NB: heap allocation used only for convenience because of
        --      small default stack sizes on some compilers.
        hash4Table := new Int_array (0 .. hash_4_size - 1);
        hash4Table.all := (others => 0);  --  [Initialization added]
        for i in Byte loop
          r := Unsigned_32 (i);
          for j in 0 .. 7 loop
            if (r and 1) = 0 then
              r := Shift_Right (r, 1);
            else
              r := Shift_Right (r, 1) xor CRC32_POLY;
            end if;
          end loop;
          crcTable (i) := r;
        end loop;
      end Hash234;

      Nice_Length : constant Integer := Integer'Min (162, Look_Ahead);  --  const. was 64
      Depth_Limit : constant := 48;  --  Alternatively: 16 + Nice_Length / 2

      cyclicSize : constant Integer := String_buffer_size;  --  Had: + 1;
      cyclicPos  : Integer := -1;
      lzPos      : Integer := cyclicSize;

      max_dist : constant Integer := cyclicSize - (Look_Ahead + 2);
      --  NB: 2020-11-04: added "- (Look_Ahead + 2)" to prevent corruption of
      --  the expansion buffer in LZMA.Encoding when DL codes are tested in front
      --  of the actual writes, before actual entropy compression (since rev. #850).

      package BT4_Algo is
        procedure Skip (len : Natural);
        pragma Inline (Skip);
        procedure Read_One_and_Get_Matches (matches : out Matches_Type);
      end BT4_Algo;

      buf : p_Byte_Array;
      tree : p_Int_array;

      package body BT4_Algo is

        function Move_Pos_in_BT4 return Integer is
          --  Java name: movePos.
          avail : constant Integer :=
            Move_Pos (requiredForFlushing  => Nice_Length,
                      requiredForFinishing => 4);
          normalizationOffset : Integer;
        begin
          --  Put_Line ("BT4_Algo.Move_Pos_in_BT4");
          if avail /= 0 then
            lzPos := lzPos + 1;
            if lzPos = Integer'Last then
              normalizationOffset := Integer'Last - cyclicSize;
              Hash234.Normalize (normalizationOffset);
              Normalize (tree.all, normalizationOffset);
              lzPos := lzPos - normalizationOffset;
            end if;
            cyclicPos := cyclicPos + 1;
            if cyclicPos = cyclicSize then
              --  Put_Line("cyclicPos zeroed");
              cyclicPos := 0;
            end if;
          end if;
          return avail;
        end Move_Pos_in_BT4;

        Null_position : constant := -1;  --  LzFind.c: kEmptyHashValue, 0

        procedure Skip_and_Update_Tree (niceLenLimit : Integer; currentMatch : in out Integer) is
          delta0, depth, ptr0, ptr1, pair, len, len0, len1 : Integer;
        begin
          --  Put("BT4_Algo.Skip_and_Update_Tree... ");
          depth := Depth_Limit;
          ptr0  := cyclicPos * 2 + 1;
          ptr1  := cyclicPos * 2;
          len0  := 0;
          len1  := 0;
          loop
            delta0 := lzPos - currentMatch;
            if depth = 0 or else delta0 >= max_dist then
              tree (ptr0) := Null_position;
              tree (ptr1) := Null_position;
              return;
            end if;
            depth := depth - 1;
            if cyclicPos - delta0 < 0 then
              pair := cyclicSize;
            else
              pair := 0;
            end if;
            pair := (cyclicPos - delta0 + pair) * 2;
            len  := Integer'Min (len0, len1);
            --  Match ?
            if buf (readPos + len - delta0) = buf (readPos + len) then
              --  No need to look for longer matches than niceLenLimit
              --  because we only are updating the tree, not returning
              --  matches found to the caller.
              loop
                len := len + 1;
                if len = niceLenLimit then
                  tree (ptr1) := tree (pair);
                  tree (ptr0) := tree (pair + 1);
                  return;
                end if;
                exit when buf (readPos + len - delta0) /= buf (readPos + len);
              end loop;
            end if;
            --  Bytes are no more matching. The past value is either smaller...
            if buf (readPos + len - delta0) < buf (readPos + len) then
              tree (ptr1) := currentMatch;
              ptr1 := pair + 1;
              currentMatch := tree (ptr1);
              len1 := len;
            else  --  ... or larger
              tree (ptr0) := currentMatch;
              ptr0 := pair;
              currentMatch := tree (ptr0);
              len0 := len;
            end if;
          end loop;
        end Skip_and_Update_Tree;

        procedure Skip (len : Natural) is
          --
          procedure Skip_one is
          pragma Inline (Skip_one);
            niceLenLimit, avail, currentMatch : Integer;
          begin
            niceLenLimit := Nice_Length;
            avail := Move_Pos_in_BT4;
            if avail < niceLenLimit then
              if avail = 0 then
                return;
              end if;
              niceLenLimit := avail;
            end if;
            Hash234.calcHashes (buf.all, readPos);
            currentMatch := Hash234.hash4Table (Integer (Hash234.hash4Value));
            Hash234.updateTables (lzPos);
            Skip_and_Update_Tree (niceLenLimit, currentMatch);
          end Skip_one;
          --
        begin
          for count in reverse 1 .. len loop
            Skip_one;
          end loop;
        end Skip;

        procedure Read_One_and_Get_Matches (matches : out Matches_Type) is
          matchLenLimit : Integer := Look_Ahead;
          niceLenLimit  : Integer := Nice_Length;
          avail : Integer;
          delta0, delta2, delta3, currentMatch,
          lenBest, depth, ptr0, ptr1, pair, len, len0, len1 : Integer;
        begin
          --  Put("BT4_Algo.Get_Matches... ");
          matches.count := 0;
          avail := Move_Pos_in_BT4;
          if avail < matchLenLimit then
            if avail = 0 then
              return;
            end if;
            matchLenLimit := avail;
            if niceLenLimit > avail then
              niceLenLimit := avail;
            end if;
          end if;
          --
          Hash234.calcHashes (buf.all, readPos);
          delta2 := lzPos - Hash234.hash2Table (Integer (Hash234.hash2Value));
          delta3 := lzPos - Hash234.hash3Table (Integer (Hash234.hash3Value));
          currentMatch :=   Hash234.hash4Table (Integer (Hash234.hash4Value));
          Hash234.updateTables (lzPos);
          --
          lenBest := 0;
          --  See if the hash from the first two bytes found a match.
          --  The hashing algorithm guarantees that if the first byte
          --  matches, also the second byte does, so there's no need to
          --  test the second byte.
          if delta2 < max_dist and then buf (readPos - delta2) = buf (readPos) then
            --  Match of length 2 found and checked.
            lenBest := 2;
            matches.count := 1;
            matches.dl (matches.count).length := 2;
            matches.dl (matches.count).distance := delta2;
          end if;
          --  See if the hash from the first three bytes found a match that
          --  is different from the match possibly found by the two-byte hash.
          --  Also here the hashing algorithm guarantees that if the first byte
          --  matches, also the next two bytes do.
          if delta2 /= delta3 and then delta3 < max_dist
                  and then buf (readPos - delta3) = buf (readPos)
          then
            --  Match of length 3 found and checked.
            lenBest := 3;
            matches.count := matches.count + 1;
            matches.dl (matches.count).distance := delta3;
            delta2 := delta3;
          end if;
          --  If a match was found, see how long it is.
          if matches.count > 0 then
            while lenBest < matchLenLimit and then buf (readPos + lenBest - delta2)
                                                 = buf (readPos + lenBest)
            loop
              lenBest := lenBest + 1;
            end loop;
            matches.dl (matches.count).length := lenBest;
            --  Return if it is long enough (niceLen or reached the end of the dictionary).
            if lenBest >= niceLenLimit then
              Skip_and_Update_Tree (niceLenLimit, currentMatch);
              return;
            end if;
          end if;
          --  A long enough match wasn't found so easily.
          --  Look for better matches from the binary tree.
          if lenBest < 3 then
            lenBest := 3;
          end if;
          depth := Depth_Limit;
          ptr0  := cyclicPos * 2 + 1;
          ptr1  := cyclicPos * 2;
          len0  := 0;
          len1  := 0;
          --
          loop
            delta0 := lzPos - currentMatch;
            --  Return if the search depth limit has been reached or
            --  if the distance of the potential match exceeds the
            --  dictionary size.
            if depth = 0 or else delta0 >= max_dist then
              tree (ptr0) := Null_position;
              tree (ptr1) := Null_position;
              return;
            end if;
            depth := depth - 1;
            --
            if cyclicPos - delta0 < 0 then
              pair := cyclicSize;
            else
              pair := 0;
            end if;
            pair := (cyclicPos - delta0 + pair) * 2;
            len  := Integer'Min (len0, len1);
            --  Match ?
            if buf (readPos + len - delta0) = buf (readPos + len) then
              loop
                len := len + 1;
                exit when len >= matchLenLimit
                  or else buf (readPos + len - delta0) /= buf (readPos + len);
              end loop;
              if len > lenBest then
                lenBest := len;
                matches.count := matches.count + 1;
                matches.dl (matches.count).length := len;
                matches.dl (matches.count).distance := delta0;
                if len >= niceLenLimit then
                  tree (ptr1) := tree (pair);
                  tree (ptr0) := tree (pair + 1);
                  return;
                end if;
              end if;
            end if;
            --  Bytes are no more matching. The past value is either smaller...
            if buf (readPos + len - delta0) < buf (readPos + len) then
              tree (ptr1) := currentMatch;
              ptr1 := pair + 1;
              currentMatch := tree (ptr1);
              len1 := len;
            else  --  ... or larger
              tree (ptr0) := currentMatch;
              ptr0 := pair;
              currentMatch := tree (ptr0);
              len0 := len;
            end if;
          end loop;
        end Read_One_and_Get_Matches;

      begin
        --  NB: heap allocation used only for convenience because of
        --      small default stack sizes on some compilers.
        tree := new Int_array (0 .. cyclicSize * 2 - 1);
        for i in tree'Range loop
          tree (i) := Null_position;
        end loop;
      end BT4_Algo;

      --  Moves data from the end of the buffer to the beginning, discarding
      --  old data and making space for new input.

      procedure Move_Window is
        --  Java name: moveWindow.
        --  Align the move to a multiple of 16 bytes (LZMA-friendly, see pos_bits)
        moveOffset : constant Integer := ((readPos + 1 - keepSizeBefore) / 16) * 16;
        moveSize   : constant Integer := writePos - moveOffset;
      begin
        --  Put_Line("  Move window, size=" & moveSize'Img & " offset=" & moveOffset'Img);
        buf (0 .. moveSize - 1) := buf (moveOffset .. moveOffset + moveSize - 1);
        readPos   := readPos   - moveOffset;
        readLimit := readLimit - moveOffset;
        writePos  := writePos  - moveOffset;
      end Move_Window;

     --  Copies new data into the buffer.
     function Fill_Window (len_initial : Integer) return Integer is
     --  Java name: fillWindow

       --  Process pending data that hasn't been ran through the match finder yet.
       --  Run it through the match finder now if there is enough new data
       --  available (readPos < readLimit) that the encoder may encode at
       --  least one more input byte.
       --
       procedure processPendingBytes is
         oldPendingSize : Integer;
       begin
         if pendingSize > 0 and then readPos < readLimit then
           readPos := readPos - pendingSize;
           oldPendingSize := pendingSize;
           pendingSize := 0;
           BT4_Algo.Skip (oldPendingSize);
         end if;
       end processPendingBytes;
       --
       len : Integer := len_initial;
       actual_len : Integer := 0;
     begin
        --  Put_Line("Fill window - start");
        --  Move the sliding window if needed.
        if readPos >= buf'Length - keepSizeAfter then
          Move_Window;
        end if;

        --  Try to fill the dictionary buffer up to its boundary.
        if len > buf'Length - writePos then
          len := buf'Length - writePos;
        end if;

        while len > 0 and then More_Bytes loop
          buf (writePos) := Read_Byte;
          writePos := writePos + 1;
          len := len - 1;
          actual_len := actual_len + 1;
        end loop;

        --  Set the new readLimit but only if there's enough data to allow
        --  encoding of at least one more byte.
        if writePos >= keepSizeAfter then
          readLimit := writePos - keepSizeAfter;
        end if;

        processPendingBytes;

        --  Put_Line("Fill window, requested=" & len_initial'Img & " actual=" & actual_len'Img);
        --  Tell the caller how much input we actually copied into the dictionary.
        return actual_len;
      end Fill_Window;

      function Compute_Match_Length (distance, length_limit : Integer) return Natural is
      pragma Inline (Compute_Match_Length);
        back_pos : constant Integer := readPos - distance;
        len : Integer := 0;
      begin
        if distance < 2 then
          return 0;
        end if;
        --  @ if readPos+len not in buf.all'Range then
        --  @   Put("**** readpos " & buf'Last'Img & readPos'Img);
        --  @ end if;
        --  @ if backPos+len not in buf.all'Range then
        --  @   Put("**** backpos " & buf'Last'Img & back_pos'Img);
        --  @ end if;
        while len < length_limit and then buf (readPos + len) = buf (back_pos + len) loop
          len := len + 1;
        end loop;
        return len;
      end Compute_Match_Length;

      readAhead : Integer := -1;  --  LZMAEncoder.java
      --  Small stack of recent distances used for LZMA.
      subtype Repeat_stack_range is Integer range 0 .. 3;
      --  1-based distances.
      rep_dist : array (Repeat_stack_range) of Distance_Type := (others => 1);
      len_rep_dist : array (Repeat_stack_range) of Natural := (others => 0);

      function Has_much_smaller_Distance (smallDist, bigDist : Distance_Type) return Boolean is
      pragma Inline (Has_much_smaller_Distance);
      begin
        return (smallDist - 1) < (bigDist - 1) / 128;
      end Has_much_smaller_Distance;

      best_length_for_rep_dist, best_rep_dist_index : Integer;

      procedure Read_One_and_Get_Matches (matches : out Matches_Type) is
        avail, len : Integer;
      begin
        readAhead := readAhead + 1;
        --
        BT4_Algo.Read_One_and_Get_Matches (matches);
        --
        if LZMA_friendly then
          best_length_for_rep_dist := 0;
          avail := Integer'Min (Get_Available, Look_Ahead);
          if avail >= MATCH_LEN_MIN then
            for rep in Repeat_stack_range loop
              len := Compute_Match_Length (rep_dist (rep), avail);
              len_rep_dist (rep) := len;
              --  Remember the index and length of the best repeated match.
              if len > best_length_for_rep_dist then
                best_rep_dist_index      := rep;
                best_length_for_rep_dist := len;
              end if;
            end loop;
          else
            for rep in Repeat_stack_range loop
              len_rep_dist (rep) := 0;  --  No match possible in any case.
            end loop;
          end if;
        end if;
      end Read_One_and_Get_Matches;

      procedure Get_supplemental_Matches_from_Repeat_Matches (matches : in out Matches_Type) is
        len, ins : Integer;
      begin
        if matches.count = 0 then
          if best_length_for_rep_dist >= MATCH_LEN_MIN then
            matches.dl (1).distance := rep_dist (best_rep_dist_index);
            matches.dl (1).length   := best_length_for_rep_dist;
            matches.count := 1;
          end if;
        end if;
        for rep in Repeat_stack_range loop
          len := len_rep_dist (rep);
          if len >= MATCH_LEN_MIN then
            ins := 0;
            for i in reverse 1 .. matches.count loop
              if len = matches.dl (i).length then
                if rep_dist (rep) = matches.dl (i).distance then
                  null;  --  Identical match
                else
                  --  Tie: insert the repeat match of same length into the list.
                  --  If the longest match strategy is applied, the second item is preferred.
                  if Has_much_smaller_Distance (matches.dl (i).distance, rep_dist (rep)) then
                    ins := i;      --  Insert before
                  else
                    ins := i + 1;  --  Insert after
                  end if;
                  exit;
                  --  Ada.Text_IO.Put_Line ("Tie");
                end if;
              elsif i < matches.count then
                if len > matches.dl (i).length and then len < matches.dl (i + 1).length then
                  --  Insert between existing lengths
                  ins := i + 1;
                  exit;
                --  We don't add len as the shortest length (worsens compression).
                ------
                --  elsif i = 1
                --    and then len >= MATCH_LEN_MIN
                --    and then len >= matches.dl (1).length - 1  --  Some reluctance...
                --  then
                --    ins := 1;
                end if;
              elsif len > matches.dl (i).length then
                --  i = matches.count in this case: add as longest.
                ins := i + 1;
                exit;
              end if;
            end loop;
            --  We can insert this repeat match at position 'ins'.
            if ins > 0 then
              for i in reverse ins .. matches.count loop  --  Empty if ins > count.
                matches.dl (i + 1) := matches.dl (i);
              end loop;
              matches.dl (ins).distance := rep_dist (rep);
              matches.dl (ins).length   := len;
              matches.count := matches.count + 1;
              exit;
            end if;
          end if;
        end loop;
        pragma Assert (Are_Matches_Sorted (matches));
      end Get_supplemental_Matches_from_Repeat_Matches;

      procedure Skip (len : Natural) is
      pragma Inline (Skip);
      begin
        readAhead := readAhead + len;
        BT4_Algo.Skip (len);
      end Skip;

      procedure Reduce_consecutive_max_lengths (m : in out Matches_Type) is
      --  Sometimes the BT4 algo returns a long list with consecutive lengths.
      --  We try to reduce it, if there is a clear advantage with distances.
      begin
        while m.count > 1
          and then m.dl (m.count).length = m.dl (m.count - 1).length + 1
          and then Has_much_smaller_Distance (m.dl (m.count - 1).distance, m.dl (m.count).distance)
        loop
          m.count := m.count - 1;
        end loop;
      end Reduce_consecutive_max_lengths;

      procedure Show_Matches (m : Matches_Type; phase : String) is
      begin
        Ada.Text_IO.Put_Line (
          phase & " --- Matches: " & Integer'Image (m.count)
        );
        for i in 1 .. m.count loop
          Ada.Text_IO.Put_Line (
            "  Distance:" & Integer'Image (m.dl (i).distance) &
            ";  Length:" & Integer'Image (m.dl (i).length)
          );
        end loop;
      end Show_Matches;
      pragma Unreferenced (Show_Matches);

      matches : Matches_Array (0 .. 1);
      current_match_index : Prefetch_Index_Type := 0;
      match_trace : DLP_Array (1 .. Max_Length_any_Algo);

      procedure Get_Next_Symbol is
        new_ld, main : Distance_Length_Pair;

        --  This function is for debugging. The matches stored in the 'tree' array
        --  may be wrong if the variables cyclicPos, lzPos and readPos are not in sync.
        --  The issue seems to have been solved now (rev. 489).
        function Is_match_correct (shift : Natural) return Boolean is
        begin
          for i in reverse -1 + shift .. main.length - 2 + shift loop
            if buf (readPos - (main.distance) + i) /= buf (readPos + i) then
              return False;  --  Should not occur.
            end if;
          end loop;
          return True;
        end Is_match_correct;

        procedure Send_first_literal_of_match is
        begin
          Write_Literal (cur_literal);
          readAhead := readAhead - 1;
        end Send_first_literal_of_match;

        procedure Send_DL_code (distance, length : Integer) is
          found_repeat : Integer := rep_dist'First - 1;
          aux : Integer;
        begin
          Write_DL_Code (distance, length);
          readAhead := readAhead - length;
          if LZMA_friendly then
            --
            --  Manage the stack of recent distances in the same way the "MA" part of LZMA does.
            --
            for i in rep_dist'Range loop
              if distance = rep_dist (i) then
                found_repeat := i;
                exit;
              end if;
            end loop;
            if found_repeat >= rep_dist'First then
              --  Roll the stack of recent distances up to the item with index found_repeat,
              --  which becomes first. If found_repeat = rep_dist'First, no actual change occurs.
              aux := rep_dist (found_repeat);
              for i in reverse rep_dist'First + 1 .. found_repeat loop
                rep_dist (i) := rep_dist (i - 1);
              end loop;
              rep_dist (rep_dist'First) := aux;
            else
              --  Shift the stack of recent distances; the new distance becomes the first item.
              for i in reverse rep_dist'First + 1 .. rep_dist'Last loop
                rep_dist (i) := rep_dist (i - 1);
              end loop;
              rep_dist (0) := distance;
            end if;
          end if;
        end Send_DL_code;

        avail, limit : Integer;
        index_max_score : Positive;
        set_max_score : Prefetch_Index_Type;
        hurdle : constant := 40;
      begin
        --  Get the matches for the next byte unless readAhead indicates
        --  that we already got the new matches during the previous call
        --  to this procedure.
        if readAhead = -1 then
          Read_One_and_Get_Matches (matches (current_match_index));
        end if;
        --  @ if readPos not in buf.all'Range then
        --  @   Put("**** " & buf'Last'Img & keepSizeAfter'Img & readPos'Img & writePos'Img);
        --  @ end if;
        cur_literal := buf (readPos);
        --  Get the number of bytes available in the dictionary, but
        --  not more than the maximum match length. If there aren't
        --  enough bytes remaining to encode a match at all, return
        --  immediately to encode this byte as a literal.
        avail := Integer'Min (Get_Available, Look_Ahead);
        if avail < MATCH_LEN_MIN then
          --  Put("[a]");
          Send_first_literal_of_match;
          return;
        end if;

        if LZMA_friendly and then best_length_for_rep_dist >= Nice_Length then
          Skip (best_length_for_rep_dist - 1);
          --  Put_Line("[DL RA]");
          Send_DL_code (rep_dist (best_rep_dist_index), best_length_for_rep_dist);
          return;
        end if;

        main := (length => 1, distance => 1);
        if matches (current_match_index).count > 0 then
          main := matches (current_match_index).dl (matches (current_match_index).count);
          if main.length >= Nice_Length then
            pragma Assert (Is_match_correct (1));
            Skip (main.length - 1);
            --  Put_Line("[DL A]" & mainDist'Img & mainLen'Img);
            Send_DL_code (main.distance, main.length);
            return;
          end if;
          Reduce_consecutive_max_lengths (matches (current_match_index));
          if LZMA_friendly then
            Get_supplemental_Matches_from_Repeat_Matches (matches (current_match_index));
          end if;
          main := matches (current_match_index).dl (matches (current_match_index).count);
          --
          if main.length = MATCH_LEN_MIN and then main.distance > 128 then
            main.length := 1;
          end if;
        end if;

        if LZMA_friendly
          and then best_length_for_rep_dist > MATCH_LEN_MIN
          and then (best_length_for_rep_dist >= main.length
            or else (best_length_for_rep_dist >= main.length - 2 and then main.distance > 2 ** 9)
            or else (best_length_for_rep_dist >= main.length - 3 and then main.distance > 2 ** 15))
        then
          --  Shortcut: we choose the longest repeat match.
          Skip (best_length_for_rep_dist - 1);
          --  Put_Line("[DL RB]");
          Send_DL_code (rep_dist (best_rep_dist_index), best_length_for_rep_dist);
          return;
        end if;

        if main.length < MATCH_LEN_MIN or else avail <= MATCH_LEN_MIN then
          --  Put("[b]");
          Send_first_literal_of_match;
          return;
        end if;

        -------------------------------------------------------------------------
        --  Get the next match. Test if it is better than the current match.   --
        --  If so, encode the current byte as a literal.                       --
        -------------------------------------------------------------------------
        current_match_index := 1 - current_match_index;
        Read_One_and_Get_Matches (matches (current_match_index));
        --
        --  Show_Matches (matches (1 - current_match_index), "------ Old");
        --  Show_Matches (matches (current_match_index),     "       New");
        --
        if matches (current_match_index).count > 0 then
          new_ld := matches (current_match_index).dl (matches (current_match_index).count);  --  Longest new match
          if        (new_ld.length >= main.length + hurdle     and then new_ld.distance < main.distance)
            or else
              (new_ld.length =  main.length + hurdle + 1
               and then not Has_much_smaller_Distance (main.distance, new_ld.distance))
            or else  new_ld.length >  main.length + hurdle + 1
            or else (new_ld.length >= main.length + hurdle - 1
                and then main.length >= MATCH_LEN_MIN + 1
                and then Has_much_smaller_Distance (new_ld.distance, main.distance))
          then
            --  We prefer literal, then the new match (or even better!)
            Send_first_literal_of_match;
            return;
          end if;
          --
          --  Here we compare the scores of both match sets.
          --
          Reduce_consecutive_max_lengths (matches (current_match_index));
          if LZMA_friendly then
            Get_supplemental_Matches_from_Repeat_Matches (matches (current_match_index));
          end if;
          Estimate_DL_Codes (
            matches, 1 - current_match_index, (1 => cur_literal),
            index_max_score, set_max_score, match_trace
          );
          if set_max_score = 1 - current_match_index then
            --  Old match is seems better.
            main :=  matches (set_max_score).dl (index_max_score);
          else
            --  We prefer at least a literal, then a new, better match.
            Send_first_literal_of_match;
            return;
          end if;
        end if;

        if LZMA_friendly then
          limit := Integer'Max (main.length - 1, MATCH_LEN_MIN);
          for rep in rep_dist'Range loop
            if Compute_Match_Length (rep_dist (rep), limit) = limit then
              --  A "literal then DL_Code (some distance, main.length - 1)" match
              --  is verified and could use the stack of last distances -> got for it!
              Send_first_literal_of_match;
              return;
            end if;
          end loop;
        end if;

        pragma Assert (Is_match_correct (0));
        Skip (main.length - 2);
        --  Put_Line("[DL B]" & mainDist'Img & mainLen'Img);
        Send_DL_code (main.distance, main.length);
      end Get_Next_Symbol;

      procedure Deallocation is
      begin
        Dispose (buf);
        Dispose (tree);
        Dispose (Hash234.hash4Table);
      end Deallocation;

      actual_written, avail : Integer;
    begin
      --  NB: heap allocation used only for convenience because of
      --      the small default stack sizes on some compilers.
      buf := new Byte_Array (0 .. getBufSize);
      --
      actual_written := Fill_Window (String_buffer_size);
      if actual_written > 0 then
        loop
          Get_Next_Symbol;
          avail := Get_Available;
          if avail = 0 then
            actual_written := Fill_Window (String_buffer_size);
            exit when actual_written = 0;
          end if;
        end loop;
      end if;
      Deallocation;
    exception
      when others =>
        Deallocation;
        raise;
    end LZ77_using_BT4;

    procedure LZ77_by_Rich is
      --  * PROG2.C [lz77a.c]                                             *
      --  * Simple Hashing LZ77 Sliding Dictionary Compression Program    *
      --  * By Rich Geldreich, Jr. October, 1993                          *
      --  * Originally compiled with QuickC v2.5 in the small model.      *
      --  * This program uses more efficient code to delete strings from  *
      --  * the sliding dictionary compared to PROG1.C, at the expense of *
      --  * greater memory requirements. See the HashData and DeleteData  *
      --  * subroutines.                                                  *
      --
      --  Comments by GdM, 2019+ appear in square brackets: [...]

      --  Set this to True for a greedy encoder.
      GREEDY : constant Boolean := False;  --  [original: False]

      --  Ratio vs. speed constant [ Is it really a ratio? ].
      --  The larger this constant, the better the compression.
      MAXCOMPARES : constant := 4096;  --  [original: 75; good: 2400; from Info-Zip: 4096]

      --  Unused entry code.
      NIL : constant := 16#FFFF#;

      --  /* bits per symbol- normally 8 for general purpose compression */
      --  #define CHARBITS : constant := 8;  [ NB: dictionary uses char (byte) ]

      --  Minimum match length & maximum match length.
      THRESHOLD_Rich : constant := 2;
      MATCHBITS      : constant := 8;  --  [original: 4]
      --  [original: 2 ** MATCHBITS + THRESHOLD - 1]
      MAXMATCH  : constant := 2 ** MATCHBITS + THRESHOLD_Rich;  -- 258 is Deflate-friendly.

      --  Sliding dictionary size and hash table's size.
      --  Some combinations of HASHBITS and THRESHOLD values will not work
      --  correctly because of the way this program hashes strings.

      DICTBITS : constant := 15;  --  [original: 13]
      HASHBITS : constant := 13;  --  [original: 10]
      --
      DICTSIZE : constant := 2 ** DICTBITS;
      HASHSIZE : constant := 2 ** HASHBITS;

      --  # bits to shift after each XOR hash
      --  This constant must be high enough so that only THRESHOLD + 1
      --  characters are in the hash accumulator at one time.

      SHIFTBITS : constant := ((HASHBITS + THRESHOLD_Rich) / (THRESHOLD_Rich + 1));

      --  Sector size constants [the dictionary is partitoned in sectors].

      SECTORBIT : constant := 13;  --  [original: 10; OK: 13]
      SECTORLEN : constant := 2 ** SECTORBIT;

      HASH_MASK_1 : constant := 16#8000#;  --  [ was called HASHFLAG1 ]
      HASH_MASK_2 : constant := 16#7FFF#;  --  [ was called HASHFLAG2 ]

      --  Dictionary plus MAXMATCH extra chars for string comparisions.
      dict : array (Integer_M32'(0) .. DICTSIZE + MAXMATCH - 1) of Byte;

      subtype Unsigned_int is Unsigned_16;

      --  Hash table & link list tables.

      --  [ So far we index the hash table with Integer (minimum 16 bit signed) ]
      hash       : array (0 .. HASHSIZE - 1) of Unsigned_int := (others => NIL);
      --  [ nextlink: in lz77a.c: only through DICTSIZE - 1,
      --    although Init has: nextlink[DICTSIZE] = NIL. In doubt we set the
      --    'Last to DICTSIZE and fill everything with NIL... ]
      nextlink   : array (Integer_M32'(0) .. DICTSIZE)     of Unsigned_int := (others => NIL);
      lastlink   : array (Integer_M32'(0) .. DICTSIZE - 1) of Unsigned_int := (others => NIL);

      --  Loads dictionary with characters from the input stream.
      --
      procedure Load_Dict (dictpos : Integer_M32; actually_read : out Integer_M32) is
        i : Integer_M32 := 0;
      begin
        while More_Bytes loop
          dict (dictpos + i) := Read_Byte;
          i := i + 1;
          exit when i = SECTORLEN;
        end loop;

        --  Since the dictionary is a ring buffer, copy the characters at
        --  the very start of the dictionary to the end
        --  [this avoids having to use an "and" or a "mod" operator when searching].
        --
        if dictpos = 0 then
          for j in Integer_M32'(0) .. MAXMATCH - 1 loop
            dict (j + DICTSIZE) := dict (j);
          end loop;
        end if;

        actually_read := i;
      end Load_Dict;

      --  Deletes data from the dictionary search structures
      --  This is only done when the number of bytes to be
      --  compressed exceeds the dictionary's size.
      --
      procedure Delete_Data (dictpos : Integer_M32) is
        j, k : Integer_M32;
      begin
        --  Delete all references to the sector being deleted.
        k := dictpos + SECTORLEN;
        for i in dictpos .. k - 1 loop
          j := Integer_M32 (lastlink (i));
          if (Unsigned_int (j) and HASH_MASK_1) /= 0 then
            if j /= NIL then
              hash (Integer (Unsigned_int (j) and HASH_MASK_2)) := NIL;
            end if;
          else
            nextlink (j) := NIL;
          end if;
        end loop;
      end Delete_Data;

      --  Hash data just entered into dictionary.
      --  XOR hashing is used here, but practically any hash function will work.
      --
      procedure Hash_Data (dictpos, bytestodo : Integer_M32) is
        j : Integer;
        k : Integer_M32;
      begin
        if bytestodo <= THRESHOLD_Rich then  -- Not enough bytes in sector for match?
          nextlink (dictpos .. dictpos + bytestodo - 1) := (others => NIL);
          lastlink (dictpos .. dictpos + bytestodo - 1) := (others => NIL);
        else
          --  Matches can't cross sector boundaries.
          nextlink (dictpos + bytestodo - THRESHOLD_Rich .. dictpos + bytestodo - 1) := (others => NIL);
          lastlink (dictpos + bytestodo - THRESHOLD_Rich .. dictpos + bytestodo - 1) := (others => NIL);

          j :=  Integer (
                  Shift_Left (Unsigned_int (dict (dictpos)), SHIFTBITS)
                  xor
                  Unsigned_int (dict (dictpos + 1))
                );

          k := dictpos + bytestodo - THRESHOLD_Rich;  --  Calculate end of sector.

          for i in dictpos ..  k - 1 loop
            j := Integer (
                  (Shift_Left (Unsigned_int (j), SHIFTBITS) and (HASHSIZE - 1))
                   xor
                   Unsigned_int (dict (i + THRESHOLD_Rich))
                 );
            lastlink (i) := Unsigned_int (j) or HASH_MASK_1;
            nextlink (i) := hash (j);
            if nextlink (i) /= NIL then
              lastlink (Integer_M32 (nextlink (i))) := Unsigned_int (i);
            end if;
            hash (j) := Unsigned_int (i);
          end loop;
        end if;
      end Hash_Data;

      matchlength, matchpos : Integer_M32;

      --  Finds match for string at position dictpos.
      --  This search code finds the longest AND closest
      --  match for the string at dictpos.
      --
      procedure Find_Match (dictpos, startlen : Integer_M32) is
        i, j : Integer_M32;
        match_byte : Byte;
      begin
        i := dictpos;
        matchlength := startlen;
        match_byte := dict (dictpos + matchlength);
        --
        Chances :
        for compare_count in 1 .. MAXCOMPARES loop
          i := Integer_M32 (nextlink (i));  --  Get next string in list.
          if i = NIL then
            return;
          end if;
          --
          if dict (i + matchlength) = match_byte then  --  Possible larger match?
            j := 0;
            --  Compare strings.
            loop
              exit when dict (dictpos + j) /= dict (i + j);
              j := j + 1;
              exit when j = MAXMATCH;
            end loop;
            --
            if j > matchlength then  --  Found larger match?
              matchlength := j;
              matchpos    := i;
              if matchlength = MAXMATCH then
                return;  --  Exit if largest possible match.
              end if;
              match_byte := dict (dictpos + matchlength);
            end if;
          end if;
        end loop Chances;  --  Keep on trying until we run out of chances.
      end Find_Match;

      --  Finds dictionary matches for characters in current sector.
      --
      procedure Dict_Search (dictpos, bytestodo : Integer_M32) is
        i, j : Integer_M32;
        matchlen1, matchpos1 : Integer_M32;
        --
        procedure Write_literal_pos_i is
        pragma Inline (Write_literal_pos_i);
        begin
          Write_Literal (dict (i));
          i := i + 1;
          j := j - 1;
        end Write_literal_pos_i;
      begin
        i := dictpos;
        j := bytestodo;

        if not GREEDY then  --  Non-greedy search loop (slow).

          while j /= 0 loop  --  Loop while there are still characters left to be compressed.
            Find_Match (i, THRESHOLD_Rich);

            if matchlength > THRESHOLD_Rich then
              matchlen1 := matchlength;
              matchpos1 := matchpos;

              loop
                Find_Match (i + 1, matchlen1);

                if matchlength > matchlen1 then
                  matchlen1 := matchlength;
                  matchpos1 := matchpos;
                  Write_literal_pos_i;
                else
                  if matchlen1 > j then
                    matchlen1 := j;
                    if matchlen1 <= THRESHOLD_Rich then
                      Write_literal_pos_i;
                      exit;
                    end if;
                  end if;

                  Write_DL_Code (
                    length   => Integer (matchlen1),
                    --  [The subtraction happens modulo 2**n, needs to be cleaned modulo 2**DICTSIZE]
                    distance => Integer ((Unsigned_32 (i) - Unsigned_32 (matchpos1)) and (DICTSIZE - 1))
                  );
                  i := i + matchlen1;
                  j := j - matchlen1;
                  exit;
                end if;
              end loop;

            else
              Write_literal_pos_i;
            end if;

          end loop;  --  while j /= 0

        else  --  Greedy search loop (fast).

          while j /= 0 loop  --  Loop while there are still characters left to be compressed.

            Find_Match (i, THRESHOLD_Rich);

            if matchlength > j then
              matchlength := j;     --  Clamp matchlength.
            end if;

            if matchlength > THRESHOLD_Rich then  --  Valid match?
              Write_DL_Code (
                length   => Integer (matchlength),
                --  [The subtraction happens modulo 2**n, needs to be cleaned modulo 2**DICTSIZE]
                distance => Integer ((Unsigned_32 (i) - Unsigned_32 (matchpos)) and (DICTSIZE - 1))
              );
              i := i + matchlength;
              j := j - matchlength;
            else
              Write_literal_pos_i;
            end if;
          end loop;

        end if;  --  Greedy or not.

      end Dict_Search;

      procedure Encode_Rich is
        dictpos, actual_read : Integer_M32 :=  0;
        deleteflag : Boolean := False;
      begin
        loop
          --  Delete old data from dictionary.
          if deleteflag then
            Delete_Data (dictpos);
          end if;

          --  Grab more data to compress.
          Load_Dict (dictpos, actual_read);
          exit when actual_read = 0;

          --  Hash the data.
          Hash_Data (dictpos, actual_read);

          --  Find dictionary matches.
          Dict_Search (dictpos, actual_read);

          dictpos := dictpos + SECTORLEN;

          --  Wrap back to beginning of dictionary when it's full.
          if dictpos = DICTSIZE then
            dictpos := 0;
            deleteflag := True;   --  Ok to delete now.
          end if;
        end loop;
      end Encode_Rich;

    begin
      Encode_Rich;
    end LZ77_by_Rich;

    --  The following is for research purposes: compare different LZ77
    --  algorithms applied to entropy encoders (Deflate, LZMA, ...).

    procedure LZ77_from_Dump_File is
      LZ77_Dump : Ada.Text_IO.File_Type;
      tag : String (1 .. 3);
      Wrong_LZ77_tag : exception;
      a, b : Integer;
      dummy : Byte;
      use Ada.Integer_Text_IO;
    begin
      --  Pretend we compress the given stream.
      --  Entire stream is consumed here.
      while More_Bytes loop
        dummy := Read_Byte;
      end loop;
      --  Now send dumped LZ77 data further.
      Ada.Text_IO.Open (LZ77_Dump, Ada.Text_IO.In_File, "dump.lz77");
      --  File from UnZip.Decompress, or LZMA.Decoding, some_trace = True mode
      while not Ada.Text_IO.End_Of_File (LZ77_Dump) loop
        Ada.Text_IO.Get (LZ77_Dump, tag);
        if tag = "Lit" then
          Get (LZ77_Dump, a);
          Write_Literal (Byte (a));
        elsif tag = "DLE" then
          Get (LZ77_Dump, a);
          Get (LZ77_Dump, b);
          Write_DL_Code (a, b);
        else
          raise Wrong_LZ77_tag;
        end if;
        Ada.Text_IO.Skip_Line (LZ77_Dump);
      end loop;
      Ada.Text_IO.Close (LZ77_Dump);
    end LZ77_from_Dump_File;

  begin
    case Method is
      when LZHuf =>
        LZ77_using_LZHuf;
      when IZ_4 .. IZ_10 =>
        LZ77_using_IZ (4 + Method_Type'Pos (Method) -  Method_Type'Pos (IZ_4));
      when BT4 =>
        LZ77_using_BT4;
      when Rich =>
        LZ77_by_Rich;
      when No_LZ77 =>
        while More_Bytes loop
          Write_Literal (Read_Byte);
        end loop;
      when Read_LZ77_Codes =>
        LZ77_from_Dump_File;
    end case;
  end Encode;

end LZ77;


Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.