Source file : lz77.adb
-- There are four LZ77 encoders at choice in this package:
--
-- 1/ LZ77_using_LZHuf, based on LZHuf by Haruhiko Okumura and Haruyasu Yoshizaki.
--
-- 2/ LZ77_using_IZ, based on Info-Zip's Zip's deflate.c by Jean-Loup Gailly.
-- deflate.c is actually the LZ77 part of Info-Zip's compression.
--
-- 3/ LZ77_using_BT4, based on LZMA SDK's BT4 algorithm by Igor Pavlov.
--
-- 4/ LZ77_by_Rich, based on PROG2.C by Rich Geldreich, Jr.
--
-- Variant 1/, LZ77_using_LZHuf, is working since 2009. Two problems: it is slow
-- and not well adapted to the Deflate format (mediocre compression).
--
-- Variant 2/, LZ77_using_IZ, is much faster, and better suited for Deflate.
-- It has been added on 05-Mar-2016.
-- The code is tailored and optimized for a single set of
-- the String_buffer_size, Look_Ahead, Threshold LZ77 parameters - those for Deflate.
--
-- Variant 3/, LZ77_using_BT4, was added on 06-Sep-2016.
-- The seems to be the best match finder for LZMA on data of the >= 1 MiB scale.
-- To do:
--
-- 2/
-- - LZ77 / IZ: similar to the test with TOO_FAR, try to cluster distances around
-- values needing less extra bits (may not work at all...)
-- - LZ77 / IZ: tune TOO_FAR (max: 32767), see http://optipng.sf.net/pngtech/too_far.html
-- "TOO_FAR in zlib Is Not Too Far" for discussion
-- Legal licensing note:
-- Copyright (c) 2016 .. 2020 Gautier de Montmollin (maintainer of the Ada version)
-- SWITZERLAND
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
-- The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-- THE SOFTWARE.
-- NB: this is the MIT License, as found 21-Aug-2016 on the site
-- http://www.opensource.org/licenses/mit-license.php
with Ada.Text_IO, Ada.Integer_Text_IO;
with Ada.Unchecked_Deallocation;
with Interfaces; use Interfaces;
with System;
package body LZ77 is
-- System.Word_Size: 13.3(8): A word is the largest amount of storage
-- that can be conveniently and efficiently manipulated by the hardware,
-- given the implementation's run-time model.
--
min_bits_32 : constant := Integer'Max (32, System.Word_Size);
min_bits_16 : constant := Integer'Max (16, System.Word_Size);
-- We define an Integer type which is at least 32 bits, but n bits
-- on a native n (> 32) bits architecture (no performance hit on 64+
-- bits architectures).
-- Integer_M16 not needed: Integer already guarantees 16 bits
--
type Integer_M32 is range -2**(min_bits_32 - 1) .. 2**(min_bits_32 - 1) - 1;
subtype Natural_M32 is Integer_M32 range 0 .. Integer_M32'Last;
-- subtype Positive_M32 is Integer_M32 range 1 .. Integer_M32'Last;
type Unsigned_M16 is mod 2**min_bits_16;
type Unsigned_M32 is mod 2**min_bits_32;
function Are_Matches_Sorted (m : Matches_Type) return Boolean is
begin
for i in 2 .. m.count loop
if m.dl (i).length < m.dl (i - 1).length then
return False;
end if;
end loop;
return True;
end Are_Matches_Sorted;
procedure Encode is
-----------------------
-- LZHuf algorithm --
-----------------------
procedure LZ77_using_LZHuf is
-- Based on LZHUF by OKUMURA & YOSHIZAKI.
-- Here the adaptive Huffman coding is thrown away:
-- algorithm is used only to find matching patterns.
N_Char : constant Integer := 256 - Threshold + Look_Ahead;
-- Character code (= 0..N_CHAR-1)
Max_Table : constant Integer := N_Char * 2 - 1;
type Text_Buffer is array (0 .. String_buffer_size + Look_Ahead - 1) of Byte;
empty_buffer : constant Text_Buffer := (others => 32); -- ' '
-- > The Huffman frequency handling is made generic so we have
-- one copy of the tree and of the frequency table for Encode
-- and one for Decode
generic
package Huffman is
--- Pointing parent nodes.
--- Area [Max_Table..(Max_Table + N_CHAR - 1)] are pointers for leaves
Parent : array (0 .. Max_Table + N_Char - 1) of Natural;
--- Pointing children nodes (son[], son[] + 1)
Son : array (0 .. Max_Table - 1) of Natural;
Root_Position : constant Integer := Max_Table - 1; -- (can be always Son'last ?)
procedure Start;
procedure Update_Freq_Tree (C0 : Natural);
end Huffman;
package body Huffman is
Freq : array (0 .. Max_Table) of Natural; -- Cumulative freq table
Max_Freq : constant := 16#8000#;
-- ^-- update when cumulative frequency reaches to this value
procedure Start is
I : Natural;
begin
for J in 0 .. N_Char - 1 loop
Freq (J) := 1;
Son (J) := J + Max_Table;
Parent (J + Max_Table) := J;
end loop;
I := 0;
for J in N_Char .. Root_Position loop
Freq (J) := Freq (I) + Freq (I + 1);
Son (J) := I;
Parent (I) := J;
Parent (I + 1) := J;
I := I + 2;
end loop;
Freq (Freq'Last) := 16#FFFF#; -- ( Max_Table )
Parent (Root_Position) := 0;
end Start;
procedure Update_Freq_Tree (C0 : Natural) is
procedure Reconstruct_Freq_Tree is
I, J, K, F, L : Natural;
begin
-- Halven cumulative freq for leaf nodes
J := 0;
for I in 0 .. Root_Position loop
if Son (I) >= Max_Table then
Freq (J) := (Freq (I) + 1) / 2;
Son (J) := Son (I);
J := J + 1;
end if;
end loop;
-- Make a tree : first, connect children nodes
I := 0;
for J in N_Char .. Root_Position loop -- J : free nodes
K := I + 1;
F := Freq (I) + Freq (K); -- new frequency
Freq (J) := F;
K := J - 1;
while F < Freq (K) loop
K := K - 1;
end loop;
K := K + 1;
L := J - K; -- 2007: fix: was L:= (J-K)*2, memcopy parameter remain
Freq (K + 1 .. K + L) := Freq (K .. K + L - 1); -- shift by one cell right
Freq (K) := F;
Son (K + 1 .. K + L) := Son (K .. K + L - 1); -- shift by one cell right
Son (K) := I;
I := I + 2;
end loop;
-- Connect parent nodes
for I in 0 .. Max_Table - 1 loop
K := Son (I);
Parent (K) := I;
if K < Max_Table then
Parent (K + 1) := I;
end if;
end loop;
end Reconstruct_Freq_Tree;
C, I, J, K, L : Natural;
begin -- Update_Freq_Tree;
if Freq (Root_Position) = Max_Freq then
Reconstruct_Freq_Tree;
end if;
C := Parent (C0 + Max_Table);
loop
Freq (C) := Freq (C) + 1;
K := Freq (C);
-- Swap nodes to keep the tree freq-ordered
L := C + 1;
if K > Freq (L) then
while K > Freq (L + 1) loop
L := L + 1;
end loop;
Freq (C) := Freq (L);
Freq (L) := K;
I := Son (C);
Parent (I) := L;
if I < Max_Table then
Parent (I + 1) := L;
end if;
J := Son (L);
Son (L) := I;
Parent (J) := C;
if J < Max_Table then
Parent (J + 1) := C;
end if;
Son (C) := J;
C := L;
end if;
C := Parent (C);
exit when C = 0;
end loop; -- do it until reaching the root
end Update_Freq_Tree;
end Huffman;
Node_Nil : constant Integer := String_buffer_size; -- End of tree's node
Lson, Dad : array (0 .. String_buffer_size) of Natural;
Rson : array (0 .. String_buffer_size + 256) of Natural;
procedure Init_Tree is
begin
for I in String_buffer_size + 1 .. Rson'Last loop
Rson (I) := Node_Nil;
end loop; -- root
for I in 0 .. String_buffer_size - 1 loop
Dad (I) := Node_Nil;
end loop; -- node
end Init_Tree;
Match_Position : Natural;
Match_Length : Natural;
Text_Buf : Text_Buffer := empty_buffer;
procedure Insert_Node (R : Integer) is
I, P : Integer;
Geq : Boolean := True;
C : Natural;
begin
P := String_buffer_size + 1 + Integer (Text_Buf (R));
Rson (R) := Node_Nil;
Lson (R) := Node_Nil;
Match_Length := 0;
loop
if Geq then
if Rson (P) = Node_Nil then
Rson (P) := R;
Dad (R) := P;
return;
end if;
P := Rson (P);
else
if Lson (P) = Node_Nil then
Lson (P) := R;
Dad (R) := P;
return;
end if;
P := Lson (P);
end if;
I := 1;
while I < Look_Ahead and then Text_Buf (R + I) = Text_Buf (P + I) loop
I := I + 1;
end loop;
Geq := Text_Buf (R + I) >= Text_Buf (P + I) or I = Look_Ahead;
if I > Threshold then
if I > Match_Length then
Match_Position := (R - P) mod String_buffer_size - 1;
Match_Length := I;
exit when Match_Length >= Look_Ahead;
end if;
if I = Match_Length then
C := (R - P) mod String_buffer_size - 1;
if C < Match_Position then
Match_Position := C;
end if;
end if;
end if;
end loop;
Dad (R) := Dad (P);
Lson (R) := Lson (P);
Rson (R) := Rson (P);
Dad (Lson (P)) := R;
Dad (Rson (P)) := R;
if Rson (Dad (P)) = P then
Rson (Dad (P)) := R;
else
Lson (Dad (P)) := R;
end if;
Dad (P) := Node_Nil; -- remove P
end Insert_Node;
procedure Delete_Node (P : Natural) is
Q : Natural;
begin
if Dad (P) = Node_Nil then -- unregistered
return;
end if;
if Rson (P) = Node_Nil then
Q := Lson (P);
elsif Lson (P) = Node_Nil then
Q := Rson (P);
else
Q := Lson (P);
if Rson (Q) /= Node_Nil then
loop
Q := Rson (Q);
exit when Rson (Q) = Node_Nil;
end loop;
Rson (Dad (Q)) := Lson (Q);
Dad (Lson (Q)) := Dad (Q);
Lson (Q) := Lson (P);
Dad (Lson (P)) := Q;
end if;
Rson (Q) := Rson (P);
Dad (Rson (P)) := Q;
end if;
Dad (Q) := Dad (P);
if Rson (Dad (P)) = P then
Rson (Dad (P)) := Q;
else
Lson (Dad (P)) := Q;
end if;
Dad (P) := Node_Nil;
end Delete_Node;
package Huffman_E is new Huffman;
I, R, S, Last_Match_Length : Natural;
Len : Integer;
C : Byte;
begin
if not More_Bytes then
return;
end if;
Huffman_E.Start;
Init_Tree;
S := 0;
R := String_buffer_size - Look_Ahead;
Len := 0;
while Len < Look_Ahead and More_Bytes loop
Text_Buf (R + Len) := Read_Byte;
Len := Len + 1;
end loop;
-- Seems: fill dictionary with default value
--
-- for I in 1.. Look_Ahead loop
-- Insert_Node(R - I);
-- end loop;
Insert_Node (R);
loop
if Match_Length > Len then
Match_Length := Len;
end if;
if Match_Length <= Threshold then
Match_Length := 1;
Huffman_E.Update_Freq_Tree (Natural (Text_Buf (R)));
Write_Literal (Text_Buf (R));
else
Write_DL_Code (Match_Position + 1, Match_Length);
end if;
Last_Match_Length := Match_Length;
I := 0;
while I < Last_Match_Length and More_Bytes loop
I := I + 1;
Delete_Node (S);
C := Read_Byte;
Text_Buf (S) := C;
if S < Look_Ahead - 1 then
Text_Buf (S + String_buffer_size) := C;
end if;
S := (S + 1) mod String_buffer_size;
R := (R + 1) mod String_buffer_size;
Insert_Node (R);
end loop;
while I < Last_Match_Length loop
I := I + 1;
Delete_Node (S);
S := (S + 1) mod String_buffer_size;
R := (R + 1) mod String_buffer_size;
Len := Len - 1;
if Len > 0 then
Insert_Node (R);
end if;
end loop;
exit when Len = 0;
end loop;
end LZ77_using_LZHuf;
--------------------------
-- Info-Zip algorithm --
--------------------------
-- LZ77_using_IZ: based on deflate.c by Jean-Loup Gailly.
-- Core description of the algorithm:
--
-- The most straightforward technique turns out to be the fastest for
-- most input files: try all possible matches and select the longest.
-- The key feature of this algorithm is that insertions into the string
-- dictionary are very simple and thus fast, and deletions are avoided
-- completely. Insertions are performed at each input character, whereas
-- string matches are performed only when the previous match ends. So it
-- is preferable to spend more time in matches to allow very fast string
-- insertions and avoid deletions. The matching algorithm for small
-- strings is inspired from that of Rabin & Karp [1]. A brute force approach
-- is used to find longer strings when a small match has been found.
--
-- The idea of lazy evaluation of matches is due to Jan-Mark Wams.
--
-- [1] A description of the Rabin and Karp algorithm is given in the book
-- "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
--
-- About hashing: chapter 6.4 of The Art of Computer Programming, Volume 3, D.E. Knuth
-- Rabin and Karp algorithm: http://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
-- Compression level: 0: store, 1: best speed, 9: best compression, 10: variant of level 9
-- Ada code: only levels 4 to 10 are supported.
procedure LZ77_using_IZ (level : Natural) is
HASH_BITS : constant := 15; -- 13..15
HASH_SIZE : constant := 2 ** HASH_BITS;
HASH_MASK : constant := HASH_SIZE - 1;
WSIZE : constant Integer_M32 := Integer_M32 (String_buffer_size);
WMASK : constant Unsigned_M16 := Unsigned_M16 (WSIZE - 1);
-- HASH_SIZE and WSIZE must be powers of two
NIL : constant := 0; -- Tail of hash chains
TOO_FAR : constant := 4096; -- Matches of length 3 are discarded if their distance exceeds TOO_FAR
--
subtype ulg is Unsigned_M32;
subtype unsigned is Unsigned_M16;
subtype ush is Unsigned_M16;
-- subtype long is Integer_M32;
-- subtype int is Integer;
subtype Pos is Unsigned_M32; -- must be at least 32 bits
-- subtype IPos is unsigned;
-- A Pos is an index in the character window. IPos is used only for parameter passing.
window : array (0 .. 2 * WSIZE - 1) of Byte;
-- Sliding window. Input bytes are read into the second half of the window,
-- and move to the first half later to keep a dictionary of at least WSIZE
-- bytes. With this organization, matches are limited to a distance of
-- WSIZE-MAX_MATCH bytes, but this ensures that IO is always
-- performed with a length multiple of the block size.
prev : array (0 .. unsigned (WSIZE - 1)) of Pos;
-- Link to older string with same hash index.
-- This link is maintained only for the last 32K strings.
-- An index in this array is thus a window index modulo 32K.
head : array (0 .. unsigned (HASH_SIZE - 1)) of Pos;
-- Heads of the hash chains or NIL.
window_size : ulg;
-- window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
-- input file length plus MIN_LOOKAHEAD.
sliding : Boolean; -- Set to False when the input file is already in memory [was: int]
ins_h : unsigned; -- hash index of string to be inserted
MIN_MATCH : constant Integer_M32 := Integer_M32 (Threshold) + 1; -- Deflate: 3
MAX_MATCH : constant Integer_M32 := Integer_M32 (Look_Ahead); -- Deflate: 258
-- Minimum amount of lookahead, except at the end of the input file.
MIN_LOOKAHEAD : constant Integer_M32 := MAX_MATCH + MIN_MATCH + 1; -- Deflate: 262
-- This LZ77 compression doesn't use the full possible distance range: 32507..32768 unused!
MAX_DIST : constant Integer_M32 := WSIZE - MIN_LOOKAHEAD; -- Deflate: 32506
H_SHIFT : constant Integer := Integer ((HASH_BITS + MIN_MATCH - 1) / MIN_MATCH);
-- Number of bits by which ins_h and del_h must be shifted at each
-- input step. It must be such that after MIN_MATCH steps, the oldest
-- byte no longer takes part in the hash key, that is:
-- H_SHIFT * MIN_MATCH >= HASH_BITS
prev_length : Natural_M32; -- [was: unsigned]
-- Length of the best match at previous step. Matches not greater than this
-- are discarded. This is used in the lazy match evaluation.
strstart : Natural_M32; -- start of string to insert [was: unsigned]
match_start : Natural_M32; -- start of matching string [was: unsigned]
eofile : Boolean; -- flag set at end of input file [was: int]
lookahead : Natural_M32; -- number of valid bytes ahead in window [was: unsigned]
max_chain_length : unsigned;
-- To speed up deflation, hash chains are never searched beyond this length.
-- A higher limit improves compression ratio but degrades the speed.
max_lazy_match : Natural_M32; -- [was: unsigned]
-- Attempt to find a better match only when the current match is strictly
-- smaller than this value. This mechanism is used only for compression
-- levels >= 4.
good_match : Natural_M32; -- [was: unsigned]
-- Use a faster search when the previous match is longer than this
nice_match : Integer_M32; -- Stop searching when current match exceeds this
-- Values for max_lazy_match, good_match, nice_match and max_chain_length,
-- depending on the desired pack level (0..9). The values given below have
-- been tuned to exclude worst case performance for pathological files.
-- Better values may be found for specific files.
type config is record
good_length : Natural_M32; -- reduce lazy search above this match length [was: ush]
max_lazy : Natural_M32; -- do not perform lazy search above this match length
nice_length : Integer_M32; -- quit search above this match length
max_chain : ush;
end record;
configuration_table : constant array (0 .. 10) of config := (
-- good lazy nice chain
(0, 0, 0, 0), -- 0: store only
(4, 4, 8, 4), -- 1: maximum speed, no lazy matches
(4, 5, 16, 8),
(4, 6, 32, 32),
(4, 4, 16, 16), -- 4: lazy matches
(8, 16, 32, 32),
(8, 16, 128, 128),
(8, 32, 128, 256),
(32, 128, 258, 1024),
(32, 258, 258, 4096), -- 9: maximum compression
(34, 258, 258, 4096)); -- "secret" variant of level 9
-- Update a hash value with the given input byte
-- IN assertion: all calls to to UPDATE_HASH are made with consecutive
-- input characters, so that a running hash key can be computed from the
-- previous key instead of complete recalculation each time.
procedure UPDATE_HASH (h : in out unsigned; c : Byte) is
pragma Inline (UPDATE_HASH);
begin
h := (unsigned (Shift_Left (Unsigned_32 (h), H_SHIFT)) xor unsigned (c)) and HASH_MASK;
end UPDATE_HASH;
-- Insert string starting at s in the dictionary and set match_head to the previous head
-- of the hash chain (the most recent string with same hash key). Return
-- the previous length of the hash chain.
-- IN assertion: all calls to to INSERT_STRING are made with consecutive
-- input characters and the first MIN_MATCH bytes of s are valid
-- (except for the last MIN_MATCH-1 bytes of the input file).
procedure INSERT_STRING (s : Integer_M32; match_head : out Natural_M32) is
pragma Inline (INSERT_STRING);
begin
UPDATE_HASH (ins_h, window (s + MIN_MATCH - 1));
match_head := Natural_M32 (head (ins_h));
prev (unsigned (s) and WMASK) := Pos (match_head);
head (ins_h) := Pos (s);
end INSERT_STRING;
procedure Read_buf (from : Integer_M32; amount : unsigned; actual : out Integer_M32) is
need : unsigned := amount;
begin
-- put_line("Read buffer: from:" & from'img & "; amount:" & amount'img);
actual := 0;
while need > 0 and then More_Bytes loop
window (from + actual) := Read_Byte;
actual := actual + 1;
need := need - 1;
end loop;
-- put_line("Read buffer: actual:" & actual'img);
end Read_buf;
-- Fill the window when the lookahead becomes insufficient.
-- Updates strstart and lookahead, and sets eofile if end of input file.
--
-- IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
-- OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
-- At least one byte has been read, or eofile is set; file reads are
-- performed for at least two bytes (required for the translate_eol option).
procedure Fill_window is
more : unsigned;
m : Pos;
n : Natural_M32;
begin
loop
more := unsigned (window_size - ulg (lookahead) - ulg (strstart));
if False then -- C: "if (more == (unsigned)EOF) {" ?... GdM: seems a 16-bit code for EOF
-- Very unlikely, but possible on 16 bit machine if strstart == 0
-- and lookahead == 1 (input done one byte at time)
more := more - 1;
elsif strstart >= WSIZE + MAX_DIST and then sliding then
-- By the IN assertion, the window is not empty so we can't confuse
-- more == 0 with more == 64K on a 16 bit machine.
window (0 .. WSIZE - 1) := window (WSIZE .. 2 * WSIZE - 1);
-- GdM: in rare cases (e.g. level 9 on test file "enwik8"), match_start happens
-- to be < WSIZE. We do as in the original 16-bit C code: mod 2**16, such that the
-- index is the window's range.
-- This assumes WSIZE = 2**15, which is checked at startup of LZ77_using_IZ.
-- Very likely, match_start is garbage anyway - see http://sf.net/p/infozip/bugs/49/
match_start := Natural_M32 (Unsigned_16 (match_start) - Unsigned_16 (WSIZE mod (2**16)));
strstart := strstart - WSIZE; -- we now have strstart >= MAX_DIST:
for nn in 0 .. unsigned'(HASH_SIZE - 1) loop
m := head (nn);
if m >= Pos (WSIZE) then
head (nn) := m - Pos (WSIZE);
else
head (nn) := NIL;
end if;
end loop;
--
for nn in 0 .. unsigned (WSIZE - 1) loop
m := prev (nn);
if m >= Pos (WSIZE) then
prev (nn) := m - Pos (WSIZE);
else
prev (nn) := NIL;
end if;
-- If n is not on any hash chain, prev[n] is garbage but its value will never be used.
end loop;
more := more + unsigned (WSIZE);
end if;
exit when eofile;
-- If there was no sliding:
-- strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-- more == window_size - lookahead - strstart
-- => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-- => more >= window_size - 2*WSIZE + 2
-- In the MMAP or BIG_MEM case (not yet supported in gzip),
-- window_size == input_size + MIN_LOOKAHEAD &&
-- strstart + lookahead <= input_size => more >= MIN_LOOKAHEAD.
-- Otherwise, window_size == 2*WSIZE so more >= 2.
-- If there was sliding, more >= WSIZE. So in all cases, more >= 2.
--
pragma Assert (more >= 2, "more < 2");
--
Read_buf (strstart + lookahead, more, n);
if n = 0 then
eofile := True;
else
lookahead := lookahead + n;
end if;
exit when lookahead >= MIN_LOOKAHEAD or eofile;
end loop;
-- put_line("Fill done - eofile = " & eofile'img);
end Fill_window;
-- Initialize the "longest match" routines for a new file
--
-- IN assertion: window_size is > 0 if the input file is already read or
-- mapped in the window array, 0 otherwise. In the first case,
-- window_size is sufficient to contain the whole input file plus
-- MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
-- of window when looking for matches towards the end).
procedure LM_Init (pack_level : Natural) is
begin
-- Do not slide the window if the whole input is already in memory (window_size > 0)
sliding := False;
if window_size = 0 then
sliding := True;
window_size := 2 * ulg (WSIZE);
end if;
-- Initialize the hash table.
-- prev will be initialized on the fly.
head := (others => NIL);
-- Set the default configuration parameters:
max_lazy_match := configuration_table (pack_level).max_lazy;
good_match := configuration_table (pack_level).good_length;
nice_match := configuration_table (pack_level).nice_length;
max_chain_length := configuration_table (pack_level).max_chain;
-- Info-Zip comment: ??? reduce max_chain_length for binary files
strstart := 0;
Read_buf (0, unsigned (WSIZE), lookahead);
if lookahead = 0 then
eofile := True;
return;
end if;
eofile := False;
-- Make sure that we always have enough lookahead. This is important
-- if input comes from a device such as a tty.
if lookahead < MIN_LOOKAHEAD then
Fill_window;
end if;
ins_h := 0;
for j in 0 .. Natural_M32 (MIN_MATCH) - 2 loop
UPDATE_HASH (ins_h, window (j));
end loop;
-- If lookahead < MIN_MATCH, ins_h is garbage, but this is
-- not important since only literal bytes will be emitted.
end LM_Init;
-- Set match_start to the longest match starting at the given string and
-- return its length. Matches shorter or equal to prev_length are discarded,
-- in which case the result is equal to prev_length and match_start is
-- garbage.
-- IN assertions: current_match is the head of the hash chain for the current
-- string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
procedure Longest_Match (current_match : in out Integer_M32; longest : out Integer_M32) is
chain_length : unsigned := max_chain_length; -- max hash chain length
scan : Integer_M32 := strstart; -- current string
match : Integer_M32; -- matched string
len : Integer_M32; -- length of current match
best_len : Integer_M32 := prev_length; -- best match length so far
limit : Natural_M32; -- [was: IPos]
strend : constant Integer_M32 := strstart + MAX_MATCH;
scan_end : Integer_M32 := scan + best_len;
begin
-- Stop when current_match becomes <= limit. To simplify the code,
-- we prevent matches with the string of window index 0.
if strstart > MAX_DIST then
limit := strstart - MAX_DIST;
else
limit := NIL;
end if;
-- Do not waste too much time if we already have a good match:
if prev_length >= good_match then
chain_length := chain_length / 4;
end if;
pragma Assert
(strstart <= Integer_M32 (window_size) - MIN_LOOKAHEAD,
"insufficient lookahead"); -- In deflate.c
loop
if current_match >= strstart then
-- Added 2020-11-07. The file test/sample.jpg bombs the assertion a few lines later.
longest := MIN_MATCH - 1;
return;
end if;
pragma Assert (current_match < strstart, "no future"); -- In deflate.c
match := current_match;
-- Skip to next match if the match length cannot increase
-- or if the match length is less than 2:
--
-- NB: this is the Not-UNALIGNED_OK variant in the C code.
-- Translation of the UNALIGNED_OK variant is left as an exercise ;-).
-- (!! worth a try: GNAT optimizes window(match..match+1[3]) to 16[32] bit)
--
if window (match + best_len) /= window (scan_end) or else
window (match + best_len - 1) /= window (scan_end - 1) or else
window (match) /= window (scan) or else
window (match + 1) /= window (scan + 1)
then
match := match + 1; -- C: continue
else
-- The check at best_len - 1 can be removed because it will be made
-- again later. (This heuristic is not always a win.)
--
-- It is not necessary to compare window(scan + 2) and window(match + 2) since they
-- are always equal when the other bytes match, given that
-- the hash keys are equal and that HASH_BITS >= 8.
scan := scan + 2;
match := match + 2;
-- C: The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
-- It is easy to get rid of this optimization if necessary.
-- Ada: see the "else" part below.
if MAX_MATCH = 258 then
-- We check for insufficient lookahead only every 8th comparison;
-- the 256th check will be made at strstart + 258.
loop
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match);
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match) or else scan >= strend;
end loop;
else
-- We check for insufficient lookahead after every comparison.
loop
scan := scan + 1;
match := match + 1;
exit when window (scan) /= window (match) or else scan >= strend;
end loop;
end if;
-- Assert(scan <= window+(unsigned)(window_size-1), "wild scan"); ??
len := MAX_MATCH - (strend - scan);
scan := strend - MAX_MATCH;
if len > best_len then
match_start := current_match;
best_len := len;
exit when len >= nice_match;
scan_end := scan + best_len;
end if;
end if;
current_match := Integer_M32 (prev (unsigned (current_match) and WMASK));
exit when current_match <= limit;
chain_length := chain_length - 1;
exit when chain_length = 0;
end loop;
longest := best_len;
end Longest_Match;
procedure LZ77_part_of_IZ_Deflate is
hash_head : Natural_M32 := NIL; -- head of hash chain
prev_match : Natural_M32; -- previous match [was: IPos]
match_available : Boolean := False; -- set if previous match exists
match_length : Natural_M32 := MIN_MATCH - 1; -- length of best match
max_insert : Natural_M32;
begin
match_start := 0; -- NB: no initialization in deflate.c
-- NB: level <= 3 would call deflate_fast;
--
-- Process the input block.
while lookahead /= 0 loop
-- Insert the string window(strstart .. strstart + 2) in the
-- dictionary, and set hash_head to the head of the hash chain:
if lookahead >= MIN_MATCH then
INSERT_STRING (strstart, hash_head);
end if;
-- Find the longest match, discarding those <= prev_length.
prev_length := match_length;
prev_match := match_start;
match_length := MIN_MATCH - 1;
if hash_head /= NIL and then
prev_length < max_lazy_match and then
strstart - hash_head <= MAX_DIST
then
-- To simplify the code, we prevent matches with the string
-- of window index 0 (in particular we have to avoid a match
-- of the string with itself at the start of the input file).
--
-- Do not look for matches beyond the end of the input.
-- This is necessary to make deflate deterministic.
if nice_match > lookahead then
nice_match := lookahead;
end if;
Longest_Match (hash_head, match_length);
-- Longest_Match sets match_start
if match_length > lookahead then
match_length := lookahead;
end if;
-- Ignore a length 3 match if it is too distant:
if match_length = MIN_MATCH and then strstart - match_start > TOO_FAR then
-- If prev_match is also MIN_MATCH, match_start is garbage
-- but we will ignore the current match anyway.
match_length := MIN_MATCH - 1;
end if;
end if;
-- If there was a match at the previous step and the current
-- match is not better, output the previous match:
if prev_length >= MIN_MATCH and then match_length <= prev_length then
max_insert := strstart + lookahead - MIN_MATCH;
-- C: in DEBUG mode: check_match(strstart-1, prev_match, prev_length);
--
------------------------------------
-- Output a Distance-Length code --
------------------------------------
Write_DL_Code (Positive (strstart - 1 - prev_match), Positive (prev_length));
-- Insert in hash table all strings up to the end of the match.
-- strstart-1 and strstart are already inserted.
lookahead := lookahead - (prev_length - 1);
prev_length := prev_length - 2;
loop
strstart := strstart + 1;
if strstart <= max_insert then
INSERT_STRING (strstart, hash_head);
-- strstart never exceeds WSIZE - MAX_MATCH, so there
-- are always MIN_MATCH bytes ahead.
end if;
prev_length := prev_length - 1;
exit when prev_length = 0;
end loop;
strstart := strstart + 1;
match_available := False;
match_length := MIN_MATCH - 1;
elsif match_available then
-- If there was no match at the previous position, output a
-- single literal. If there was a match but the current match
-- is longer, truncate the previous match to a single literal.
--
------------------------
-- Output a literal --
------------------------
Write_Literal (window (strstart - 1));
strstart := strstart + 1;
lookahead := lookahead - 1;
else
-- There is no previous match to compare with, wait for the next step to decide.
match_available := True;
strstart := strstart + 1;
lookahead := lookahead - 1;
end if;
-- Assert(strstart <= isize && lookahead <= isize, "a bit too far");
--
-- Make sure that we always have enough lookahead, except
-- at the end of the input file. We need MAX_MATCH bytes
-- for the next match, plus MIN_MATCH bytes to insert the
-- string following the next match.
if lookahead < MIN_LOOKAHEAD then
Fill_window;
end if;
end loop;
-----------------------------------
-- Output last literal, if any --
-----------------------------------
if match_available then
Write_Literal (window (strstart - 1));
end if;
end LZ77_part_of_IZ_Deflate;
Code_too_clever : exception;
begin
if Look_Ahead /= 258 or String_buffer_size /= 2 ** 15 or Threshold /= 2 then
raise Code_too_clever; -- was optimized for these parameters
end if;
window_size := 0;
LM_Init (level);
LZ77_part_of_IZ_Deflate;
end LZ77_using_IZ;
---------------------------------------------------------------------
-- BT4 - Binary tree of match positions selected with --
-- the leading 2 to 4 bytes of each possible match. --
---------------------------------------------------------------------
-- Based on BT4.java and LZMAEncoderFast.java by Lasse Collin,
-- itself based on LzFind.c by Igor Pavlov.
procedure LZ77_using_BT4 is
MATCH_LEN_MIN : constant Integer := Threshold + 1;
--
readPos : Integer := -1;
cur_literal : Byte;
readLimit : Integer := -1;
finishing : constant Boolean := False;
writePos : Integer := 0;
pendingSize : Integer := 0;
--
OPTS : constant := 4096;
EXTRA_SIZE_BEFORE : constant := OPTS;
EXTRA_SIZE_AFTER : constant := OPTS;
keepSizeBefore : constant Integer := EXTRA_SIZE_BEFORE + String_buffer_size;
keepSizeAfter : constant Integer := EXTRA_SIZE_AFTER + Look_Ahead;
reserveSize : constant Integer :=
Integer'Min (
String_buffer_size / 2 +
256 * (2 ** 10), -- 256 KiB
512 * (2 ** 20) -- 512 MiB
);
getBufSize : constant Integer := keepSizeBefore + keepSizeAfter + reserveSize;
type Int_array is array (Natural range <>) of Integer;
type p_Int_array is access Int_array;
procedure Dispose is new Ada.Unchecked_Deallocation (Int_array, p_Int_array);
procedure Normalize (positions : in out Int_array; normalizationOffset : Integer) is
begin
for i in 0 .. positions'Length - 1 loop
if positions (i) <= normalizationOffset then
positions (i) := 0;
else
positions (i) := positions (i) - normalizationOffset;
end if;
end loop;
end Normalize;
function Get_Available return Integer is
pragma Inline (Get_Available);
begin
-- Compared to the Java version: - 1 shift for getting readPos
-- in buf'Range upon: cur_literal := buf (readPos);
return writePos - readPos - 1;
end Get_Available;
function Move_Pos (requiredForFlushing, requiredForFinishing : Integer) return Integer is
-- Java name: movePos.
avail : Integer;
begin
pragma Assert (requiredForFlushing >= requiredForFinishing);
readPos := readPos + 1;
avail := Get_Available;
if avail < requiredForFlushing then
if avail < requiredForFinishing or else not finishing
then
pendingSize := pendingSize + 1;
-- GdM: This causes cyclicPos and lzpos not being in sync with readPos.
-- The pendingSize value is there for catching up.
avail := 0;
end if;
end if;
return avail;
end Move_Pos;
function getHash4Size return Integer is
h : Unsigned_32 := Unsigned_32 (String_buffer_size - 1);
begin
h := h or Shift_Right (h, 1);
h := h or Shift_Right (h, 2);
h := h or Shift_Right (h, 4);
h := h or Shift_Right (h, 8);
h := Shift_Right (h, 1);
h := h or 16#FFFF#; -- LzFind.c: "don't change it! It's required for Deflate"
if h > 2 ** 24 then
h := Shift_Right (h, 1);
end if;
return Integer (h + 1);
end getHash4Size;
type p_Byte_Array is access Byte_Array;
procedure Dispose is new Ada.Unchecked_Deallocation (Byte_Array, p_Byte_Array);
package Hash234 is
HASH_2_SIZE : constant := 2 ** 10;
HASH_2_MASK : constant := HASH_2_SIZE - 1;
HASH_3_SIZE : constant := 2 ** 16;
HASH_3_MASK : constant := HASH_3_SIZE - 1;
hash_4_size : constant Integer := getHash4Size;
hash_4_mask : constant Unsigned_32 := Unsigned_32 (hash_4_size) - 1;
--
hash2Table : Int_array (0 .. HASH_2_SIZE - 1) := (others => 0); -- [Initialization added]
hash3Table : Int_array (0 .. HASH_3_SIZE - 1) := (others => 0); -- [Initialization added]
hash4Table : p_Int_array;
--
hash2Value, hash3Value, hash4Value : Unsigned_32 := 0;
--
procedure calcHashes (buf : Byte_Array; off : Integer);
procedure updateTables (pos : Integer);
procedure Normalize (normalizeOffset : Integer);
end Hash234;
package body Hash234 is
crcTable : array (Byte) of Unsigned_32;
CRC32_POLY : constant := 16#EDB8_8320#;
procedure calcHashes (buf : Byte_Array; off : Integer) is
temp : Unsigned_32 := crcTable (buf (off)) xor Unsigned_32 (buf (off + 1));
begin
hash2Value := temp and HASH_2_MASK;
temp := temp xor Shift_Left (Unsigned_32 (buf (off + 2)), 8);
hash3Value := temp and HASH_3_MASK;
temp := temp xor Shift_Left (crcTable (buf (off + 3)), 5);
hash4Value := temp and hash_4_mask;
end calcHashes;
procedure updateTables (pos : Integer) is
begin
hash2Table (Integer (hash2Value)) := pos;
hash3Table (Integer (hash3Value)) := pos;
hash4Table (Integer (hash4Value)) := pos;
end updateTables;
procedure Normalize (normalizeOffset : Integer) is
begin
Normalize (hash2Table, normalizeOffset);
Normalize (hash3Table, normalizeOffset);
Normalize (hash4Table.all, normalizeOffset);
end Normalize;
r : Unsigned_32;
begin
-- NB: heap allocation used only for convenience because of
-- small default stack sizes on some compilers.
hash4Table := new Int_array (0 .. hash_4_size - 1);
hash4Table.all := (others => 0); -- [Initialization added]
for i in Byte loop
r := Unsigned_32 (i);
for j in 0 .. 7 loop
if (r and 1) = 0 then
r := Shift_Right (r, 1);
else
r := Shift_Right (r, 1) xor CRC32_POLY;
end if;
end loop;
crcTable (i) := r;
end loop;
end Hash234;
Nice_Length : constant Integer := Integer'Min (162, Look_Ahead); -- const. was 64
Depth_Limit : constant := 48; -- Alternatively: 16 + Nice_Length / 2
cyclicSize : constant Integer := String_buffer_size; -- Had: + 1;
cyclicPos : Integer := -1;
lzPos : Integer := cyclicSize;
max_dist : constant Integer := cyclicSize - (Look_Ahead + 2);
-- NB: 2020-11-04: added "- (Look_Ahead + 2)" to prevent corruption of
-- the expansion buffer in LZMA.Encoding when DL codes are tested in front
-- of the actual writes, before actual entropy compression (since rev. #850).
package BT4_Algo is
procedure Skip (len : Natural);
pragma Inline (Skip);
procedure Read_One_and_Get_Matches (matches : out Matches_Type);
end BT4_Algo;
buf : p_Byte_Array;
tree : p_Int_array;
package body BT4_Algo is
function Move_Pos_in_BT4 return Integer is
-- Java name: movePos.
avail : constant Integer :=
Move_Pos (requiredForFlushing => Nice_Length,
requiredForFinishing => 4);
normalizationOffset : Integer;
begin
-- Put_Line ("BT4_Algo.Move_Pos_in_BT4");
if avail /= 0 then
lzPos := lzPos + 1;
if lzPos = Integer'Last then
normalizationOffset := Integer'Last - cyclicSize;
Hash234.Normalize (normalizationOffset);
Normalize (tree.all, normalizationOffset);
lzPos := lzPos - normalizationOffset;
end if;
cyclicPos := cyclicPos + 1;
if cyclicPos = cyclicSize then
-- Put_Line("cyclicPos zeroed");
cyclicPos := 0;
end if;
end if;
return avail;
end Move_Pos_in_BT4;
Null_position : constant := -1; -- LzFind.c: kEmptyHashValue, 0
procedure Skip_and_Update_Tree (niceLenLimit : Integer; currentMatch : in out Integer) is
delta0, depth, ptr0, ptr1, pair, len, len0, len1 : Integer;
begin
-- Put("BT4_Algo.Skip_and_Update_Tree... ");
depth := Depth_Limit;
ptr0 := cyclicPos * 2 + 1;
ptr1 := cyclicPos * 2;
len0 := 0;
len1 := 0;
loop
delta0 := lzPos - currentMatch;
if depth = 0 or else delta0 >= max_dist then
tree (ptr0) := Null_position;
tree (ptr1) := Null_position;
return;
end if;
depth := depth - 1;
if cyclicPos - delta0 < 0 then
pair := cyclicSize;
else
pair := 0;
end if;
pair := (cyclicPos - delta0 + pair) * 2;
len := Integer'Min (len0, len1);
-- Match ?
if buf (readPos + len - delta0) = buf (readPos + len) then
-- No need to look for longer matches than niceLenLimit
-- because we only are updating the tree, not returning
-- matches found to the caller.
loop
len := len + 1;
if len = niceLenLimit then
tree (ptr1) := tree (pair);
tree (ptr0) := tree (pair + 1);
return;
end if;
exit when buf (readPos + len - delta0) /= buf (readPos + len);
end loop;
end if;
-- Bytes are no more matching. The past value is either smaller...
if buf (readPos + len - delta0) < buf (readPos + len) then
tree (ptr1) := currentMatch;
ptr1 := pair + 1;
currentMatch := tree (ptr1);
len1 := len;
else -- ... or larger
tree (ptr0) := currentMatch;
ptr0 := pair;
currentMatch := tree (ptr0);
len0 := len;
end if;
end loop;
end Skip_and_Update_Tree;
procedure Skip (len : Natural) is
--
procedure Skip_one is
pragma Inline (Skip_one);
niceLenLimit, avail, currentMatch : Integer;
begin
niceLenLimit := Nice_Length;
avail := Move_Pos_in_BT4;
if avail < niceLenLimit then
if avail = 0 then
return;
end if;
niceLenLimit := avail;
end if;
Hash234.calcHashes (buf.all, readPos);
currentMatch := Hash234.hash4Table (Integer (Hash234.hash4Value));
Hash234.updateTables (lzPos);
Skip_and_Update_Tree (niceLenLimit, currentMatch);
end Skip_one;
--
begin
for count in reverse 1 .. len loop
Skip_one;
end loop;
end Skip;
procedure Read_One_and_Get_Matches (matches : out Matches_Type) is
matchLenLimit : Integer := Look_Ahead;
niceLenLimit : Integer := Nice_Length;
avail : Integer;
delta0, delta2, delta3, currentMatch,
lenBest, depth, ptr0, ptr1, pair, len, len0, len1 : Integer;
begin
-- Put("BT4_Algo.Get_Matches... ");
matches.count := 0;
avail := Move_Pos_in_BT4;
if avail < matchLenLimit then
if avail = 0 then
return;
end if;
matchLenLimit := avail;
if niceLenLimit > avail then
niceLenLimit := avail;
end if;
end if;
--
Hash234.calcHashes (buf.all, readPos);
delta2 := lzPos - Hash234.hash2Table (Integer (Hash234.hash2Value));
delta3 := lzPos - Hash234.hash3Table (Integer (Hash234.hash3Value));
currentMatch := Hash234.hash4Table (Integer (Hash234.hash4Value));
Hash234.updateTables (lzPos);
--
lenBest := 0;
-- See if the hash from the first two bytes found a match.
-- The hashing algorithm guarantees that if the first byte
-- matches, also the second byte does, so there's no need to
-- test the second byte.
if delta2 < max_dist and then buf (readPos - delta2) = buf (readPos) then
-- Match of length 2 found and checked.
lenBest := 2;
matches.count := 1;
matches.dl (matches.count).length := 2;
matches.dl (matches.count).distance := delta2;
end if;
-- See if the hash from the first three bytes found a match that
-- is different from the match possibly found by the two-byte hash.
-- Also here the hashing algorithm guarantees that if the first byte
-- matches, also the next two bytes do.
if delta2 /= delta3 and then delta3 < max_dist
and then buf (readPos - delta3) = buf (readPos)
then
-- Match of length 3 found and checked.
lenBest := 3;
matches.count := matches.count + 1;
matches.dl (matches.count).distance := delta3;
delta2 := delta3;
end if;
-- If a match was found, see how long it is.
if matches.count > 0 then
while lenBest < matchLenLimit and then buf (readPos + lenBest - delta2)
= buf (readPos + lenBest)
loop
lenBest := lenBest + 1;
end loop;
matches.dl (matches.count).length := lenBest;
-- Return if it is long enough (niceLen or reached the end of the dictionary).
if lenBest >= niceLenLimit then
Skip_and_Update_Tree (niceLenLimit, currentMatch);
return;
end if;
end if;
-- A long enough match wasn't found so easily.
-- Look for better matches from the binary tree.
if lenBest < 3 then
lenBest := 3;
end if;
depth := Depth_Limit;
ptr0 := cyclicPos * 2 + 1;
ptr1 := cyclicPos * 2;
len0 := 0;
len1 := 0;
--
loop
delta0 := lzPos - currentMatch;
-- Return if the search depth limit has been reached or
-- if the distance of the potential match exceeds the
-- dictionary size.
if depth = 0 or else delta0 >= max_dist then
tree (ptr0) := Null_position;
tree (ptr1) := Null_position;
return;
end if;
depth := depth - 1;
--
if cyclicPos - delta0 < 0 then
pair := cyclicSize;
else
pair := 0;
end if;
pair := (cyclicPos - delta0 + pair) * 2;
len := Integer'Min (len0, len1);
-- Match ?
if buf (readPos + len - delta0) = buf (readPos + len) then
loop
len := len + 1;
exit when len >= matchLenLimit
or else buf (readPos + len - delta0) /= buf (readPos + len);
end loop;
if len > lenBest then
lenBest := len;
matches.count := matches.count + 1;
matches.dl (matches.count).length := len;
matches.dl (matches.count).distance := delta0;
if len >= niceLenLimit then
tree (ptr1) := tree (pair);
tree (ptr0) := tree (pair + 1);
return;
end if;
end if;
end if;
-- Bytes are no more matching. The past value is either smaller...
if buf (readPos + len - delta0) < buf (readPos + len) then
tree (ptr1) := currentMatch;
ptr1 := pair + 1;
currentMatch := tree (ptr1);
len1 := len;
else -- ... or larger
tree (ptr0) := currentMatch;
ptr0 := pair;
currentMatch := tree (ptr0);
len0 := len;
end if;
end loop;
end Read_One_and_Get_Matches;
begin
-- NB: heap allocation used only for convenience because of
-- small default stack sizes on some compilers.
tree := new Int_array (0 .. cyclicSize * 2 - 1);
for i in tree'Range loop
tree (i) := Null_position;
end loop;
end BT4_Algo;
-- Moves data from the end of the buffer to the beginning, discarding
-- old data and making space for new input.
procedure Move_Window is
-- Java name: moveWindow.
-- Align the move to a multiple of 16 bytes (LZMA-friendly, see pos_bits)
moveOffset : constant Integer := ((readPos + 1 - keepSizeBefore) / 16) * 16;
moveSize : constant Integer := writePos - moveOffset;
begin
-- Put_Line(" Move window, size=" & moveSize'Img & " offset=" & moveOffset'Img);
buf (0 .. moveSize - 1) := buf (moveOffset .. moveOffset + moveSize - 1);
readPos := readPos - moveOffset;
readLimit := readLimit - moveOffset;
writePos := writePos - moveOffset;
end Move_Window;
-- Copies new data into the buffer.
function Fill_Window (len_initial : Integer) return Integer is
-- Java name: fillWindow
-- Process pending data that hasn't been ran through the match finder yet.
-- Run it through the match finder now if there is enough new data
-- available (readPos < readLimit) that the encoder may encode at
-- least one more input byte.
--
procedure processPendingBytes is
oldPendingSize : Integer;
begin
if pendingSize > 0 and then readPos < readLimit then
readPos := readPos - pendingSize;
oldPendingSize := pendingSize;
pendingSize := 0;
BT4_Algo.Skip (oldPendingSize);
end if;
end processPendingBytes;
--
len : Integer := len_initial;
actual_len : Integer := 0;
begin
-- Put_Line("Fill window - start");
-- Move the sliding window if needed.
if readPos >= buf'Length - keepSizeAfter then
Move_Window;
end if;
-- Try to fill the dictionary buffer up to its boundary.
if len > buf'Length - writePos then
len := buf'Length - writePos;
end if;
while len > 0 and then More_Bytes loop
buf (writePos) := Read_Byte;
writePos := writePos + 1;
len := len - 1;
actual_len := actual_len + 1;
end loop;
-- Set the new readLimit but only if there's enough data to allow
-- encoding of at least one more byte.
if writePos >= keepSizeAfter then
readLimit := writePos - keepSizeAfter;
end if;
processPendingBytes;
-- Put_Line("Fill window, requested=" & len_initial'Img & " actual=" & actual_len'Img);
-- Tell the caller how much input we actually copied into the dictionary.
return actual_len;
end Fill_Window;
function Compute_Match_Length (distance, length_limit : Integer) return Natural is
pragma Inline (Compute_Match_Length);
back_pos : constant Integer := readPos - distance;
len : Integer := 0;
begin
if distance < 2 then
return 0;
end if;
-- @ if readPos+len not in buf.all'Range then
-- @ Put("**** readpos " & buf'Last'Img & readPos'Img);
-- @ end if;
-- @ if backPos+len not in buf.all'Range then
-- @ Put("**** backpos " & buf'Last'Img & back_pos'Img);
-- @ end if;
while len < length_limit and then buf (readPos + len) = buf (back_pos + len) loop
len := len + 1;
end loop;
return len;
end Compute_Match_Length;
readAhead : Integer := -1; -- LZMAEncoder.java
-- Small stack of recent distances used for LZMA.
subtype Repeat_stack_range is Integer range 0 .. 3;
-- 1-based distances.
rep_dist : array (Repeat_stack_range) of Distance_Type := (others => 1);
len_rep_dist : array (Repeat_stack_range) of Natural := (others => 0);
function Has_much_smaller_Distance (smallDist, bigDist : Distance_Type) return Boolean is
pragma Inline (Has_much_smaller_Distance);
begin
return (smallDist - 1) < (bigDist - 1) / 128;
end Has_much_smaller_Distance;
best_length_for_rep_dist, best_rep_dist_index : Integer;
procedure Read_One_and_Get_Matches (matches : out Matches_Type) is
avail, len : Integer;
begin
readAhead := readAhead + 1;
--
BT4_Algo.Read_One_and_Get_Matches (matches);
--
if LZMA_friendly then
best_length_for_rep_dist := 0;
avail := Integer'Min (Get_Available, Look_Ahead);
if avail >= MATCH_LEN_MIN then
for rep in Repeat_stack_range loop
len := Compute_Match_Length (rep_dist (rep), avail);
len_rep_dist (rep) := len;
-- Remember the index and length of the best repeated match.
if len > best_length_for_rep_dist then
best_rep_dist_index := rep;
best_length_for_rep_dist := len;
end if;
end loop;
else
for rep in Repeat_stack_range loop
len_rep_dist (rep) := 0; -- No match possible in any case.
end loop;
end if;
end if;
end Read_One_and_Get_Matches;
procedure Get_supplemental_Matches_from_Repeat_Matches (matches : in out Matches_Type) is
len, ins : Integer;
begin
if matches.count = 0 then
if best_length_for_rep_dist >= MATCH_LEN_MIN then
matches.dl (1).distance := rep_dist (best_rep_dist_index);
matches.dl (1).length := best_length_for_rep_dist;
matches.count := 1;
end if;
end if;
for rep in Repeat_stack_range loop
len := len_rep_dist (rep);
if len >= MATCH_LEN_MIN then
ins := 0;
for i in reverse 1 .. matches.count loop
if len = matches.dl (i).length then
if rep_dist (rep) = matches.dl (i).distance then
null; -- Identical match
else
-- Tie: insert the repeat match of same length into the list.
-- If the longest match strategy is applied, the second item is preferred.
if Has_much_smaller_Distance (matches.dl (i).distance, rep_dist (rep)) then
ins := i; -- Insert before
else
ins := i + 1; -- Insert after
end if;
exit;
-- Ada.Text_IO.Put_Line ("Tie");
end if;
elsif i < matches.count then
if len > matches.dl (i).length and then len < matches.dl (i + 1).length then
-- Insert between existing lengths
ins := i + 1;
exit;
-- We don't add len as the shortest length (worsens compression).
------
-- elsif i = 1
-- and then len >= MATCH_LEN_MIN
-- and then len >= matches.dl (1).length - 1 -- Some reluctance...
-- then
-- ins := 1;
end if;
elsif len > matches.dl (i).length then
-- i = matches.count in this case: add as longest.
ins := i + 1;
exit;
end if;
end loop;
-- We can insert this repeat match at position 'ins'.
if ins > 0 then
for i in reverse ins .. matches.count loop -- Empty if ins > count.
matches.dl (i + 1) := matches.dl (i);
end loop;
matches.dl (ins).distance := rep_dist (rep);
matches.dl (ins).length := len;
matches.count := matches.count + 1;
exit;
end if;
end if;
end loop;
pragma Assert (Are_Matches_Sorted (matches));
end Get_supplemental_Matches_from_Repeat_Matches;
procedure Skip (len : Natural) is
pragma Inline (Skip);
begin
readAhead := readAhead + len;
BT4_Algo.Skip (len);
end Skip;
procedure Reduce_consecutive_max_lengths (m : in out Matches_Type) is
-- Sometimes the BT4 algo returns a long list with consecutive lengths.
-- We try to reduce it, if there is a clear advantage with distances.
begin
while m.count > 1
and then m.dl (m.count).length = m.dl (m.count - 1).length + 1
and then Has_much_smaller_Distance (m.dl (m.count - 1).distance, m.dl (m.count).distance)
loop
m.count := m.count - 1;
end loop;
end Reduce_consecutive_max_lengths;
procedure Show_Matches (m : Matches_Type; phase : String) is
begin
Ada.Text_IO.Put_Line (
phase & " --- Matches: " & Integer'Image (m.count)
);
for i in 1 .. m.count loop
Ada.Text_IO.Put_Line (
" Distance:" & Integer'Image (m.dl (i).distance) &
"; Length:" & Integer'Image (m.dl (i).length)
);
end loop;
end Show_Matches;
pragma Unreferenced (Show_Matches);
matches : Matches_Array (0 .. 1);
current_match_index : Prefetch_Index_Type := 0;
match_trace : DLP_Array (1 .. Max_Length_any_Algo);
procedure Get_Next_Symbol is
new_ld, main : Distance_Length_Pair;
-- This function is for debugging. The matches stored in the 'tree' array
-- may be wrong if the variables cyclicPos, lzPos and readPos are not in sync.
-- The issue seems to have been solved now (rev. 489).
function Is_match_correct (shift : Natural) return Boolean is
begin
for i in reverse -1 + shift .. main.length - 2 + shift loop
if buf (readPos - (main.distance) + i) /= buf (readPos + i) then
return False; -- Should not occur.
end if;
end loop;
return True;
end Is_match_correct;
procedure Send_first_literal_of_match is
begin
Write_Literal (cur_literal);
readAhead := readAhead - 1;
end Send_first_literal_of_match;
procedure Send_DL_code (distance, length : Integer) is
found_repeat : Integer := rep_dist'First - 1;
aux : Integer;
begin
Write_DL_Code (distance, length);
readAhead := readAhead - length;
if LZMA_friendly then
--
-- Manage the stack of recent distances in the same way the "MA" part of LZMA does.
--
for i in rep_dist'Range loop
if distance = rep_dist (i) then
found_repeat := i;
exit;
end if;
end loop;
if found_repeat >= rep_dist'First then
-- Roll the stack of recent distances up to the item with index found_repeat,
-- which becomes first. If found_repeat = rep_dist'First, no actual change occurs.
aux := rep_dist (found_repeat);
for i in reverse rep_dist'First + 1 .. found_repeat loop
rep_dist (i) := rep_dist (i - 1);
end loop;
rep_dist (rep_dist'First) := aux;
else
-- Shift the stack of recent distances; the new distance becomes the first item.
for i in reverse rep_dist'First + 1 .. rep_dist'Last loop
rep_dist (i) := rep_dist (i - 1);
end loop;
rep_dist (0) := distance;
end if;
end if;
end Send_DL_code;
avail, limit : Integer;
index_max_score : Positive;
set_max_score : Prefetch_Index_Type;
hurdle : constant := 40;
begin
-- Get the matches for the next byte unless readAhead indicates
-- that we already got the new matches during the previous call
-- to this procedure.
if readAhead = -1 then
Read_One_and_Get_Matches (matches (current_match_index));
end if;
-- @ if readPos not in buf.all'Range then
-- @ Put("**** " & buf'Last'Img & keepSizeAfter'Img & readPos'Img & writePos'Img);
-- @ end if;
cur_literal := buf (readPos);
-- Get the number of bytes available in the dictionary, but
-- not more than the maximum match length. If there aren't
-- enough bytes remaining to encode a match at all, return
-- immediately to encode this byte as a literal.
avail := Integer'Min (Get_Available, Look_Ahead);
if avail < MATCH_LEN_MIN then
-- Put("[a]");
Send_first_literal_of_match;
return;
end if;
if LZMA_friendly and then best_length_for_rep_dist >= Nice_Length then
Skip (best_length_for_rep_dist - 1);
-- Put_Line("[DL RA]");
Send_DL_code (rep_dist (best_rep_dist_index), best_length_for_rep_dist);
return;
end if;
main := (length => 1, distance => 1);
if matches (current_match_index).count > 0 then
main := matches (current_match_index).dl (matches (current_match_index).count);
if main.length >= Nice_Length then
pragma Assert (Is_match_correct (1));
Skip (main.length - 1);
-- Put_Line("[DL A]" & mainDist'Img & mainLen'Img);
Send_DL_code (main.distance, main.length);
return;
end if;
Reduce_consecutive_max_lengths (matches (current_match_index));
if LZMA_friendly then
Get_supplemental_Matches_from_Repeat_Matches (matches (current_match_index));
end if;
main := matches (current_match_index).dl (matches (current_match_index).count);
--
if main.length = MATCH_LEN_MIN and then main.distance > 128 then
main.length := 1;
end if;
end if;
if LZMA_friendly
and then best_length_for_rep_dist > MATCH_LEN_MIN
and then (best_length_for_rep_dist >= main.length
or else (best_length_for_rep_dist >= main.length - 2 and then main.distance > 2 ** 9)
or else (best_length_for_rep_dist >= main.length - 3 and then main.distance > 2 ** 15))
then
-- Shortcut: we choose the longest repeat match.
Skip (best_length_for_rep_dist - 1);
-- Put_Line("[DL RB]");
Send_DL_code (rep_dist (best_rep_dist_index), best_length_for_rep_dist);
return;
end if;
if main.length < MATCH_LEN_MIN or else avail <= MATCH_LEN_MIN then
-- Put("[b]");
Send_first_literal_of_match;
return;
end if;
-------------------------------------------------------------------------
-- Get the next match. Test if it is better than the current match. --
-- If so, encode the current byte as a literal. --
-------------------------------------------------------------------------
current_match_index := 1 - current_match_index;
Read_One_and_Get_Matches (matches (current_match_index));
--
-- Show_Matches (matches (1 - current_match_index), "------ Old");
-- Show_Matches (matches (current_match_index), " New");
--
if matches (current_match_index).count > 0 then
new_ld := matches (current_match_index).dl (matches (current_match_index).count); -- Longest new match
if (new_ld.length >= main.length + hurdle and then new_ld.distance < main.distance)
or else
(new_ld.length = main.length + hurdle + 1
and then not Has_much_smaller_Distance (main.distance, new_ld.distance))
or else new_ld.length > main.length + hurdle + 1
or else (new_ld.length >= main.length + hurdle - 1
and then main.length >= MATCH_LEN_MIN + 1
and then Has_much_smaller_Distance (new_ld.distance, main.distance))
then
-- We prefer literal, then the new match (or even better!)
Send_first_literal_of_match;
return;
end if;
--
-- Here we compare the scores of both match sets.
--
Reduce_consecutive_max_lengths (matches (current_match_index));
if LZMA_friendly then
Get_supplemental_Matches_from_Repeat_Matches (matches (current_match_index));
end if;
Estimate_DL_Codes (
matches, 1 - current_match_index, (1 => cur_literal),
index_max_score, set_max_score, match_trace
);
if set_max_score = 1 - current_match_index then
-- Old match is seems better.
main := matches (set_max_score).dl (index_max_score);
else
-- We prefer at least a literal, then a new, better match.
Send_first_literal_of_match;
return;
end if;
end if;
if LZMA_friendly then
limit := Integer'Max (main.length - 1, MATCH_LEN_MIN);
for rep in rep_dist'Range loop
if Compute_Match_Length (rep_dist (rep), limit) = limit then
-- A "literal then DL_Code (some distance, main.length - 1)" match
-- is verified and could use the stack of last distances -> got for it!
Send_first_literal_of_match;
return;
end if;
end loop;
end if;
pragma Assert (Is_match_correct (0));
Skip (main.length - 2);
-- Put_Line("[DL B]" & mainDist'Img & mainLen'Img);
Send_DL_code (main.distance, main.length);
end Get_Next_Symbol;
procedure Deallocation is
begin
Dispose (buf);
Dispose (tree);
Dispose (Hash234.hash4Table);
end Deallocation;
actual_written, avail : Integer;
begin
-- NB: heap allocation used only for convenience because of
-- the small default stack sizes on some compilers.
buf := new Byte_Array (0 .. getBufSize);
--
actual_written := Fill_Window (String_buffer_size);
if actual_written > 0 then
loop
Get_Next_Symbol;
avail := Get_Available;
if avail = 0 then
actual_written := Fill_Window (String_buffer_size);
exit when actual_written = 0;
end if;
end loop;
end if;
Deallocation;
exception
when others =>
Deallocation;
raise;
end LZ77_using_BT4;
procedure LZ77_by_Rich is
-- * PROG2.C [lz77a.c] *
-- * Simple Hashing LZ77 Sliding Dictionary Compression Program *
-- * By Rich Geldreich, Jr. October, 1993 *
-- * Originally compiled with QuickC v2.5 in the small model. *
-- * This program uses more efficient code to delete strings from *
-- * the sliding dictionary compared to PROG1.C, at the expense of *
-- * greater memory requirements. See the HashData and DeleteData *
-- * subroutines. *
--
-- Comments by GdM, 2019+ appear in square brackets: [...]
-- Set this to True for a greedy encoder.
GREEDY : constant Boolean := False; -- [original: False]
-- Ratio vs. speed constant [ Is it really a ratio? ].
-- The larger this constant, the better the compression.
MAXCOMPARES : constant := 4096; -- [original: 75; good: 2400; from Info-Zip: 4096]
-- Unused entry code.
NIL : constant := 16#FFFF#;
-- /* bits per symbol- normally 8 for general purpose compression */
-- #define CHARBITS : constant := 8; [ NB: dictionary uses char (byte) ]
-- Minimum match length & maximum match length.
THRESHOLD_Rich : constant := 2;
MATCHBITS : constant := 8; -- [original: 4]
-- [original: 2 ** MATCHBITS + THRESHOLD - 1]
MAXMATCH : constant := 2 ** MATCHBITS + THRESHOLD_Rich; -- 258 is Deflate-friendly.
-- Sliding dictionary size and hash table's size.
-- Some combinations of HASHBITS and THRESHOLD values will not work
-- correctly because of the way this program hashes strings.
DICTBITS : constant := 15; -- [original: 13]
HASHBITS : constant := 13; -- [original: 10]
--
DICTSIZE : constant := 2 ** DICTBITS;
HASHSIZE : constant := 2 ** HASHBITS;
-- # bits to shift after each XOR hash
-- This constant must be high enough so that only THRESHOLD + 1
-- characters are in the hash accumulator at one time.
SHIFTBITS : constant := ((HASHBITS + THRESHOLD_Rich) / (THRESHOLD_Rich + 1));
-- Sector size constants [the dictionary is partitoned in sectors].
SECTORBIT : constant := 13; -- [original: 10; OK: 13]
SECTORLEN : constant := 2 ** SECTORBIT;
HASH_MASK_1 : constant := 16#8000#; -- [ was called HASHFLAG1 ]
HASH_MASK_2 : constant := 16#7FFF#; -- [ was called HASHFLAG2 ]
-- Dictionary plus MAXMATCH extra chars for string comparisions.
dict : array (Integer_M32'(0) .. DICTSIZE + MAXMATCH - 1) of Byte;
subtype Unsigned_int is Unsigned_16;
-- Hash table & link list tables.
-- [ So far we index the hash table with Integer (minimum 16 bit signed) ]
hash : array (0 .. HASHSIZE - 1) of Unsigned_int := (others => NIL);
-- [ nextlink: in lz77a.c: only through DICTSIZE - 1,
-- although Init has: nextlink[DICTSIZE] = NIL. In doubt we set the
-- 'Last to DICTSIZE and fill everything with NIL... ]
nextlink : array (Integer_M32'(0) .. DICTSIZE) of Unsigned_int := (others => NIL);
lastlink : array (Integer_M32'(0) .. DICTSIZE - 1) of Unsigned_int := (others => NIL);
-- Loads dictionary with characters from the input stream.
--
procedure Load_Dict (dictpos : Integer_M32; actually_read : out Integer_M32) is
i : Integer_M32 := 0;
begin
while More_Bytes loop
dict (dictpos + i) := Read_Byte;
i := i + 1;
exit when i = SECTORLEN;
end loop;
-- Since the dictionary is a ring buffer, copy the characters at
-- the very start of the dictionary to the end
-- [this avoids having to use an "and" or a "mod" operator when searching].
--
if dictpos = 0 then
for j in Integer_M32'(0) .. MAXMATCH - 1 loop
dict (j + DICTSIZE) := dict (j);
end loop;
end if;
actually_read := i;
end Load_Dict;
-- Deletes data from the dictionary search structures
-- This is only done when the number of bytes to be
-- compressed exceeds the dictionary's size.
--
procedure Delete_Data (dictpos : Integer_M32) is
j, k : Integer_M32;
begin
-- Delete all references to the sector being deleted.
k := dictpos + SECTORLEN;
for i in dictpos .. k - 1 loop
j := Integer_M32 (lastlink (i));
if (Unsigned_int (j) and HASH_MASK_1) /= 0 then
if j /= NIL then
hash (Integer (Unsigned_int (j) and HASH_MASK_2)) := NIL;
end if;
else
nextlink (j) := NIL;
end if;
end loop;
end Delete_Data;
-- Hash data just entered into dictionary.
-- XOR hashing is used here, but practically any hash function will work.
--
procedure Hash_Data (dictpos, bytestodo : Integer_M32) is
j : Integer;
k : Integer_M32;
begin
if bytestodo <= THRESHOLD_Rich then -- Not enough bytes in sector for match?
nextlink (dictpos .. dictpos + bytestodo - 1) := (others => NIL);
lastlink (dictpos .. dictpos + bytestodo - 1) := (others => NIL);
else
-- Matches can't cross sector boundaries.
nextlink (dictpos + bytestodo - THRESHOLD_Rich .. dictpos + bytestodo - 1) := (others => NIL);
lastlink (dictpos + bytestodo - THRESHOLD_Rich .. dictpos + bytestodo - 1) := (others => NIL);
j := Integer (
Shift_Left (Unsigned_int (dict (dictpos)), SHIFTBITS)
xor
Unsigned_int (dict (dictpos + 1))
);
k := dictpos + bytestodo - THRESHOLD_Rich; -- Calculate end of sector.
for i in dictpos .. k - 1 loop
j := Integer (
(Shift_Left (Unsigned_int (j), SHIFTBITS) and (HASHSIZE - 1))
xor
Unsigned_int (dict (i + THRESHOLD_Rich))
);
lastlink (i) := Unsigned_int (j) or HASH_MASK_1;
nextlink (i) := hash (j);
if nextlink (i) /= NIL then
lastlink (Integer_M32 (nextlink (i))) := Unsigned_int (i);
end if;
hash (j) := Unsigned_int (i);
end loop;
end if;
end Hash_Data;
matchlength, matchpos : Integer_M32;
-- Finds match for string at position dictpos.
-- This search code finds the longest AND closest
-- match for the string at dictpos.
--
procedure Find_Match (dictpos, startlen : Integer_M32) is
i, j : Integer_M32;
match_byte : Byte;
begin
i := dictpos;
matchlength := startlen;
match_byte := dict (dictpos + matchlength);
--
Chances :
for compare_count in 1 .. MAXCOMPARES loop
i := Integer_M32 (nextlink (i)); -- Get next string in list.
if i = NIL then
return;
end if;
--
if dict (i + matchlength) = match_byte then -- Possible larger match?
j := 0;
-- Compare strings.
loop
exit when dict (dictpos + j) /= dict (i + j);
j := j + 1;
exit when j = MAXMATCH;
end loop;
--
if j > matchlength then -- Found larger match?
matchlength := j;
matchpos := i;
if matchlength = MAXMATCH then
return; -- Exit if largest possible match.
end if;
match_byte := dict (dictpos + matchlength);
end if;
end if;
end loop Chances; -- Keep on trying until we run out of chances.
end Find_Match;
-- Finds dictionary matches for characters in current sector.
--
procedure Dict_Search (dictpos, bytestodo : Integer_M32) is
i, j : Integer_M32;
matchlen1, matchpos1 : Integer_M32;
--
procedure Write_literal_pos_i is
pragma Inline (Write_literal_pos_i);
begin
Write_Literal (dict (i));
i := i + 1;
j := j - 1;
end Write_literal_pos_i;
begin
i := dictpos;
j := bytestodo;
if not GREEDY then -- Non-greedy search loop (slow).
while j /= 0 loop -- Loop while there are still characters left to be compressed.
Find_Match (i, THRESHOLD_Rich);
if matchlength > THRESHOLD_Rich then
matchlen1 := matchlength;
matchpos1 := matchpos;
loop
Find_Match (i + 1, matchlen1);
if matchlength > matchlen1 then
matchlen1 := matchlength;
matchpos1 := matchpos;
Write_literal_pos_i;
else
if matchlen1 > j then
matchlen1 := j;
if matchlen1 <= THRESHOLD_Rich then
Write_literal_pos_i;
exit;
end if;
end if;
Write_DL_Code (
length => Integer (matchlen1),
-- [The subtraction happens modulo 2**n, needs to be cleaned modulo 2**DICTSIZE]
distance => Integer ((Unsigned_32 (i) - Unsigned_32 (matchpos1)) and (DICTSIZE - 1))
);
i := i + matchlen1;
j := j - matchlen1;
exit;
end if;
end loop;
else
Write_literal_pos_i;
end if;
end loop; -- while j /= 0
else -- Greedy search loop (fast).
while j /= 0 loop -- Loop while there are still characters left to be compressed.
Find_Match (i, THRESHOLD_Rich);
if matchlength > j then
matchlength := j; -- Clamp matchlength.
end if;
if matchlength > THRESHOLD_Rich then -- Valid match?
Write_DL_Code (
length => Integer (matchlength),
-- [The subtraction happens modulo 2**n, needs to be cleaned modulo 2**DICTSIZE]
distance => Integer ((Unsigned_32 (i) - Unsigned_32 (matchpos)) and (DICTSIZE - 1))
);
i := i + matchlength;
j := j - matchlength;
else
Write_literal_pos_i;
end if;
end loop;
end if; -- Greedy or not.
end Dict_Search;
procedure Encode_Rich is
dictpos, actual_read : Integer_M32 := 0;
deleteflag : Boolean := False;
begin
loop
-- Delete old data from dictionary.
if deleteflag then
Delete_Data (dictpos);
end if;
-- Grab more data to compress.
Load_Dict (dictpos, actual_read);
exit when actual_read = 0;
-- Hash the data.
Hash_Data (dictpos, actual_read);
-- Find dictionary matches.
Dict_Search (dictpos, actual_read);
dictpos := dictpos + SECTORLEN;
-- Wrap back to beginning of dictionary when it's full.
if dictpos = DICTSIZE then
dictpos := 0;
deleteflag := True; -- Ok to delete now.
end if;
end loop;
end Encode_Rich;
begin
Encode_Rich;
end LZ77_by_Rich;
-- The following is for research purposes: compare different LZ77
-- algorithms applied to entropy encoders (Deflate, LZMA, ...).
procedure LZ77_from_Dump_File is
LZ77_Dump : Ada.Text_IO.File_Type;
tag : String (1 .. 3);
Wrong_LZ77_tag : exception;
a, b : Integer;
dummy : Byte;
use Ada.Integer_Text_IO;
begin
-- Pretend we compress the given stream.
-- Entire stream is consumed here.
while More_Bytes loop
dummy := Read_Byte;
end loop;
-- Now send dumped LZ77 data further.
Ada.Text_IO.Open (LZ77_Dump, Ada.Text_IO.In_File, "dump.lz77");
-- File from UnZip.Decompress, or LZMA.Decoding, some_trace = True mode
while not Ada.Text_IO.End_Of_File (LZ77_Dump) loop
Ada.Text_IO.Get (LZ77_Dump, tag);
if tag = "Lit" then
Get (LZ77_Dump, a);
Write_Literal (Byte (a));
elsif tag = "DLE" then
Get (LZ77_Dump, a);
Get (LZ77_Dump, b);
Write_DL_Code (a, b);
else
raise Wrong_LZ77_tag;
end if;
Ada.Text_IO.Skip_Line (LZ77_Dump);
end loop;
Ada.Text_IO.Close (LZ77_Dump);
end LZ77_from_Dump_File;
begin
case Method is
when LZHuf =>
LZ77_using_LZHuf;
when IZ_4 .. IZ_10 =>
LZ77_using_IZ (4 + Method_Type'Pos (Method) - Method_Type'Pos (IZ_4));
when BT4 =>
LZ77_using_BT4;
when Rich =>
LZ77_by_Rich;
when No_LZ77 =>
while More_Bytes loop
Write_Literal (Read_Byte);
end loop;
when Read_LZ77_Codes =>
LZ77_from_Dump_File;
end case;
end Encode;
end LZ77;
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.