Source file : huffman-encoding-length_limited_coding.adb
1 -- Huffman.Encoding.Length_Limited_Coding
2 ------------------------------------------
3 -- Legal licensing note:
4
5 -- Copyright (c) 2016 .. 2019 Gautier de Montmollin (maintainer of the Ada version)
6 -- SWITZERLAND
7 --
8 -- The copyright holder is only the maintainer of the Ada version;
9 -- authors of the C code and those of the algorithm are cited below.
10
11 -- Permission is hereby granted, free of charge, to any person obtaining a copy
12 -- of this software and associated documentation files (the "Software"), to deal
13 -- in the Software without restriction, including without limitation the rights
14 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 -- copies of the Software, and to permit persons to whom the Software is
16 -- furnished to do so, subject to the following conditions:
17
18 -- The above copyright notice and this permission notice shall be included in
19 -- all copies or substantial portions of the Software.
20
21 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 -- THE SOFTWARE.
28
29 -- NB: this is the MIT License, as found 21-Aug-2016 on the site
30 -- http://www.opensource.org/licenses/mit-license.php
31
32 -- Author: lode.vandevenne [*] gmail [*] com (Lode Vandevenne)
33 -- Author: jyrki.alakuijala [*] gmail [*] com (Jyrki Alakuijala)
34
35 -- Bounded package merge algorithm, based on the paper
36 -- "A Fast and Space-Economical Algorithm for Length-Limited Coding
37 -- Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
38
39 -- Translated by G. de Montmollin to Ada from katajainen.c (Zopfli project), 7-Feb-2016
40 --
41 -- Main technical differences to katajainen.c:
42 -- - pointers are not used, array indices instead
43 -- - all structures are allocated on stack
44 -- - sub-programs are nested, then unneeded parameters are removed
45
46 procedure Huffman.Encoding.Length_Limited_Coding
47 (frequencies : in Count_Array;
48 bit_lengths : out Length_Array)
49 is
50 subtype Index_Type is Count_Type;
51
52 null_index : constant Index_Type := Index_Type'Last;
53
54 -- Nodes forming chains.
55 type Node is record
56 weight : Count_Type;
57 count : Count_Type; -- Number of leaves before this chain.
58 tail : Index_Type := null_index; -- Previous node(s) of this chain, or null_index if none.
59 in_use : Boolean := False; -- Tracking for garbage collection.
60 end record;
61
62 type Leaf_Node is record
63 weight : Count_Type;
64 symbol : Alphabet;
65 end record;
66
67 -- Memory pool for nodes.
68 pool : array (0 .. Index_Type (2 * max_bits * (max_bits + 1) - 1)) of Node;
69 pool_next : Index_Type := pool'First;
70
71 type Index_pair is array (Index_Type'(0) .. 1) of Index_Type;
72 lists : array (0 .. Index_Type (max_bits - 1)) of Index_pair;
73
74 type Leaf_array is array (Index_Type range <>) of Leaf_Node;
75 leaves : Leaf_array (0 .. frequencies'Length - 1);
76
77 num_symbols : Count_Type := 0; -- Amount of symbols with frequency > 0.
78 num_Boundary_PM_runs : Count_Type;
79
80 too_many_symbols_for_length_limit : exception;
81 zero_length_but_nonzero_frequency : exception;
82 nonzero_length_but_zero_frequency : exception;
83 length_exceeds_length_limit : exception;
84 buggy_sorting : exception;
85
86 procedure Init_Node (weight, count : Count_Type; tail, node_idx : Index_Type) is
87 begin
88 pool (node_idx).weight := weight;
89 pool (node_idx).count := count;
90 pool (node_idx).tail := tail;
91 pool (node_idx).in_use := True;
92 end Init_Node;
93
94 -- Finds a free location in the memory pool. Performs garbage collection if needed.
95 -- If use_lists = True, used to mark in-use nodes during garbage collection.
96
97 function Get_Free_Node (use_lists : Boolean) return Index_Type is
98 node_idx : Index_Type;
99 begin
100 loop
101 if pool_next > pool'Last then
102 -- Garbage collection.
103 for i in pool'Range loop
104 pool (i).in_use := False;
105 end loop;
106 if use_lists then
107 for i in 0 .. Index_Type (max_bits * 2 - 1) loop
108 node_idx := lists (i / 2)(i mod 2);
109 while node_idx /= null_index loop
110 pool (node_idx).in_use := True;
111 node_idx := pool (node_idx).tail;
112 end loop;
113 end loop;
114 end if;
115 pool_next := pool'First;
116 end if;
117 exit when not pool (pool_next).in_use; -- Found one.
118 pool_next := pool_next + 1;
119 end loop;
120 pool_next := pool_next + 1;
121 return pool_next - 1;
122 end Get_Free_Node;
123
124 -- Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
125 -- new chain is, depending on the weights, a leaf or a combination of two chains
126 -- from the previous list.
127 -- index: The index of the list in which a new chain or leaf is required.
128 -- final: Whether this is the last time this function is called. If it is then it
129 -- is no more needed to recursively call self.
130
131 procedure Boundary_PM (index : Index_Type; final : Boolean) is
132 newchain : Index_Type;
133 oldchain : Index_Type;
134 lastcount : constant Count_Type := pool (lists (index)(1)).count; -- Count of last chain of list.
135 sum : Count_Type;
136 begin
137 if index = 0 and lastcount >= num_symbols then
138 return;
139 end if;
140 newchain := Get_Free_Node (use_lists => True);
141 oldchain := lists (index)(1);
142 -- These are set up before the recursive calls below, so that there is a list
143 -- pointing to the new node, to let the garbage collection know it's in use.
144 lists (index) := (oldchain, newchain);
145
146 if index = 0 then
147 -- New leaf node in list 0.
148 Init_Node (leaves (lastcount).weight, lastcount + 1, null_index, newchain);
149 else
150 sum := pool (lists (index - 1)(0)).weight + pool (lists (index - 1)(1)).weight;
151 if lastcount < num_symbols and then sum > leaves (lastcount).weight then
152 -- New leaf inserted in list, so count is incremented.
153 Init_Node (leaves (lastcount).weight, lastcount + 1, pool (oldchain).tail, newchain);
154 else
155 Init_Node (sum, lastcount, lists (index - 1)(1), newchain);
156 if not final then
157 -- Two lookahead chains of previous list used up, create new ones.
158 Boundary_PM (index - 1, False);
159 Boundary_PM (index - 1, False);
160 end if;
161 end if;
162 end if;
163 end Boundary_PM;
164
165 -- Initializes each list with as lookahead chains the two leaves with lowest weights.
166
167 procedure Init_Lists is
168 node0 : constant Index_Type := Get_Free_Node (use_lists => False);
169 node1 : constant Index_Type := Get_Free_Node (use_lists => False);
170 begin
171 Init_Node (leaves (0).weight, 1, null_index, node0);
172 Init_Node (leaves (1).weight, 2, null_index, node1);
173 lists := (others => (node0, node1));
174 end Init_Lists;
175
176 -- Converts result of boundary package-merge to the bit_lengths. The result in the
177 -- last chain of the last list contains the amount of active leaves in each list.
178 -- chain: Chain to extract the bit length from (last chain from last list).
179
180 procedure Extract_Bit_Lengths (chain : Index_Type) is
181 node_idx : Index_Type := chain;
182 begin
183 while node_idx /= null_index loop
184 for i in 0 .. pool (node_idx).count - 1 loop
185 bit_lengths (leaves (i).symbol) := bit_lengths (leaves (i).symbol) + 1;
186 end loop;
187 node_idx := pool (node_idx).tail;
188 end loop;
189 end Extract_Bit_Lengths;
190
191 function "<"(a, b : Leaf_Node) return Boolean is
192 begin
193 return a.weight < b.weight;
194 end "<";
195
196 procedure Quick_sort (a : in out Leaf_array) is
197 n : constant Index_Type := a'Length;
198 i, j : Index_Type;
199 p, t : Leaf_Node;
200 begin
201 if n < 2 then
202 return;
203 end if;
204 p := a (n / 2 + a'First);
205 i := 0;
206 j := n - 1;
207 loop
208 while a (i + a'First) < p loop
209 i := i + 1;
210 end loop;
211 while p < a (j + a'First) loop
212 j := j - 1;
213 end loop;
214 exit when i >= j;
215 t := a (i + a'First);
216 a (i + a'First) := a (j + a'First);
217 a (j + a'First) := t;
218 i := i + 1;
219 j := j - 1;
220 end loop;
221 Quick_sort (a (a'First .. a'First + i - 1));
222 Quick_sort (a (a'First + i .. a'Last));
223 end Quick_sort;
224
225 paranoid : constant Boolean := False;
226
227 begin
228 bit_lengths := (others => 0);
229 -- Count used symbols and place them in the leaves.
230 for a in Alphabet loop
231 if frequencies (a) > 0 then
232 leaves (num_symbols) := (frequencies (a), a);
233 num_symbols := num_symbols + 1;
234 end if;
235 end loop;
236 -- Check special cases and error conditions.
237 if num_symbols > 2 ** max_bits then
238 raise too_many_symbols_for_length_limit; -- Error, too few max_bits to represent symbols.
239 end if;
240 if num_symbols = 0 then
241 return; -- No symbols at all. OK.
242 end if;
243 if num_symbols = 1 then
244 bit_lengths (leaves (0).symbol) := 1;
245 return; -- Only one symbol, give it bit length 1, not 0. OK.
246 end if;
247 -- Sort the leaves from lightest to heaviest.
248 Quick_sort (leaves (0 .. num_symbols - 1));
249 if paranoid then
250 for i in 1 .. num_symbols - 1 loop
251 if leaves (i) < leaves (i - 1) then
252 raise buggy_sorting;
253 end if;
254 end loop;
255 end if;
256 Init_Lists;
257 -- In the last list, 2 * num_symbols - 2 active chains need to be created. Two
258 -- are already created in the initialization. Each Boundary_PM run creates one.
259 num_Boundary_PM_runs := 2 * num_symbols - 4;
260 for i in 1 .. num_Boundary_PM_runs loop
261 Boundary_PM (Index_Type (max_bits - 1), i = num_Boundary_PM_runs);
262 end loop;
263 Extract_Bit_Lengths (lists (Index_Type (max_bits - 1))(1));
264 if paranoid then
265 -- Done; some checks before leaving. Not checked: completeness of Huffman codes.
266 for a in Alphabet loop
267 if frequencies (a) = 0 then
268 if bit_lengths (a) > 0 then
269 raise nonzero_length_but_zero_frequency; -- Never happened so far
270 end if;
271 else
272 if bit_lengths (a) = 0 then
273 raise zero_length_but_nonzero_frequency; -- Happened before null_index fix
274 elsif bit_lengths (a) > max_bits then
275 raise length_exceeds_length_limit; -- Never happened so far
276 end if;
277 end if;
278 end loop;
279 end if;
280 end Huffman.Encoding.Length_Limited_Coding;
Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.