Source file : bzip2-decoding.adb
1 -- Legal licensing note:
2
3 -- Copyright (c) 2009 .. 2024 Gautier de Montmollin (maintainer of the Ada version)
4 -- SWITZERLAND
5
6 -- Permission is hereby granted, free of charge, to any person obtaining a copy
7 -- of this software and associated documentation files (the "Software"), to deal
8 -- in the Software without restriction, including without limitation the rights
9 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 -- copies of the Software, and to permit persons to whom the Software is
11 -- furnished to do so, subject to the following conditions:
12
13 -- The above copyright notice and this permission notice shall be included in
14 -- all copies or substantial portions of the Software.
15
16 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 -- THE SOFTWARE.
23
24 -- NB: this is the MIT License, as found 21-Aug-2016 on the site
25 -- http://www.opensource.org/licenses/mit-license.php
26
27 -- Translated on 20-Oct-2009 by (New) P2Ada v. 15-Nov-2006
28 -- Rework by G. de Montmollin (see spec. for details)
29
30 with Ada.Text_IO, Ada.Unchecked_Deallocation;
31
32 package body BZip2.Decoding is
33
34 procedure Decompress is
35
36 --------------------------
37 -- Byte & Bit buffers --
38 --------------------------
39
40 bits_available : Natural := 0;
41 read_data : Byte := 0;
42 use Interfaces;
43
44 function Get_Bits (n : Natural) return Byte is
45 result_get_bits : Byte;
46 data : Byte;
47 begin
48 if n > bits_available then
49 data := Read_Byte;
50 result_get_bits := Shift_Right (read_data, 8 - n) or Shift_Right (data, 8 - (n - bits_available));
51 read_data := Shift_Left (data, n - bits_available);
52 bits_available := bits_available + 8;
53 else
54 result_get_bits := Shift_Right (read_data, 8 - n);
55 read_data := Shift_Left (read_data, n);
56 end if;
57 bits_available := bits_available - n;
58 return result_get_bits;
59 end Get_Bits;
60
61 function Get_Bits_32 (n : Natural) return Unsigned_32 is
62 begin
63 return Unsigned_32 (Get_Bits (n));
64 end Get_Bits_32;
65
66 function Get_Boolean return Boolean is
67 begin
68 return Boolean'Val (Get_Bits (1));
69 end Get_Boolean;
70
71 function Get_Byte return Byte is
72 begin
73 return Get_Bits (8);
74 end Get_Byte;
75
76 function Get_Cardinal_24 return Unsigned_32 is
77 begin
78 return Shift_Left (Get_Bits_32 (8), 16) or Shift_Left (Get_Bits_32 (8), 8) or Get_Bits_32 (8);
79 end Get_Cardinal_24;
80
81 function Get_Cardinal_32 return Unsigned_32 is
82 begin
83 return Shift_Left (Get_Bits_32 (8), 24) or
84 Shift_Left (Get_Bits_32 (8), 16) or
85 Shift_Left (Get_Bits_32 (8), 8) or
86 Get_Bits_32 (8);
87 end Get_Cardinal_32;
88
89 seq_to_unseq : array (0 .. 255) of Natural;
90 inuse_count : Natural;
91
92 -- Receive the mapping table. To save space, the in_use set is stored in pieces of 16 bits.
93 -- First 16 bits store which pieces of 16 bits are used, then the pieces follow.
94 procedure Receive_Mapping_Table is
95 in_use : array (0 .. 15) of Boolean;
96 begin
97 -- Receive the first 16 bits which tell which pieces are stored.
98 for i in in_use'Range loop
99 in_use (i) := Get_Boolean;
100 end loop;
101 -- Receive the used pieces.
102 inuse_count := 0;
103 for i in in_use'Range loop
104 if in_use (i) then
105 for j in 0 .. 15 loop
106 if Get_Boolean then
107 seq_to_unseq (inuse_count) := 16 * i + j;
108 inuse_count := inuse_count + 1;
109 end if;
110 end loop;
111 end if;
112 end loop;
113 end Receive_Mapping_Table;
114
115 entropy_coder_count : Byte;
116 selector_count : Natural;
117 selector, selector_mtf : array (0 .. max_selectors) of Byte;
118
119 trace : constant Boolean := False;
120
121 procedure Receive_Selectors is
122 value : array (Byte range 0 .. max_entropy_coders - 1) of Byte;
123 j, tmp, v : Byte;
124 begin
125
126 entropy_coder_count := Get_Bits (3);
127 if entropy_coder_count not in min_entropy_coders .. max_entropy_coders then
128 raise data_error
129 with
130 "Invalid BZip2 entropy coder count:" & entropy_coder_count'Image &
131 ", should be between" & min_entropy_coders'Image & " and" & max_entropy_coders'Image;
132 end if;
133 selector_count := Natural (Shift_Left (Get_Bits_32 (8), 7) or Get_Bits_32 (7)); -- Up to 32767.
134 if selector_count > max_selectors then
135 raise data_error with "Invalid BZip2 selector count, maximum is" & max_selectors'Image;
136 -- With standard settings, the maximum value is 18002.
137 end if;
138
139 if trace then
140 Ada.Text_IO.Put_Line ("Entropy coders:" & entropy_coder_count'Image);
141 Ada.Text_IO.Put_Line ("Selectors: . . " & selector_count'Image);
142 end if;
143
144 -- 1) Receive selector list, MTF-transformed:
145 for i in 0 .. selector_count - 1 loop
146 j := 0;
147 while Get_Boolean loop
148 j := j + 1;
149 if j > 5 then
150 raise data_error
151 with
152 "Invalid BZip2 entropy coder index, maximum is" &
153 Integer'Image (max_entropy_coders - 1);
154 end if;
155 end loop;
156 selector_mtf (i) := j;
157 end loop;
158
159 -- 2) De-transform selectors list:
160 for w in Byte range 0 .. entropy_coder_count - 1 loop
161 -- We start with 0, 1, 2, 3, ...:
162 value (w) := w;
163 end loop;
164 Undo_MTF_Values_For_Selectors :
165 for i in 0 .. selector_count - 1 loop
166 v := selector_mtf (i);
167 -- Move pos (v) to the front.
168 tmp := value (v);
169 while v /= 0 loop
170 value (v) := value (v - 1);
171 v := v - 1;
172 end loop;
173 value (0) := tmp;
174 selector (i) := tmp;
175 end loop Undo_MTF_Values_For_Selectors;
176
177 end Receive_Selectors;
178
179 type Alphabet_U32_array is array (0 .. max_alphabet_size) of Unsigned_32;
180 type Alphabet_Nat_array is array (0 .. max_alphabet_size) of Natural;
181
182 procedure Create_Huffman_Decoding_Tables
183 (limit, base, perm : in out Alphabet_U32_array;
184 length : in Alphabet_Nat_array;
185 min_len, max_len : in Natural;
186 alphabet_size : in Integer)
187 is
188 pp, idx : Integer;
189 vec : Unsigned_32;
190 begin
191 pp := 0;
192 for i in min_len .. max_len loop
193 for j in 0 .. alphabet_size - 1 loop
194 if length (j) = i then
195 perm (pp) := Unsigned_32 (j);
196 pp := pp + 1;
197 end if;
198 end loop;
199 end loop;
200 for i in 0 .. max_code_len_max - 1 loop
201 base (i) := 0;
202 limit (i) := 0;
203 end loop;
204 for i in 0 .. alphabet_size - 1 loop
205 idx := length (i) + 1;
206 base (idx) := base (idx) + 1;
207 end loop;
208 for i in 1 .. max_code_len_max - 1 loop
209 base (i) := base (i) + base (i - 1);
210 end loop;
211 vec := 0;
212 for i in min_len .. max_len loop
213 vec := vec + base (i + 1) - base (i);
214 limit (i) := vec - 1;
215 vec := vec * 2;
216 end loop;
217 for i in min_len + 1 .. max_len loop
218 base (i) := (limit (i - 1) + 1) * 2 - base (i);
219 end loop;
220 end Create_Huffman_Decoding_Tables;
221
222 type U32_Array is array (Natural_32 range <>) of Unsigned_32;
223 type U32_Array_Access is access U32_Array;
224 procedure Dispose is new Ada.Unchecked_Deallocation (U32_Array, U32_Array_Access);
225
226 alphabet_size_overall : Natural; -- Alphabet size used for all groups
227
228 -- Tables for the Huffman trees used for decoding MTF values.
229 limit, base, perm : array (Byte range 0 .. max_entropy_coders - 1) of Alphabet_U32_array;
230 min_lens : array (Byte range 0 .. max_entropy_coders - 1) of Natural;
231 len : array (Byte range 0 .. max_entropy_coders - 1) of Alphabet_Nat_array;
232
233 procedure Receive_Huffman_Bit_Lengths is
234 current_bit_length : Natural;
235 begin
236 for t in 0 .. entropy_coder_count - 1 loop
237 current_bit_length := Natural (Get_Bits (5));
238 if current_bit_length not in 1 .. max_code_len_bzip2_1_0_2 then
239 raise data_error with
240 "In BZip2 data, invalid initial bit length for a Huffman tree: got length" &
241 current_bit_length'Image & "; range should be 1 .." & max_code_len_bzip2_1_0_2'Image;
242 end if;
243 for symbol in 0 .. alphabet_size_overall - 1 loop
244 loop
245 exit when not Get_Boolean;
246 if Get_Boolean then
247 current_bit_length := current_bit_length - 1;
248 else
249 current_bit_length := current_bit_length + 1;
250 end if;
251 end loop;
252 if current_bit_length not in 1 .. max_code_len_bzip2_1_0_2 then
253 raise data_error with
254 "In BZip2 data, invalid bit length for a Huffman tree: for symbol " &
255 symbol'Image & " got length" &
256 current_bit_length'Image & "; range should be 1 .." & max_code_len_bzip2_1_0_2'Image;
257 end if;
258 len (t)(symbol) := current_bit_length;
259 end loop;
260 end loop;
261 end Receive_Huffman_Bit_Lengths;
262
263 procedure Make_Huffman_Tables is
264 min_len, max_len : Natural;
265 begin
266 for t in 0 .. entropy_coder_count - 1 loop
267 min_len := 32;
268 max_len := 0;
269 for i in 0 .. alphabet_size_overall - 1 loop
270 if len (t)(i) > max_len then
271 max_len := len (t)(i);
272 end if;
273 if len (t)(i) < min_len then
274 min_len := len (t)(i);
275 end if;
276 end loop;
277 Create_Huffman_Decoding_Tables
278 (limit (t), base (t), perm (t), len (t), min_len, max_len, alphabet_size_overall);
279 min_lens (t) := min_len;
280 end loop;
281 end Make_Huffman_Tables;
282
283 block_size : Natural_32;
284 tt : U32_Array_Access;
285
286 -------------------------
287 -- MTF - Move To Front --
288 -------------------------
289
290 cf_tab : array (0 .. 257) of Natural_32;
291 tt_count : Natural_32;
292
293 procedure Receive_MTF_Values is
294 -- NB: it seems that MTF is also performed in this procedure (where else?).
295 mtf_a_size : constant := 4096;
296 mtf_l_size : constant := 16;
297 mtf_base : array (0 .. 256 / mtf_l_size - 1) of Natural;
298 mtf_a : array (0 .. mtf_a_size - 1) of Natural;
299 --
300 procedure Init_MTF is
301 k : Natural := mtf_a_size - 1;
302 begin
303 for i in reverse 0 .. 256 / mtf_l_size - 1 loop
304 for j in reverse 0 .. mtf_l_size - 1 loop
305 mtf_a (k) := i * mtf_l_size + j;
306 k := k - 1;
307 end loop;
308 mtf_base (i) := k + 1;
309 end loop;
310 end Init_MTF;
311 --
312 pos_countdown, group_no : Integer;
313 g_sel : Byte;
314 g_min_len : Natural;
315 --
316 function Get_MTF_Value return Unsigned_32 is
317 z_n : Natural;
318 z_vec : Unsigned_32;
319 perm_index : Integer;
320 begin
321 if pos_countdown = 0 then
322 pos_countdown := group_size;
323 group_no := group_no + 1;
324 if group_no > selector_count - 1 then
325 raise data_error
326 with
327 "In BZip2 data, selector index exceeds selector count," &
328 selector_count'Image;
329 end if;
330 g_sel := selector (group_no);
331 if g_sel not in base'Range then
332 raise data_error
333 with "In BZip2 data, invalid selector value," & g_sel'Image;
334 end if;
335 g_min_len := min_lens (g_sel);
336 end if;
337 pos_countdown := pos_countdown - 1;
338 z_n := g_min_len;
339 z_vec := Get_Bits_32 (z_n);
340 while z_vec > limit (g_sel)(z_n) loop
341 z_n := z_n + 1;
342 z_vec := Shift_Left (z_vec, 1) or Get_Bits_32 (1);
343 end loop;
344 if z_n not in Alphabet_U32_array'Range then
345 raise data_error with "In BZip2 data, invalid data in Huffman decoding [1]";
346 end if;
347 if z_vec > 2 ** (Integer'Size - 1) - 1 then
348 raise data_error with "In BZip2 data, invalid data in Huffman decoding [2]";
349 end if;
350 perm_index := Integer (z_vec - base (g_sel)(z_n));
351 if perm_index not in Alphabet_U32_array'Range then
352 raise data_error with "In BZip2 data, invalid data in Huffman decoding [3]";
353 end if;
354 return perm (g_sel)(perm_index);
355 end Get_MTF_Value;
356 --
357 procedure Move_MTF_Block is
358 j, k : Natural;
359 begin
360 k := mtf_a_size;
361 for i in reverse 0 .. 256 / mtf_l_size - 1 loop
362 j := mtf_base (i);
363 mtf_a (k - 16 .. k - 1) := mtf_a (j .. j + 15);
364 k := k - 16;
365 mtf_base (i) := k;
366 end loop;
367 end Move_MTF_Block;
368 --
369 t : Natural_32;
370 next_sym : Unsigned_32;
371 es : Natural_32;
372 n : Natural;
373 p, q : Natural; -- indexes mtf_a
374 u, v : Natural; -- indexes mtf_base
375 lno, off : Natural;
376
377 procedure Setup_Table is
378 -- Setup cf_tab to facilitate generation of inverse transformation.
379 t, nn : Natural_32;
380 begin
381 t := 0;
382 for i in 0 .. 256 loop
383 nn := cf_tab (i);
384 cf_tab (i) := t;
385 t := t + nn;
386 end loop;
387 end Setup_Table;
388
389 nn : Natural;
390
391 begin -- Receive_MTF_Values
392 group_no := -1;
393 pos_countdown := 0;
394 t := 0;
395 cf_tab := (others => 0);
396 Init_MTF;
397 next_sym := Get_MTF_Value;
398 --
399 while Natural (next_sym) /= inuse_count + 1 loop
400 if next_sym <= run_b then
401 es := 0;
402 n := 0;
403 loop
404 es := es + Natural_32 (Shift_Left (next_sym + 1, n));
405 n := n + 1;
406 next_sym := Get_MTF_Value;
407 exit when next_sym > run_b;
408 end loop;
409 n := seq_to_unseq (mtf_a (mtf_base (0)));
410 cf_tab (n) := cf_tab (n) + es;
411 if t + es > sub_block_size * block_size then
412 raise data_error with "Index out of block's range [1]";
413 end if;
414 while es > 0 loop
415 tt (t) := Unsigned_32 (n);
416 es := es - 1;
417 t := t + 1;
418 end loop;
419 else
420 -- NB: Likely, the reverse MTF algo happens here.
421 nn := Natural (next_sym - 1); -- Here we know: next_sym > 1, nn > 0.
422 if nn < mtf_l_size then
423 -- Avoid the costs of the general case.
424 p := mtf_base (0);
425 q := p + nn; -- We know: q > p.
426 n := mtf_a (q);
427 loop
428 mtf_a (q) := mtf_a (q - 1);
429 q := q - 1;
430 exit when q = p;
431 end loop;
432 mtf_a (q) := n;
433 else
434 -- General case.
435 lno := nn / mtf_l_size;
436 off := nn mod mtf_l_size;
437 p := mtf_base (lno);
438 q := p + off; -- q >= p
439 n := mtf_a (q);
440 while q /= p loop
441 mtf_a (q) := mtf_a (q - 1);
442 q := q - 1;
443 end loop;
444 u := mtf_base'First;
445 v := u + lno;
446 loop
447 mtf_a (mtf_base (v)) := mtf_a (mtf_base (v - 1) + mtf_l_size - 1);
448 v := v - 1;
449 mtf_base (v) := mtf_base (v) - 1;
450 exit when v = u;
451 end loop;
452 mtf_a (mtf_base (v)) := n;
453 if mtf_base (v) = 0 then
454 Move_MTF_Block;
455 end if;
456 end if;
457 cf_tab (seq_to_unseq (n)) := cf_tab (seq_to_unseq (n)) + 1;
458 tt (t) := Unsigned_32 (seq_to_unseq (n));
459 t := t + 1;
460 if t > sub_block_size * block_size then
461 raise data_error with "Index out of block's range [2]";
462 end if;
463 next_sym := Get_MTF_Value;
464 end if;
465 end loop;
466 tt_count := t;
467 Setup_Table;
468 end Receive_MTF_Values;
469
470 procedure BWT_Detransform is
471 a : Unsigned_32 := 0;
472 r : Natural_32;
473 i255 : Natural;
474 begin
475 for p in 0 .. tt_count - 1 loop
476 i255 := Natural (tt (p) and 16#ff#);
477 r := cf_tab (i255);
478 cf_tab (i255) := cf_tab (i255) + 1;
479 tt (r) := tt (r) or a;
480 a := a + 16#100#;
481 end loop;
482 end BWT_Detransform;
483
484 computed_combined_crc, computed_block_crc : Unsigned_32;
485 block_origin : Natural_32 := 0;
486
487 block_counter : Natural := 0;
488
489 procedure RLE_1 is
490 decode_available : Natural_32 := Natural_32'Last;
491 next_rle_idx : Integer_32 := -2;
492
493 function RLE_Byte return Byte with Inline is
494 b : Byte;
495 begin
496 if next_rle_idx not in tt'Range then
497 raise data_error with "BZip2: invalid index for data output";
498 end if;
499 b := Byte (tt (next_rle_idx) and 16#FF#);
500 next_rle_idx := Natural_32 (Shift_Right (tt (next_rle_idx), 8));
501 decode_available := decode_available - 1;
502 return b;
503 end RLE_Byte;
504
505 rle_len : Natural := 0;
506 data, old_data : Byte := 0;
507
508 procedure Flush_Run with Inline is
509 begin
510 for i in 1 .. rle_len loop
511 Write_Byte (old_data);
512 CRC.Update (computed_block_crc, old_data);
513 end loop;
514 end Flush_Run;
515
516 begin
517 decode_available := tt_count;
518 next_rle_idx := Natural_32 (Shift_Right (tt (block_origin), 8));
519
520 while decode_available > 0 loop
521 -- On first iteration, because rle_len = 0, the run won't be
522 -- flushed, then old_data being undefined is not an issue.
523 data := RLE_Byte;
524 if rle_len > 0 and then data /= old_data then
525 -- Run break.
526 Flush_Run;
527 rle_len := 1; -- New run with 1 element, data.
528 else
529 -- Length 0 (no old data), or old = new.
530 rle_len := rle_len + 1;
531 if rle_len = 4 then
532 if decode_available > 0 then
533 rle_len := rle_len + Natural (RLE_Byte);
534 end if;
535 Flush_Run; -- Force a run break.
536 rle_len := 0; -- Run is empty at this point.
537 end if;
538 end if;
539 old_data := data;
540 end loop;
541 Flush_Run;
542 end RLE_1;
543
544 -- Decode a new compressed block.
545 function Decode_Block return Boolean is
546 magic : String (1 .. 6);
547 stored_crc : Unsigned_32;
548 dummy : Boolean;
549 begin
550 for i in 1 .. 6 loop
551 magic (i) := Character'Val (Get_Byte);
552 end loop;
553 if magic = block_header_magic then
554 block_counter := block_counter + 1;
555 if check_crc then
556 CRC.Init (computed_block_crc);
557 end if;
558 stored_crc := Get_Cardinal_32;
559 if trace then
560 Ada.Text_IO.Put_Line ("Block CRC (stored): " & stored_crc'Image);
561 end if;
562 dummy := Get_Boolean; -- Randomized flag.
563 block_origin := Natural_32 (Get_Cardinal_24);
564 Receive_Mapping_Table;
565 alphabet_size_overall := inuse_count + 2;
566 Receive_Selectors;
567 Receive_Huffman_Bit_Lengths;
568 Make_Huffman_Tables;
569 -- Move-to-Front:
570 Receive_MTF_Values;
571 -- Undo the Burrows Wheeler Transformation.
572 BWT_Detransform;
573 --
574 RLE_1;
575 --
576 if trace then
577 Ada.Text_IO.Put_Line ("Block CRC (computed): " & computed_block_crc'Image);
578 end if;
579 if check_crc then
580 computed_block_crc := CRC.Final (computed_block_crc);
581 if computed_block_crc /= stored_crc then
582 raise block_crc_check_failed
583 with
584 "BZip2: mismatch in block" & block_counter'Image &
585 "'s CRC: computed =" & computed_block_crc'Image &
586 ", stored =" & stored_crc'Image;
587 end if;
588 computed_combined_crc := Rotate_Left (computed_combined_crc, 1) xor computed_block_crc;
589 if trace then
590 Ada.Text_IO.Put_Line ("Combined CRC (computed): " & computed_combined_crc'Image);
591 end if;
592 end if;
593 return True;
594 elsif magic = stream_footer_magic then
595 stored_crc := Get_Cardinal_32;
596 if check_crc and then stored_crc /= computed_combined_crc then
597 raise block_crc_check_failed
598 with
599 "BZip2: mismatch in combined blocks' CRC: computed =" &
600 computed_combined_crc'Image & "; stored =" & stored_crc'Image;
601 end if;
602 if trace then
603 Ada.Text_IO.Put_Line ("Combined CRC (stored): " & stored_crc'Image);
604 end if;
605 return False;
606 else
607 raise bad_block_magic with "BZip2: expecting block magic or stream footer";
608 end if;
609 end Decode_Block;
610
611 procedure Init_Stream_Decompression is
612 magic : String (1 .. 3);
613 b : Byte;
614 begin
615 -- Read the magic.
616 for i in magic'Range loop
617 b := Read_Byte;
618 magic (i) := Character'Val (b);
619 end loop;
620 if magic /= "BZh" then
621 raise bad_header_magic;
622 end if;
623 -- Read the block size and allocate the working array.
624 b := Read_Byte;
625 if b not in Character'Pos ('1') .. Character'Pos ('9') then
626 raise data_error with "Received bad BZip2 block size, should be in '1' .. '9'";
627 end if;
628 block_size := Natural_32 (b) - Character'Pos ('0');
629 tt := new U32_Array (0 .. block_size * sub_block_size);
630 computed_combined_crc := 0;
631 end Init_Stream_Decompression;
632
633 begin
634 Init_Stream_Decompression;
635 loop
636 exit when not Decode_Block;
637 end loop;
638 Dispose (tt);
639 end Decompress;
640
641 end BZip2.Decoding;
Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.