Source file : bzip2.ads
1 -- BZip2
2 ---------
3 --
4 -- Pure Ada 2012+ code, 100% portable: OS-, CPU- and compiler- independent.
5 --
6 -- bzip2 compresses files using the Burrows-Wheeler block-sorting text
7 -- compression algorithm, and Huffman coding. Compression is generally
8 -- considerably better than that achieved by more conventional
9 -- LZ77/LZ78-based compressors, and approaches the performance of the
10 -- PPM family of statistical compressors.
11 --
12 -- bzip2 was created by Julian Seward in 1996.
13 -- Web site: https://sourceware.org/bzip2/
14 --
15 -- Documentation pointers:
16 --
17 -- BZip2 unofficial format specification (2016)
18 -- https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
19 --
20 -- Burrows-Wheeler transform
21 -- https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform
22 --
23 -- MTF Move-To-Front
24 -- https://en.wikipedia.org/wiki/Move-to-front_transform
25 --
26 -- Legal licensing note:
27 --
28 -- Copyright (c) 2018 .. 2025 Gautier de Montmollin
29 -- SWITZERLAND
30 --
31 -- Permission is hereby granted, free of charge, to any person obtaining a copy
32 -- of this software and associated documentation files (the "Software"), to deal
33 -- in the Software without restriction, including without limitation the rights
34 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
35 -- copies of the Software, and to permit persons to whom the Software is
36 -- furnished to do so, subject to the following conditions:
37 --
38 -- The above copyright notice and this permission notice shall be included in
39 -- all copies or substantial portions of the Software.
40 --
41 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
42 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
43 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
44 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
45 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
46 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
47 -- THE SOFTWARE.
48 --
49 -- NB: this is the MIT License, as found 21-Aug-2016 on the site
50 -- http://www.opensource.org/licenses/mit-license.php
51
52 with Ada.Direct_IO;
53 with Interfaces;
54
55 package BZip2 is
56
57 subtype Byte is Interfaces.Unsigned_8;
58
59 -- Ada.Direct_IO is only there for the Data_Bytes_Count type.
60 -- In case you want to avoid reference to Ada.Direct_IO,
61 -- you can customize the definition of Data_Bytes_Count, provided
62 -- it has enough capacity for counting bytes in the streams involved.
63 package BIO is new Ada.Direct_IO (Byte);
64 subtype Data_Bytes_Count is BIO.Count;
65
66 private
67
68 --------------------------------------------------------------
69 -- Constants used for both compression and decompression. --
70 -- BZ_* names are as found in bzlib_private.h. --
71 --------------------------------------------------------------
72
73 -- The run_a and run_b symbols are used to encode
74 -- the run-lengths in the 2nd RLE phase (the
75 -- encoding of MTF indices).
76
77 run_a : constant := 0; -- BZ_RUNA
78 run_b : constant := 1; -- BZ_RUNB
79
80 -- If all byte values are used, the MTF & RLE_2 encoding
81 -- needs this amount of symbols:
82 -- * 2 for run_a, run_b
83 -- * 255 for non-zero MTF indices
84 -- * 1 for EOB.
85 --
86 max_alphabet_size : constant := 258; -- BZ_MAX_ALPHA_SIZE
87
88 max_code_len_max : constant := 23; -- BZ_MAX_CODE_LEN
89 max_code_len_bzip2_1_0_2 : constant := 20; -- Actual maximum in bzip2 1.0.2
90 max_code_len_bzip2_1_0_3 : constant := 17; -- Actual maximum in bzip2 1.0.3+
91 --
92 -- ^ See comments in huffman.c and the hardcoded limit in decompress.c.
93 -- Longer codes will trigger a BZ_DATA_ERROR in the latter.
94 -- NB: the related C-compiled bzip2 executable shows:
95 -- "data integrity (CRC) error in data"
96 -- for all kinds of data errors, most of them unrelated to CRC checks!
97
98 -- Each "group" of data comprises 50 symbols and can use one of up
99 -- to 6 different entropy coders (for BZip2: Huffman tables).
100 -- NB: the use of the word "group" stems from the original bzip2 code and
101 -- has nothing to do with algebraic groups.
102
103 min_entropy_coders : constant := 2; -- Magic number found in a check in decompress.c ...
104 max_entropy_coders : constant := 6; -- BZ_N_GROUPS (this name is very confusing!)
105 group_size : constant := 50; -- BZ_G_SIZE
106
107 -- Constants used to calibrate the main memory pool.
108
109 max_block_size : constant := 9;
110 sub_block_size : constant := 100_000;
111 max_selectors : constant := 2 + ((max_block_size * sub_block_size) / group_size); -- BZ_MAX_SELECTORS
112
113 subtype Natural_32 is Interfaces.Integer_32 range 0 .. Interfaces.Integer_32'Last;
114 subtype Positive_32 is Interfaces.Integer_32 range 1 .. Interfaces.Integer_32'Last;
115
116 stream_header_magic : constant String := "BZh0";
117
118 stream_footer_magic : constant String :=
119 Character'Val (16#17#) & "rE8P" & Character'Val (16#90#);
120 -- ^ sqrt (pi) "decimal" digits, visible in hexadecimal representation!
121
122 block_header_magic : constant String := "1AY&SY";
123 -- ^ pi "decimal" digits, visible in hexadecimal representation!
124
125 ---------------------------------------------------------------------------
126 -- Cyclic redundancy check to verify uncompressed block data integrity --
127 ---------------------------------------------------------------------------
128
129 package CRC is
130 use Interfaces;
131 --
132 procedure Init (CRC_Value : out Unsigned_32);
133 function Final (CRC_Value : Unsigned_32) return Unsigned_32;
134 procedure Update (CRC_Value : in out Unsigned_32; val : Byte);
135 pragma Inline (Update);
136 end CRC;
137
138 end BZip2;
Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.