Back to... Zip-Ada

Source file : bzip2.ads



   1  --  BZip2
   2  ---------
   3  --
   4  --  Pure Ada 2012+ code, 100% portable: OS-, CPU- and compiler- independent.
   5  --
   6  --  bzip2 compresses files using the Burrows-Wheeler block-sorting text
   7  --  compression algorithm, and Huffman coding. Compression is generally
   8  --  considerably better than that achieved by more conventional
   9  --  LZ77/LZ78-based compressors, and approaches the performance of the
  10  --  PPM family of statistical compressors.
  11  --
  12  --  bzip2 was created by Julian Seward in 1996.
  13  --  Web site: https://sourceware.org/bzip2/
  14  --
  15  --  Documentation pointers:
  16  --
  17  --    BZip2 unofficial format specification (2016)
  18  --      https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
  19  --
  20  --    Burrows-Wheeler transform
  21  --      https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform
  22  --
  23  --    MTF Move-To-Front
  24  --      https://en.wikipedia.org/wiki/Move-to-front_transform
  25  --
  26  --  Legal licensing note:
  27  --
  28  --  Copyright (c) 2018 .. 2025 Gautier de Montmollin
  29  --  SWITZERLAND
  30  --
  31  --  Permission is hereby granted, free of charge, to any person obtaining a copy
  32  --  of this software and associated documentation files (the "Software"), to deal
  33  --  in the Software without restriction, including without limitation the rights
  34  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  35  --  copies of the Software, and to permit persons to whom the Software is
  36  --  furnished to do so, subject to the following conditions:
  37  --
  38  --  The above copyright notice and this permission notice shall be included in
  39  --  all copies or substantial portions of the Software.
  40  --
  41  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  42  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  43  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  44  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  45  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  46  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  47  --  THE SOFTWARE.
  48  --
  49  --  NB: this is the MIT License, as found 21-Aug-2016 on the site
  50  --  http://www.opensource.org/licenses/mit-license.php
  51  
  52  with Ada.Direct_IO;
  53  with Interfaces;
  54  
  55  package BZip2 is
  56  
  57    subtype Byte is Interfaces.Unsigned_8;
  58  
  59    --  Ada.Direct_IO is only there for the Data_Bytes_Count type.
  60    --  In case you want to avoid reference to Ada.Direct_IO,
  61    --  you can customize the definition of Data_Bytes_Count, provided
  62    --  it has enough capacity for counting bytes in the streams involved.
  63    package BIO is new Ada.Direct_IO (Byte);
  64    subtype Data_Bytes_Count is BIO.Count;
  65  
  66  private
  67  
  68    --------------------------------------------------------------
  69    --  Constants used for both compression and decompression.  --
  70    --  BZ_* names are as found in bzlib_private.h.             --
  71    --------------------------------------------------------------
  72  
  73    --  The run_a and run_b symbols are used to encode
  74    --  the run-lengths in the 2nd RLE phase (the
  75    --  encoding of MTF indices).
  76  
  77    run_a : constant := 0;  --  BZ_RUNA
  78    run_b : constant := 1;  --  BZ_RUNB
  79  
  80    --  If all byte values are used, the MTF & RLE_2 encoding
  81    --  needs this amount of symbols:
  82    --     *   2  for run_a, run_b
  83    --     * 255  for non-zero MTF indices
  84    --     *   1  for EOB.
  85    --
  86    max_alphabet_size        : constant := 258;  --  BZ_MAX_ALPHA_SIZE
  87  
  88    max_code_len_max         : constant := 23;   --  BZ_MAX_CODE_LEN
  89    max_code_len_bzip2_1_0_2 : constant := 20;   --  Actual maximum in bzip2 1.0.2
  90    max_code_len_bzip2_1_0_3 : constant := 17;   --  Actual maximum in bzip2 1.0.3+
  91    --
  92    --  ^ See comments in huffman.c and the hardcoded limit in decompress.c.
  93    --    Longer codes will trigger a BZ_DATA_ERROR in the latter.
  94    --    NB: the related C-compiled bzip2 executable shows:
  95    --         "data integrity (CRC) error in data"
  96    --    for all kinds of data errors, most of them unrelated to CRC checks!
  97  
  98    --  Each "group" of data comprises 50 symbols and can use one of up
  99    --  to 6 different entropy coders (for BZip2: Huffman tables).
 100    --  NB: the use of the word "group" stems from the original bzip2 code and
 101    --  has nothing to do with algebraic groups.
 102  
 103    min_entropy_coders : constant := 2;    --  Magic number found in a check in decompress.c ...
 104    max_entropy_coders : constant := 6;    --  BZ_N_GROUPS (this name is very confusing!)
 105    group_size         : constant := 50;   --  BZ_G_SIZE
 106  
 107    --  Constants used to calibrate the main memory pool.
 108  
 109    max_block_size : constant := 9;
 110    sub_block_size : constant := 100_000;
 111    max_selectors  : constant := 2 + ((max_block_size * sub_block_size) / group_size);  --  BZ_MAX_SELECTORS
 112  
 113    subtype Natural_32  is Interfaces.Integer_32 range 0 .. Interfaces.Integer_32'Last;
 114    subtype Positive_32 is Interfaces.Integer_32 range 1 .. Interfaces.Integer_32'Last;
 115  
 116    stream_header_magic : constant String := "BZh0";
 117  
 118    stream_footer_magic  : constant String :=
 119      Character'Val (16#17#) & "rE8P" & Character'Val (16#90#);
 120    --  ^ sqrt (pi) "decimal" digits, visible in hexadecimal representation!
 121  
 122    block_header_magic : constant String := "1AY&SY";
 123    --  ^ pi "decimal" digits, visible in hexadecimal representation!
 124  
 125    ---------------------------------------------------------------------------
 126    --  Cyclic redundancy check to verify uncompressed block data integrity  --
 127    ---------------------------------------------------------------------------
 128  
 129    package CRC is
 130      use Interfaces;
 131      --
 132      procedure Init (CRC_Value : out Unsigned_32);
 133      function  Final (CRC_Value : Unsigned_32) return Unsigned_32;
 134      procedure Update (CRC_Value : in out Unsigned_32; val : Byte);
 135        pragma Inline (Update);
 136    end CRC;
 137  
 138  end BZip2;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.