Back to... Zip-Ada

Source file : lzma.ads



   1  --  LZMA library
   2  ----------------
   3  --  Library for encoding and decoding data streams in the LZMA compression
   4  --  format invented by Igor Pavlov.
   5  --
   6  --  Pure Ada 95+ code, 100% portable: OS-, CPU- and compiler- independent.
   7  
   8  --  Legal licensing note:
   9  
  10  --  Copyright (c) 2016 .. 2019 Gautier de Montmollin
  11  --  SWITZERLAND
  12  
  13  --  Permission is hereby granted, free of charge, to any person obtaining a copy
  14  --  of this software and associated documentation files (the "Software"), to deal
  15  --  in the Software without restriction, including without limitation the rights
  16  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17  --  copies of the Software, and to permit persons to whom the Software is
  18  --  furnished to do so, subject to the following conditions:
  19  
  20  --  The above copyright notice and this permission notice shall be included in
  21  --  all copies or substantial portions of the Software.
  22  
  23  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29  --  THE SOFTWARE.
  30  
  31  --  NB: this is the MIT License, as found 21-Aug-2016 on the site
  32  --  http://www.opensource.org/licenses/mit-license.php
  33  
  34  with Ada.Direct_IO;  --  Only used for the type Data_Bytes_Count below.
  35  with Interfaces;
  36  with System;
  37  
  38  package LZMA is
  39  
  40    --  The compression and decompression procedures are located
  41    --  in child packages LZMA.Encoding and LZMA.Decoding respectively.
  42  
  43    --  Bits of last byte being used as context.
  44    --    With the value 8, LZMA uses a complete Markov chain for predicting
  45    --    a literal from the previous one, like PKZip's Reduce format.
  46    subtype Literal_Context_Bits_Range  is Integer range 0 .. 8;
  47  
  48    --  Position mod 2**bits is used, but for literal context only.
  49    subtype Literal_Position_Bits_Range is Integer range 0 .. 4;
  50  
  51    --  Position mod 2**bits is used in various places.
  52    subtype Position_Bits_Range         is Integer range 0 .. 4;
  53  
  54    Default_dictionary_size : constant := 2 ** 15;  --  32 KiB, like Deflate.
  55  
  56    subtype Byte is Interfaces.Unsigned_8;
  57  
  58    --  Ada.Direct_IO is only there for the Data_Bytes_Count type.
  59    --  In case you want to avoid reference to Ada.Direct_IO,
  60    --  you can customize the definition of Data_Bytes_Count, provided
  61    --  it has enough capacity for counting bytes in the streams involved.
  62    package BIO is new Ada.Direct_IO (Byte);
  63    subtype Data_Bytes_Count is BIO.Count;
  64  
  65  private
  66  
  67    use Interfaces;
  68  
  69    --  These integer types are defined in the LZMA specification
  70    --  (DRAFT version, 2015-06-14, by Igor Pavlov)
  71  
  72    type Unsigned is mod 2 ** System.Word_Size;
  73    subtype UInt64 is Unsigned_64;
  74    subtype UInt32 is Unsigned_32;
  75    subtype UInt16 is Unsigned_16;
  76  
  77    ----------------------------
  78    --  Finite state machine  --
  79    ----------------------------
  80  
  81    states_count : constant := 12;  --  LZMA specification name: "kNumStates"
  82    subtype State_range is Unsigned range 0 .. states_count - 1;
  83    type Transition is array (State_range) of State_range;
  84  
  85    ------------------------------------ From ...  0  1  2  3  4  5  6   7   8   9  10  11
  86    Update_State_Literal  : constant Transition := (0, 0, 0, 0, 1, 2, 3,  4,  5,  6,  4,  5);
  87    Update_State_Match    : constant Transition := (7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10);
  88    Update_State_Rep      : constant Transition := (8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11);
  89    Update_State_ShortRep : constant Transition := (9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11);
  90  
  91    --  Context for improving compression of aligned data,
  92    --  modulo 2**n = 2, 4, 8 or 16 (max) bytes, or disabled: n = 0.
  93    max_pos_bits : constant := 4;  --  LZMA specification name: "kNumPosBitsMax"
  94    max_pos_states_count : constant := 2**max_pos_bits;
  95    subtype Pos_state_range is Unsigned range 0 .. max_pos_states_count - 1;
  96  
  97    ----------------------------------------
  98    --  Probability model for bit coding  --
  99    ----------------------------------------
 100  
 101    probability_model_bits  : constant := 11;  --  LZMA specification name: "kNumBitModelTotalBits"
 102    probability_model_count : constant := 2 ** probability_model_bits;
 103  
 104    probability_change_bits : constant := 5;   --  LZMA specification name: "kNumMoveBits"
 105  
 106    --  All probabilities are initialized with p=0.5. LZMA specification name: "PROB_INIT_VAL"
 107    initial_probability : constant := probability_model_count / 2;
 108  
 109    --  Type for storing probabilities, must have at least Probability_model_bits bits.
 110    --  LZMA specification recommends UInt16. LzmaEnc.c uses UInt16 or optionally UInt32.
 111    type CProb is new UInt16;
 112  
 113    --  Integer (signed) used as index because there is a -1 (unused) index in Pos_coder_range.
 114    type CProb_array is array (Integer range <>) of CProb;
 115  
 116    align_bits       : constant := 4;  --  LZMA specification name: "kNumAlignBits"
 117    align_table_size : constant := 2 ** align_bits;
 118    align_mask       : constant := align_table_size - 1;
 119  
 120    subtype Bits_3_range is Integer range 0 .. 2**3 - 1;
 121    subtype Bits_6_range is Integer range 0 .. 2**6 - 1;
 122    subtype Bits_8_range is Integer range 0 .. 2**8 - 1;
 123    subtype Bits_NAB_range is Integer range 0 .. 2**align_bits - 1;
 124  
 125    subtype Probs_3_bits is CProb_array (Bits_3_range);
 126    subtype Probs_6_bits is CProb_array (Bits_6_range);
 127    subtype Probs_8_bits is CProb_array (Bits_8_range);
 128    subtype Probs_NAB_bits is CProb_array (Bits_NAB_range);
 129  
 130    --------------------------------------------------
 131    --  Probabilities for the binary decision tree  --
 132    --------------------------------------------------
 133  
 134    type Probs_State is array (State_range) of CProb;
 135    type Probs_State_and_Pos_State is array (State_range, Pos_state_range) of CProb;
 136  
 137    type Probs_for_Switches is record
 138      --  This is the context for the switch between a Literal and a LZ Distance-Length code
 139      match     : Probs_State_and_Pos_State := (others => (others => initial_probability));
 140      --  These are contexts for various repetition modes
 141      rep       : Probs_State := (others => initial_probability);
 142      rep_g0    : Probs_State := (others => initial_probability);
 143      rep_g1    : Probs_State := (others => initial_probability);
 144      rep_g2    : Probs_State := (others => initial_probability);
 145      rep0_long : Probs_State_and_Pos_State := (others => (others => initial_probability));
 146    end record;
 147  
 148    ------------------------------------
 149    --  Probabilities for LZ lengths  --
 150    ------------------------------------
 151  
 152    type Low_Mid_Coder_Probs is array (Pos_state_range) of Probs_3_bits;
 153  
 154    --  Probabilities used for encoding LZ lengths. LZMA specification name: "CLenDecoder"
 155    type Probs_for_LZ_Lengths is record
 156      choice_1   : CProb               := initial_probability;  --  0: low coder; 1: mid or high
 157      choice_2   : CProb               := initial_probability;  --  0: mid; 1: high
 158      low_coder  : Low_Mid_Coder_Probs := (others => (others => initial_probability));
 159      mid_coder  : Low_Mid_Coder_Probs := (others => (others => initial_probability));
 160      high_coder : Probs_8_bits        := (others => initial_probability);
 161    end record;
 162  
 163    --------------------------------------
 164    --  Probabilities for LZ distances  --
 165    --------------------------------------
 166  
 167    len_to_pos_states : constant := 4;
 168    subtype Slot_Coder_Range is Unsigned range 0 .. len_to_pos_states - 1;
 169    type Slot_Coder_Probs is array (Slot_Coder_Range) of Probs_6_bits;
 170    Dist_slot_bits : constant := 6;  --  "kNumPosSlotBits"
 171  
 172    Start_dist_model_index : constant :=  4;  --  "kStartPosModelIndex"
 173    End_dist_model_index   : constant := 14;  --  LZMA specification name: "kEndPosModelIndex"
 174    Num_full_distances  : constant := 2 ** (End_dist_model_index / 2);  --  "kNumFullDistances"
 175  
 176    --  Pos_coder_range: index -1 is never used as such but appears
 177    --  when calling Bit_Tree_Reverse_Encode (as in the original C version, RcTree_ReverseEncode).
 178    subtype Pos_coder_range is Integer range -1 .. Num_full_distances - End_dist_model_index;
 179    subtype Pos_coder_probs is CProb_array (Pos_coder_range);
 180  
 181    type Probs_for_LZ_Distances is record
 182      slot_coder  : Slot_Coder_Probs := (others => (others => initial_probability));
 183      align_coder : Probs_NAB_bits   := (others => initial_probability);
 184      pos_coder   : Pos_coder_probs  := (others => initial_probability);
 185    end record;
 186  
 187    --------------------------------------
 188    --  All probabilities used by LZMA  --
 189    --------------------------------------
 190  
 191    type All_probabilities (last_lit_prob_index : Integer) is record
 192      --  Literals:
 193      lit     : CProb_array (0 .. last_lit_prob_index) := (others => initial_probability);
 194      --  Distances:
 195      dist    : Probs_for_LZ_Distances;
 196      --  Lengths:
 197      len     : Probs_for_LZ_Lengths;
 198      rep_len : Probs_for_LZ_Lengths;
 199      --  Decision tree switches:
 200      switch  : Probs_for_Switches;
 201    end record;
 202  
 203    -------------
 204    --  Misc.  --
 205    -------------
 206  
 207    --  Minimum dictionary (= plain text buffer of n previous bytes)
 208    --  size is 4096. LZMA specification name: "LZMA_DIC_MIN"
 209    Min_dictionary_size : constant := 2 ** 12;
 210  
 211    --  Log2-style encoding of LZ lengths
 212    Len_low_bits     : constant := 3;
 213    Len_low_symbols  : constant := 2 ** Len_low_bits;
 214    Len_mid_bits     : constant := 3;
 215    Len_mid_symbols  : constant := 2 ** Len_mid_bits;
 216    Len_high_bits    : constant := 8;
 217    Len_high_symbols : constant := 2 ** Len_high_bits;
 218    Len_symbols      : constant := Len_low_symbols + Len_mid_symbols + Len_high_symbols;
 219  
 220    Min_match_length : constant := 2;  --  "LZMA_MATCH_LEN_MIN"
 221    Max_match_length : constant := Min_match_length + Len_symbols - 1;  --  "LZMA_MATCH_LEN_MAX"
 222  
 223    subtype Match_length_range is Integer range Min_match_length .. Max_match_length;
 224  
 225    --  Fake distance, used as an end-of-stream marker.
 226    end_of_stream_magic_distance : constant := 16#FFFF_FFFF#;
 227  
 228    --------------------------------------------------
 229    --  Binary values of various decision switches  --
 230    --------------------------------------------------
 231  
 232    --  LZ literal vs. DL code
 233    Literal_choice : constant := 0;
 234    DL_code_choice : constant := 1;
 235  
 236    --  Within DL code: "Simple match" vs. "Rep match"
 237    Simple_match_choice : constant := 0;
 238    Rep_match_choice    : constant := 1;
 239  
 240    --  Within "Rep match": "Distance is rep0" vs. "Distance is not rep0"
 241    The_distance_is_rep0_choice     : constant := 0;
 242    The_distance_is_not_rep0_choice : constant := 1;
 243    --  Within "Distance is rep0":
 244    The_length_is_1_choice     : constant := 0;
 245    The_length_is_not_1_choice : constant := 1;
 246    --  Within "Distance is not rep0": "Distance is rep1" vs. "Distance is not rep1"
 247    The_distance_is_rep1_choice     : constant := 0;
 248    The_distance_is_not_rep1_choice : constant := 1;
 249    --  Within "Distance is not rep1": "Distance is rep2" vs. "Distance is not rep2"
 250    The_distance_is_rep2_choice     : constant := 0;
 251    The_distance_is_not_rep2_choice : constant := 1;
 252  
 253    ----------------------
 254    --  Range encoding  --
 255    ----------------------
 256  
 257    --  Normalization threshold. When the range width is below that value,
 258    --  a shift is needed.
 259    width_threshold : constant := 2**24;  --  LZMA specification name: "kTopValue"
 260  
 261    --  The following article (the only reference in the LZMA specification)
 262    --  explains how range encoding works:
 263    --
 264    --     G. N. N. Martin, Range encoding: an algorithm for removing redundancy
 265    --     from a digitized message, Video & Data Recording Conference,
 266    --     Southampton, UK, July 24-27, 1979.
 267  
 268  end LZMA;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.