Back to... Zip-Ada

Source file : lzma-decoding.adb


-- LZMA.Decoding - Ada 95 translation of LzmaSpec.cpp, LZMA Reference Decoder 9.31
-- LzmaSpec.cpp : 2013-07-28 : Igor Pavlov : Public domain
--
--  Rework in 2016 by G. de Montmollin.
--    - some confusing identifiers were changed:
--        mostly, "range" was renamed "width", various names for probability data
--        have been renamed "probs", different things called "pos" have been renamed
--    - the whole probability model has been encapsulated
--    - parts common to encoding were moved to the root LZMA package.

with Ada.Unchecked_Deallocation;
with Ada.Exceptions;                    use Ada.Exceptions;

package body LZMA.Decoding is

  type Byte_buffer is array(UInt32 range <>) of Byte;
  type p_Byte_buffer is access Byte_buffer;

  type Out_Window is record
    buf       : p_Byte_buffer := null;
    pos       : UInt32        := 0;
    size      : UInt32;
    is_full   : Boolean       := False;
    total_pos : Unsigned      := 0;
  end record;

  procedure Create(o: in out Out_Window; dictionary_size: UInt32) is
  begin
    o.buf  := new Byte_buffer(0..dictionary_size-1);
    o.size := dictionary_size;
  end Create;

  type Range_Decoder is record
    width     : UInt32  := 16#FFFF_FFFF#;  --  (*)
    code      : UInt32  := 0;
    corrupted : Boolean := False;
  end record;
  --  (*) called "range" in LZMA spec and "remaining width" in G.N.N. Martin's
  --      article about range encoding.

  procedure Init(o: in out Range_Decoder) is
  begin
    if Read_Byte /= 0 then
      o.corrupted := True;
    end if;
    for i in 0..3 loop
      o.code := Shift_Left(o.code, 8) or UInt32(Read_Byte);
    end loop;
    if o.code = o.width then
      o.corrupted := True;
    end if;
  end Init;

  procedure Decode_Properties(o: in out LZMA_Decoder_Info; b: Byte_buffer) is
    d: Unsigned := Unsigned(b(b'First));
  begin
    if d >= 9 * 5 * 5 then
      Raise_Exception(LZMA_Error'Identity, "Incorrect LZMA properties");
      -- raise LZMA_Error with "Incorrect LZMA properties"; -- Ada 2005+
    end if;
    o.lc := Literal_context_bits_range(d mod 9);
    d := d / 9;
    o.lp := Literal_position_bits_range(d mod 5);
    o.pb := Position_bits_range(d / 5);
    o.dictSizeInProperties := 0;
    for i in 0..3 loop
      o.dictSizeInProperties := o.dictSizeInProperties +
        UInt32(b(UInt32(i) + 1 + b'First)) * 2 ** (8 * i);
    end loop;
    o.dictionary_size := o.dictSizeInProperties;
    if o.dictionary_size < Min_dictionary_size then
      o.dictionary_size := Min_dictionary_size;
    end if;
  end Decode_Properties;

  procedure Decode_Contents(o: in out LZMA_Decoder_Info; res: out LZMA_Result) is
    state : State_range := 0;
    --  Small stack of recent distances used for LZ. Required: initialized with zero values.
    rep0, rep1, rep2, rep3 : UInt32 := 0;
    pos_state: Pos_state_range;
    --  Local copies of invariant properties.
    unpack_size_def: constant Boolean:= o.unpackSizeDefined;
    literal_pos_mask: constant UInt32:= 2 ** o.lp - 1;
    lc: constant Literal_context_bits_range:= o.lc;
    --
    use type BIO.Count;
    Marker_exit: exception;
    out_win : Out_Window;
    --  Local range decoder
    range_dec: Range_Decoder;
    --  Entire probability model. Max lit prob index: 3,145,727.
    probs: All_probabilities(last_lit_prob_index => 16#300# * 2 ** (o.lc + o.lp) - 1);

    --  Normalize corresponds to G.N.N. Martin's revised algorithm's adding of
    --  trailing digits - for encoding. Here we decode and know the encoded
    --  data, brought by Read_Byte.
    procedure Normalize is
    pragma Inline(Normalize);
    begin
      --  Assertion: the width is large enough for the normalization to be needed
      --  once per bit decoding. Worst case: width = 2**24 before; bound = (2**13) * (2**5-1)
      --  new width's (leading binary digit) = 2**17; after normalization: 2**(17+8) = 2**25.
      if range_dec.width < width_threshold then
        range_dec.width := Shift_Left(range_dec.width, 8);
        range_dec.code  := Shift_Left(range_dec.code, 8) or UInt32(Read_Byte);
      end if;
    end Normalize;

    procedure Decode_Bit(prob: in out CProb; symbol: out Unsigned) is
    pragma Inline(Decode_Bit);
      cur_prob: constant CProb:= prob; -- Local copy
      bound: constant UInt32:= Shift_Right(range_dec.width, Probability_model_bits) * UInt32(cur_prob);
      --  See encoder for explanations about the maths.
    begin
      if range_dec.code < bound then
        prob:= cur_prob + Shift_Right(Probability_model_count - cur_prob, Probability_change_bits);
        range_dec.width := bound;
        Normalize;
        symbol := 0;
      else
        prob:= cur_prob - Shift_Right(cur_prob, Probability_change_bits);
        range_dec.code  := range_dec.code - bound;
        range_dec.width := range_dec.width - bound;
        Normalize;
        symbol := 1;
      end if;
    end Decode_Bit;

    function Is_Empty return Boolean is
    pragma Inline(Is_Empty);
    begin
      return out_win.pos = 0 and then not out_win.is_full;
    end Is_Empty;

    procedure Put_Byte(b: Byte) is
    pragma Inline(Put_Byte);
    begin
      out_win.total_pos := out_win.total_pos + 1;
      out_win.buf(out_win.pos):= b;
      out_win.pos := out_win.pos + 1;
      if out_win.pos = out_win.size then
        out_win.pos := 0;
        out_win.is_full := True;
      end if;
      Write_Byte(b);
    end Put_Byte;

    function Get_Byte(dist: UInt32) return Byte is
    pragma Inline(Get_Byte);
    begin
      if dist <= out_win.pos then
        return out_win.buf(out_win.pos - dist);
      else
        return out_win.buf(out_win.pos - dist + out_win.size);
      end if;
    end Get_Byte;

    procedure Process_Literal is
    pragma Inline(Process_Literal);
      prev_byte    : Byte:= 0;
      symbol       : Unsigned:= 1;
      lit_state    : Integer;
      probs_idx    : Integer;
      bit          : Unsigned;
    begin
      if o.unpackSize = 0 and then unpack_size_def then
        Raise_Exception(
          LZMA_Error'Identity,
          "Decoded data will exceed expected data size (Process_Literal)"
        );
      end if;
      --
      if not Is_Empty then
        prev_byte := Get_Byte(dist => 1);
      end if;
      lit_state :=
        Integer(
          Shift_Left(UInt32(out_win.total_pos) and literal_pos_mask, lc) +
          Shift_Right(UInt32(prev_byte), 8 - lc)
        );
      probs_idx:= 16#300# * lit_state;
      if state < 7 then
        loop
          Decode_Bit(probs.lit(probs_idx + Integer(symbol)), bit);
          symbol := (symbol + symbol) or bit;
          exit when symbol >= 16#100#;
        end loop;
      else
        declare
          --
          --  The probabilities used for decoding this literal assume
          --  that the current literal sequence resembles to the last
          --  distance-length copied sequence.
          --
          match_byte     : UInt32 := UInt32(Get_Byte(dist => rep0 + 1));
          match_bit      : UInt32;    -- either 0 or 16#100#
          prob_idx_match : Integer;   -- either 0 (normal case without match), 16#100# or 16#200#
          bit, bit_b     : Unsigned;
        begin
          loop
            match_byte := match_byte + match_byte;
            match_bit  := match_byte and 16#100#;
            prob_idx_match:= Integer(16#100# + match_bit);
            Decode_Bit(probs.lit(probs_idx + prob_idx_match + Integer(symbol)), bit);
            symbol := (symbol + symbol) or bit;
            exit when symbol >= 16#100#;
            if match_bit /= Shift_Left(UInt32(bit), 8) then
              -- No bit match, then give up byte match
              loop
                Decode_Bit(probs.lit(probs_idx + Integer(symbol)), bit_b);
                symbol := (symbol + symbol) or bit_b;
                exit when symbol >= 16#100#;
              end loop;
              exit;
            end if;
          end loop;
        end;
      end if;
      Put_Byte(Byte(symbol - 16#100#)); -- The output of a simple literal happens here.
      --
      state := Update_State_Literal(state);
      o.unpackSize:= o.unpackSize - 1;
    end Process_Literal;

    dict_size : constant UInt32:= o.dictionary_size;

    function Is_Finished_OK return Boolean is
    pragma Inline(Is_Finished_OK);
    begin
      return range_dec.code = 0;
    end Is_Finished_OK;

    procedure Process_Distance_and_Length is
    pragma Inline(Process_Distance_and_Length);
      --
      procedure Bit_Tree_Decode(
        prob     : in out CProb_array;
        num_bits :        Positive;
        m        :    out Unsigned)
      is
      pragma Inline(Bit_Tree_Decode);
        bit: Unsigned;
      begin
        m:= 1;
        for count in reverse 1..num_bits loop
          Decode_Bit(prob(Integer(m) + prob'First), bit);
          m:= m + m + bit;
        end loop;
        m:= m - 2**num_bits;
      end Bit_Tree_Decode;
      --
      len: Unsigned:= 0;
      --
      procedure Copy_Match(dist: UInt32) is
      pragma Inline(Copy_Match);
        b2, b3: Byte;
        len32: constant UInt32:= UInt32(len);
        will_fill: constant Boolean:= out_win.pos + len32 >= out_win.size;
        --
        procedure Easy_case is
        pragma Inline(Easy_case);
          src_from, src_to: UInt32;
          b: Byte;
        begin
          -- src and dest within circular buffer bounds. May overlap (len32 > dist).
          src_from := out_win.pos - dist;
          src_to   := out_win.pos - dist + len32 - 1;
          if len32 <= dist then -- No overlap: src_to < out_win.pos
            out_win.buf(out_win.pos .. out_win.pos + len32 - 1):= out_win.buf(src_from .. src_to);
            for i in src_from .. src_to loop
              Write_Byte(out_win.buf(i));
            end loop;
          else -- Overlap: to >= out_win.pos . Need to copy in forward order.
            for i in src_from .. src_to loop
              b:= out_win.buf(i);
              out_win.buf(i + dist):= b;
              Write_Byte(b);
            end loop;
          end if;
          out_win.pos := out_win.pos + len32;
        end Easy_case;
        --
        procedure Modulo_case is
        pragma Inline(Modulo_case);
        begin
          -- src starts below 0 or dest goes beyond size-1
          for count in reverse 1..len loop
            if dist <= out_win.pos then
              b2:= out_win.buf(out_win.pos - dist);
              out_win.buf(out_win.pos):= b2;
              out_win.pos := out_win.pos + 1;
              if out_win.pos = out_win.size then
                out_win.pos := 0;
              end if;
              Write_Byte(b2);
            else
              b3:= out_win.buf(out_win.size - dist + out_win.pos);
              out_win.buf(out_win.pos):= b3;
              out_win.pos := out_win.pos + 1;
              if out_win.pos = out_win.size then
                out_win.pos := 0;
              end if;
              Write_Byte(b3);
            end if;
          end loop;
        end Modulo_case;
      begin
        out_win.is_full := will_fill or else out_win.is_full;
        out_win.total_pos := out_win.total_pos + len;
        if dist <= out_win.pos and not will_fill then
          Easy_case;
        else
          Modulo_case;
        end if;
      end Copy_Match;
      --
      procedure Decode_Distance(dist: out UInt32) is
      pragma Inline(Decode_Distance);
        --
        decode_direct: UInt32;
        --
        procedure Decode_Direct_Bits(num_bits : Natural) is
        pragma Inline(Decode_Direct_Bits);
          t: UInt32;
        begin
          decode_direct := 0;
          for count in reverse 1..num_bits loop
            range_dec.width := Shift_Right(range_dec.width, 1);
            range_dec.code := range_dec.code - range_dec.width;
            t := - Shift_Right(range_dec.code, 31);
            range_dec.code := range_dec.code + (range_dec.width and t);
            if range_dec.code = range_dec.width then
              range_dec.corrupted := True;
            end if;
            Normalize;
            decode_direct := decode_direct + decode_direct + t + 1;
          end loop;
        end Decode_Direct_Bits;
        --
        procedure Bit_Tree_Reverse_Decode(prob: in out CProb_array; num_bits: in Natural) is
        pragma Inline(Bit_Tree_Reverse_Decode);
          m: Unsigned := 1;
          bit: Unsigned;
        begin
          for i in 0..num_bits-1 loop
            Decode_Bit(prob(Integer(m) + prob'First), bit);
            m := m + m + bit;
            dist := dist or Shift_Left(UInt32(bit), i);
          end loop;
        end Bit_Tree_Reverse_Decode;
        --
        --  len has been set up previously by Decode_Length.
        len_state     : constant Unsigned := Unsigned'Min(len, Len_to_pos_states - 1);
        dist_slot     : Unsigned;
        numDirectBits : Natural;
        --
      begin  --  Decode_Distance
        Bit_Tree_Decode(probs.dist.slot_coder(len_state), Dist_slot_bits, dist_slot);
        if dist_slot < Start_dist_model_index then
          dist:= UInt32(dist_slot);
          return;
        end if;
        numDirectBits := Natural(Shift_Right(UInt32(dist_slot), 1) - 1);
        dist := Shift_Left(2 or (UInt32(dist_slot) and 1), numDirectBits);
        if dist_slot < End_dist_model_index then
          Bit_Tree_Reverse_Decode(
            probs.dist.pos_coder(Integer(dist) - Integer(dist_slot) .. Pos_coder_range'Last),
            numDirectBits
          );
        else
          Decode_Direct_Bits(numDirectBits - Align_bits);
          dist:= dist + Shift_Left(decode_direct, Align_bits);
          Bit_Tree_Reverse_Decode(probs.dist.align_coder, Align_bits);
        end if;
      end Decode_Distance;
      --
      procedure Decode_Length(probs_len: in out Probs_for_LZ_Lengths) is
      pragma Inline(Decode_Length);
        choice: Unsigned;
      begin
        Decode_Bit(probs_len.choice_1, choice);
        if choice = 0 then
          Bit_Tree_Decode(probs_len.low_coder(pos_state), Len_low_bits, len);
          --  final length is in 2 + [0..7]
          return;
        end if;
        Decode_Bit(probs_len.choice_2, choice);
        if choice = 0 then
          Bit_Tree_Decode(probs_len.mid_coder(pos_state), Len_mid_bits, len);
          len:= len + Len_low_symbols;
          --  final length is in 2 + [8..15]
          return;
        end if;
        Bit_Tree_Decode(probs_len.high_coder, Len_high_bits, len);
        len:= len + Len_low_symbols + Len_mid_symbols;
        --  final length is in 2 + [16..271]
      end Decode_Length;
      --
      function Check_Distance return Boolean is
      pragma Inline(Check_Distance);
      begin
        return rep0 <= out_win.pos or out_win.is_full;
      end Check_Distance;
      --
      isError: Boolean;
      dist: UInt32;
      bit_a, bit_b, bit_c, bit_d, bit_e: Unsigned;
      --
    begin -- Process_Distance_and_Length
      Decode_Bit(probs.switch.rep(state), bit_a);
      if bit_a = Simple_match_choice then
        --  "Simple Match"
        rep3 := rep2;
        rep2 := rep1;
        rep1 := rep0;
        Decode_Length(probs.len);
        state := Update_State_Match(state);
        Decode_Distance(dist => rep0);
        if rep0 = 16#FFFF_FFFF# then
          if Is_Finished_OK then
            raise Marker_exit;
          else
            Raise_Exception(
              LZMA_Error'Identity,
              "Range decoder not finished on EOS marker (in Process_Distance_and_Length)"
            );
          end if;
        end if;
        if (o.unpackSize = 0 and then unpack_size_def) or
            rep0 >= dict_size or not Check_Distance
        then
          Raise_Exception(
            LZMA_Error'Identity,
            "Decoded data will exceed expected data size (in Process_Distance_and_Length, #2)." &
            "; Distance =" & UInt32'Image(rep0) &
            "; Dictionary size =" & UInt32'Image(dict_size) &
            "; Position =" & UInt32'Image(out_win.pos) &
            "; Is window full ? " & Boolean'Image(out_win.is_full)
          );
        end if;
      else
        --  "Rep Match"
        if o.unpackSize = 0 and then unpack_size_def then
          Raise_Exception(
            LZMA_Error'Identity,
            "Decoded data will exceed expected data size (in Process_Distance_and_Length, #1)"
          );
        end if;
        if Is_Empty then
          Raise_Exception(
            LZMA_Error'Identity,
            "Output window buffer is empty (in Process_Distance_and_Length)"
          );
        end if;
        Decode_Bit(probs.switch.rep_g0(state), bit_b);
        if bit_b = The_distance_is_rep0_choice then
          Decode_Bit(probs.switch.rep0_long(state, pos_state), bit_c);
          if bit_c = The_length_is_1_choice then
            state := Update_State_ShortRep(state);
            Put_Byte(Get_Byte(dist => rep0 + 1));
            o.unpackSize:= o.unpackSize - 1;
            return;  -- GdM: this way, we go to the next iteration (C++: continue)
          end if;
        else
          Decode_Bit(probs.switch.rep_g1(state), bit_d);
          if bit_d = The_distance_is_rep1_choice then
            dist := rep1;
          else
            Decode_Bit(probs.switch.rep_g2(state), bit_e);
            if bit_e = The_distance_is_rep2_choice then
              dist := rep2;
            else
              dist := rep3;
              rep3 := rep2;
            end if;
            rep2 := rep1;
          end if;
          rep1 := rep0;
          rep0 := dist;
        end if;
        Decode_Length(probs.rep_len);
        state := Update_State_Rep(state);
      end if;
      len := len + Min_match_length;
      isError := False;
      if o.unpackSize < Data_Bytes_Count(len) and then unpack_size_def then
        len := Unsigned(o.unpackSize);
        isError := True;
      end if;
      -- The LZ distance/length copy happens here.
      Copy_Match(dist => rep0 + 1);
      o.unpackSize:= o.unpackSize - Data_Bytes_Count(len);
      if isError then
        Raise_Exception(
          LZMA_Error'Identity,
          "Decoded data will exceed expected data size (in Process_Distance_and_Length, #3)"
        );
      end if;
    end Process_Distance_and_Length;

    bit_choice: Unsigned;
    pos_bits_mask : constant UInt32 := 2 ** o.pb - 1;
    size_defined_and_marker_not_mandatory: constant Boolean:=
      unpack_size_def and not o.markerIsMandatory;

    procedure Finalize is
      procedure Dispose is new Ada.Unchecked_Deallocation(Byte_buffer, p_Byte_buffer);
    begin
      Dispose(out_win.buf);
      o.range_dec_corrupted:= range_dec.corrupted;
    end Finalize;

  begin
    Create(out_win, o.dictionary_size);
    Init(range_dec);
    loop
      if o.unpackSize = 0
        and then Is_Finished_OK
        and then size_defined_and_marker_not_mandatory
      then
        res:= LZMA_finished_without_marker;
        Finalize;
        return;
      end if;
      pos_state := Pos_state_range(UInt32(out_win.total_pos) and pos_bits_mask);
      Decode_Bit(probs.switch.match(state, pos_state), bit_choice);
      -- LZ decoding happens here: either we have a new literal in 1 byte, or we copy past data.
      if bit_choice = Literal_choice then
        Process_Literal;
      else
        Process_Distance_and_Length;
      end if;
    end loop;
  exception
    when Marker_exit =>
      res:= LZMA_finished_with_marker;
      Finalize;
  end Decode_Contents;

  procedure Decode_Header(o: in out LZMA_Decoder_Info; hints: LZMA_Hints) is
    header: Byte_buffer(0..12);
    b: Byte;
    use type BIO.Count;
    last_bit: Natural;
  begin
    o.unpackSize := 0;
    o.unpackSizeDefined := False;

    for i in header'Range loop
      header(i):= Read_Byte;
      exit when i = 4 and not hints.has_size;
    end loop;

    Decode_Properties(o, header);

    if hints.has_size then
      for i in UInt32'(0)..7 loop
        b:= header(5 + i);
        if b /= 16#FF# then
          o.unpackSizeDefined := True;
        end if;
      end loop;
      if o.unpackSizeDefined then
        for i in UInt32'(0)..7 loop
          b:= header(5 + i);
          if b /= 0 then
            for bit in 0..7 loop
              if (b and Shift_Left(Byte'(1),bit)) /= 0 then
                last_bit:= bit;
              end if;
            end loop;
            last_bit:= last_bit + Natural(8 * i);
            if last_bit > Data_Bytes_Count'Size - 1 then
              Raise_Exception(
                LZMA_Error'Identity,
                "Indicated size bits for decoded data," &
                Natural'Image(last_bit) &
                ", exceeds the maximum file size bits," &
                Natural'Image(Data_Bytes_Count'Size - 1)
              );
            else
              o.unpackSize := o.unpackSize + Data_Bytes_Count(b) * 2 ** Natural(8 * i);
            end if;
          end if;
        end loop;
        o.unpackSize_as_defined:= o.unpackSize;
      else
        o.unpackSize:= Data_Bytes_Count'Last;
      end if;
    else
      o.unpackSize:= hints.given_size;
      o.unpackSizeDefined:= True;
    end if;
    o.markerIsMandatory := hints.marker_expected or not o.unpackSizeDefined;
  end Decode_Header;

  procedure Decode(o: in out LZMA_Decoder_Info; hints: LZMA_Hints; res: out LZMA_Result) is
  begin
    Decode_Header(o, hints);
    Decode_Contents(o, res);
    if hints.fail_on_bad_range_code and o.range_dec_corrupted then
      Raise_Exception(LZMA_Error'Identity, "Range decoder had a corrupted value (code = range)");
    end if;
  end Decode;

  procedure Decompress(hints: LZMA_Hints) is
    o: LZMA_Decoder_Info;
    res: LZMA_Result;
  begin
    Decode(o, hints, res);
  end Decompress;

  function Literal_context_bits(o: LZMA_Decoder_Info) return Natural is
  begin
    return o.lc;
  end Literal_context_bits;

  function Literal_pos_bits(o: LZMA_Decoder_Info) return Natural is
  begin
    return o.lp;
  end Literal_pos_bits;

  function Pos_bits(o: LZMA_Decoder_Info) return Natural is
  begin
    return o.pb;
  end Pos_bits;

  function Unpack_size_defined(o: LZMA_Decoder_Info) return Boolean is
  begin
    return o.unpackSizeDefined;
  end Unpack_size_defined;

  function Unpack_size_as_defined(o: LZMA_Decoder_Info) return Data_Bytes_Count is
  begin
    return o.unpackSize_as_defined;
  end Unpack_size_as_defined;

  function Dictionary_size(o: LZMA_Decoder_Info) return Interfaces.Unsigned_32 is
  begin
    return o.dictionary_size;
  end Dictionary_size;

  function Dictionary_size_in_properties(o: LZMA_Decoder_Info) return Interfaces.Unsigned_32 is
  begin
    return o.dictSizeInProperties;
  end Dictionary_size_in_properties;

  function Range_decoder_corrupted(o: LZMA_Decoder_Info) return Boolean is
  begin
    return o.range_dec_corrupted;
  end Range_decoder_corrupted;

end LZMA.Decoding;

Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other related Ada projects on Gautier's blog.