Back to... Zip-Ada

Source file : huffman-encoding-length_limited_coding.adb



   1  --  Huffman.Encoding.Length_Limited_Coding
   2  ------------------------------------------
   3  --  Legal licensing note:
   4  
   5  --  Copyright (c) 2016 .. 2019 Gautier de Montmollin (maintainer of the Ada version)
   6  --  SWITZERLAND
   7  --
   8  --  The copyright holder is only the maintainer of the Ada version;
   9  --  authors of the C code and those of the algorithm are cited below.
  10  
  11  --  Permission is hereby granted, free of charge, to any person obtaining a copy
  12  --  of this software and associated documentation files (the "Software"), to deal
  13  --  in the Software without restriction, including without limitation the rights
  14  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15  --  copies of the Software, and to permit persons to whom the Software is
  16  --  furnished to do so, subject to the following conditions:
  17  
  18  --  The above copyright notice and this permission notice shall be included in
  19  --  all copies or substantial portions of the Software.
  20  
  21  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27  --  THE SOFTWARE.
  28  
  29  --  NB: this is the MIT License, as found 21-Aug-2016 on the site
  30  --  http://www.opensource.org/licenses/mit-license.php
  31  
  32  --  Author: lode.vandevenne [*] gmail [*] com (Lode Vandevenne)
  33  --  Author: jyrki.alakuijala [*] gmail [*] com (Jyrki Alakuijala)
  34  
  35  --  Bounded package merge algorithm, based on the paper
  36  --    "A Fast and Space-Economical Algorithm for Length-Limited Coding
  37  --    Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
  38  
  39  --  Translated by G. de Montmollin to Ada from katajainen.c (Zopfli project), 7-Feb-2016
  40  --
  41  --  Main technical differences to katajainen.c:
  42  --    - pointers are not used, array indices instead
  43  --    - all structures are allocated on stack
  44  --    - sub-programs are nested, then unneeded parameters are removed
  45  
  46  procedure Huffman.Encoding.Length_Limited_Coding
  47    (frequencies : in     Count_Array;
  48     bit_lengths :    out Length_Array)
  49  is
  50    subtype Index_Type is Count_Type;
  51  
  52    null_index : constant Index_Type := Index_Type'Last;
  53  
  54    --  Nodes forming chains.
  55    type Node is record
  56      weight : Count_Type;
  57      count  : Count_Type;                --  Number of leaves before this chain.
  58      tail   : Index_Type := null_index;  --  Previous node(s) of this chain, or null_index if none.
  59      in_use : Boolean    := False;       --  Tracking for garbage collection.
  60    end record;
  61  
  62    type Leaf_Node is record
  63      weight : Count_Type;
  64      symbol : Alphabet;
  65    end record;
  66  
  67    --  Memory pool for nodes.
  68    pool : array (0 .. Index_Type (2 * max_bits * (max_bits + 1) - 1)) of Node;
  69    pool_next : Index_Type := pool'First;
  70  
  71    type Index_pair is array (Index_Type'(0) .. 1) of Index_Type;
  72    lists : array (0 .. Index_Type (max_bits - 1)) of Index_pair;
  73  
  74    type Leaf_array is array (Index_Type range <>) of Leaf_Node;
  75    leaves : Leaf_array (0 .. frequencies'Length - 1);
  76  
  77    num_symbols : Count_Type := 0;  --  Amount of symbols with frequency > 0.
  78    num_Boundary_PM_runs : Count_Type;
  79  
  80    too_many_symbols_for_length_limit : exception;
  81    zero_length_but_nonzero_frequency : exception;
  82    nonzero_length_but_zero_frequency : exception;
  83    length_exceeds_length_limit       : exception;
  84    buggy_sorting                     : exception;
  85  
  86    procedure Init_Node (weight, count : Count_Type; tail, node_idx : Index_Type) is
  87    begin
  88      pool (node_idx).weight := weight;
  89      pool (node_idx).count  := count;
  90      pool (node_idx).tail   := tail;
  91      pool (node_idx).in_use := True;
  92    end Init_Node;
  93  
  94    --  Finds a free location in the memory pool. Performs garbage collection if needed.
  95    --  If use_lists = True, used to mark in-use nodes during garbage collection.
  96  
  97    function Get_Free_Node (use_lists : Boolean) return Index_Type is
  98      node_idx : Index_Type;
  99    begin
 100      loop
 101        if pool_next > pool'Last then
 102          --  Garbage collection.
 103          for i in pool'Range loop
 104            pool (i).in_use := False;
 105          end loop;
 106          if use_lists then
 107            for i in 0 .. Index_Type (max_bits * 2 - 1) loop
 108              node_idx := lists (i / 2)(i mod 2);
 109              while node_idx /= null_index loop
 110                pool (node_idx).in_use := True;
 111                node_idx := pool (node_idx).tail;
 112              end loop;
 113            end loop;
 114          end if;
 115          pool_next := pool'First;
 116        end if;
 117        exit when not pool (pool_next).in_use;  -- Found one.
 118        pool_next := pool_next + 1;
 119      end loop;
 120      pool_next := pool_next + 1;
 121      return pool_next - 1;
 122    end Get_Free_Node;
 123  
 124    --  Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
 125    --  new chain is, depending on the weights, a leaf or a combination of two chains
 126    --  from the previous list.
 127    --  index: The index of the list in which a new chain or leaf is required.
 128    --  final: Whether this is the last time this function is called. If it is then it
 129    --  is no more needed to recursively call self.
 130  
 131    procedure Boundary_PM (index : Index_Type; final : Boolean) is
 132      newchain  : Index_Type;
 133      oldchain  : Index_Type;
 134      lastcount : constant Count_Type := pool (lists (index)(1)).count;  --  Count of last chain of list.
 135      sum : Count_Type;
 136    begin
 137      if index = 0 and lastcount >= num_symbols then
 138        return;
 139      end if;
 140      newchain := Get_Free_Node (use_lists => True);
 141      oldchain := lists (index)(1);
 142      --  These are set up before the recursive calls below, so that there is a list
 143      --  pointing to the new node, to let the garbage collection know it's in use.
 144      lists (index) := (oldchain, newchain);
 145  
 146      if index = 0 then
 147        --  New leaf node in list 0.
 148        Init_Node (leaves (lastcount).weight, lastcount + 1, null_index, newchain);
 149      else
 150        sum := pool (lists (index - 1)(0)).weight + pool (lists (index - 1)(1)).weight;
 151        if lastcount < num_symbols and then sum > leaves (lastcount).weight then
 152          --  New leaf inserted in list, so count is incremented.
 153          Init_Node (leaves (lastcount).weight, lastcount + 1, pool (oldchain).tail, newchain);
 154        else
 155          Init_Node (sum, lastcount, lists (index - 1)(1), newchain);
 156          if not final then
 157            --  Two lookahead chains of previous list used up, create new ones.
 158            Boundary_PM (index - 1, False);
 159            Boundary_PM (index - 1, False);
 160          end if;
 161        end if;
 162      end if;
 163    end Boundary_PM;
 164  
 165    --  Initializes each list with as lookahead chains the two leaves with lowest weights.
 166  
 167    procedure Init_Lists is
 168      node0 : constant Index_Type := Get_Free_Node (use_lists => False);
 169      node1 : constant Index_Type := Get_Free_Node (use_lists => False);
 170    begin
 171      Init_Node (leaves (0).weight, 1, null_index, node0);
 172      Init_Node (leaves (1).weight, 2, null_index, node1);
 173      lists := (others => (node0, node1));
 174    end Init_Lists;
 175  
 176    --  Converts result of boundary package-merge to the bit_lengths. The result in the
 177    --  last chain of the last list contains the amount of active leaves in each list.
 178    --  chain: Chain to extract the bit length from (last chain from last list).
 179  
 180    procedure Extract_Bit_Lengths (chain : Index_Type) is
 181      node_idx : Index_Type := chain;
 182    begin
 183      while node_idx /= null_index loop
 184        for i in 0 .. pool (node_idx).count - 1 loop
 185          bit_lengths (leaves (i).symbol) := bit_lengths (leaves (i).symbol) + 1;
 186        end loop;
 187        node_idx := pool (node_idx).tail;
 188      end loop;
 189    end Extract_Bit_Lengths;
 190  
 191    function "<"(a, b : Leaf_Node) return Boolean is
 192    begin
 193      return a.weight < b.weight;
 194    end "<";
 195  
 196    procedure Quick_sort (a : in out Leaf_array) is
 197      n : constant Index_Type := a'Length;
 198      i, j : Index_Type;
 199      p, t : Leaf_Node;
 200    begin
 201      if n < 2 then
 202        return;
 203      end if;
 204      p := a (n / 2 + a'First);
 205      i := 0;
 206      j := n - 1;
 207      loop
 208        while a (i + a'First) < p loop
 209          i := i + 1;
 210        end loop;
 211        while p < a (j + a'First) loop
 212          j := j - 1;
 213        end loop;
 214        exit when i >= j;
 215        t := a (i + a'First);
 216        a (i + a'First) := a (j + a'First);
 217        a (j + a'First) := t;
 218        i := i + 1;
 219        j := j - 1;
 220      end loop;
 221      Quick_sort (a (a'First .. a'First + i - 1));
 222      Quick_sort (a (a'First + i .. a'Last));
 223    end Quick_sort;
 224  
 225    paranoid : constant Boolean := False;
 226  
 227  begin
 228    bit_lengths := (others => 0);
 229    --  Count used symbols and place them in the leaves.
 230    for a in Alphabet loop
 231      if frequencies (a) > 0 then
 232        leaves (num_symbols) := (frequencies (a), a);
 233        num_symbols := num_symbols + 1;
 234      end if;
 235    end loop;
 236    --  Check special cases and error conditions.
 237    if num_symbols > 2 ** max_bits then
 238      raise too_many_symbols_for_length_limit;  --  Error, too few max_bits to represent symbols.
 239    end if;
 240    if num_symbols = 0 then
 241      return;  --  No symbols at all. OK.
 242    end if;
 243    if num_symbols = 1 then
 244      bit_lengths (leaves (0).symbol) := 1;
 245      return;  --  Only one symbol, give it bit length 1, not 0. OK.
 246    end if;
 247    --  Sort the leaves from lightest to heaviest.
 248    Quick_sort (leaves (0 .. num_symbols - 1));
 249    if paranoid then
 250      for i in 1 .. num_symbols - 1 loop
 251        if leaves (i) < leaves (i - 1) then
 252          raise buggy_sorting;
 253        end if;
 254      end loop;
 255    end if;
 256    Init_Lists;
 257    --  In the last list, 2 * num_symbols - 2 active chains need to be created. Two
 258    --  are already created in the initialization. Each Boundary_PM run creates one.
 259    num_Boundary_PM_runs := 2 * num_symbols - 4;
 260    for i in 1 .. num_Boundary_PM_runs loop
 261      Boundary_PM (Index_Type (max_bits - 1), i = num_Boundary_PM_runs);
 262    end loop;
 263    Extract_Bit_Lengths (lists (Index_Type (max_bits - 1))(1));
 264    if paranoid then
 265      --  Done; some checks before leaving. Not checked: completeness of Huffman codes.
 266      for a in Alphabet loop
 267        if frequencies (a) = 0 then
 268          if bit_lengths (a) > 0 then
 269            raise nonzero_length_but_zero_frequency;  --  Never happened so far
 270          end if;
 271        else
 272          if bit_lengths (a) = 0 then
 273            raise zero_length_but_nonzero_frequency;  --  Happened before null_index fix
 274          elsif bit_lengths (a) > max_bits then
 275            raise length_exceeds_length_limit;        --  Never happened so far
 276          end if;
 277        end if;
 278      end loop;
 279    end if;
 280  end Huffman.Encoding.Length_Limited_Coding;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.