Back to... Zip-Ada

Source file : bzip2-encoding.adb



   1  --  BZip2.Encoding - a standalone, generic BZip2 encoder.
   2  ------------------
   3  --
   4  --  Examples of use:
   5  --    BZip2_Enc, a standalone encoder to .bz2 files
   6  --    Zip.Compress.BZip2_E, creates Zip files entries with BZip2 encoding
   7  
   8  --  Legal licensing note:
   9  
  10  --  Copyright (c) 2024 .. 2025 Gautier de Montmollin
  11  --  SWITZERLAND
  12  
  13  --  Permission is hereby granted, free of charge, to any person obtaining a copy
  14  --  of this software and associated documentation files (the "Software"), to deal
  15  --  in the Software without restriction, including without limitation the rights
  16  --  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17  --  copies of the Software, and to permit persons to whom the Software is
  18  --  furnished to do so, subject to the following conditions:
  19  
  20  --  The above copyright notice and this permission notice shall be included in
  21  --  all copies or substantial portions of the Software.
  22  
  23  --  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24  --  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25  --  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26  --  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27  --  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28  --  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29  --  THE SOFTWARE.
  30  
  31  --  NB: this is the MIT License, as found on the site
  32  --  http://www.opensource.org/licenses/mit-license.php
  33  
  34  -----------------
  35  
  36  --  To do:
  37  --
  38  --    - Performance: use Suffix-Array-Induced-Sorting for the BWT.
  39  --        See https://github.com/dsnet/compress/blob/master/bzip2/bwt.go
  40  --            https://sites.google.com/site/yuta256/sais
  41  --    - Segmentation: brute-force recursive binary segmentation as in EncodeBlock2 in
  42  --        7-Zip's BZip2Encoder.cpp .
  43  --    - Use tasking to parallelize the block compression jobs.
  44  --
  45  --  Already tried without significant success:
  46  --
  47  --    - Use the permutation of entropy coders that minimizes the
  48  --        size of compression structure.
  49  --    - Brute-force over different strategies to tweak frequencies for avoiding
  50  --        zero occurrences (see Avoid_Zeros). Unfortunately, the gains are offset by larger
  51  --        compression structures (the Huffman trees descriptors take more room).
  52  --    - Use k-means machine learning method to re-allocate clusters to entropy coders.
  53  --        Removed from code on 2025-02-23.
  54  --    - Use a "noisiness" (instead of a "bumpiness") function of a group's frequencies
  55  --        in the sorting key for the initial clustering.
  56  --    - Set up the initial clustering by slicing the global frequency histogram
  57  --        "horizontally" (on the symbol axis) to create artificial truncated histograms
  58  --        and allocate them to the data groups. It obviously traps the model into
  59  --        a suboptimal local optimum in the 258-dimensional criterion space.
  60  --        This method is used by the original BZip2 program.
  61  --        Removed from code on 2025-02-08.
  62  
  63  with Data_Segmentation;
  64  with Huffman.Encoding.Length_Limited_Coding;
  65  
  66  with Ada.Containers.Generic_Constrained_Array_Sort,
  67       Ada.Strings.Unbounded,
  68       Ada.Text_IO,
  69       Ada.Unchecked_Deallocation;
  70  
  71  package body BZip2.Encoding is
  72  
  73    procedure Encode
  74      (option    : Compression_Option := block_900k;
  75       size_hint : Stream_Size_Type   := unknown_size)
  76    is
  77      use Interfaces;
  78  
  79      subtype Bit_Pos_Type is Natural range 0 .. 7;
  80      bit_buffer : Byte := 0;
  81      bit_pos : Bit_Pos_Type := 7;
  82  
  83      procedure Flush_Bit_Buffer is
  84      begin
  85        Write_Byte (bit_buffer);
  86        bit_buffer := 0;
  87        bit_pos := 7;
  88      end Flush_Bit_Buffer;
  89  
  90      procedure Put_Bits (data : Unsigned_32; amount : Positive) is
  91      begin
  92        for count in reverse 1 .. amount loop
  93          if (data and Shift_Left (Unsigned_32'(1), count - 1)) /= 0 then
  94            bit_buffer := bit_buffer or Shift_Left (Unsigned_8'(1), bit_pos);
  95          end if;
  96          if bit_pos = 0 then
  97            Flush_Bit_Buffer;
  98          else
  99            bit_pos := bit_pos - 1;
 100          end if;
 101        end loop;
 102      end Put_Bits;
 103  
 104      procedure Put_Bits (b : Boolean) is
 105      begin
 106        Put_Bits (Boolean'Pos (b), 1);
 107      end Put_Bits;
 108  
 109      procedure Put_Bits (s : String) is
 110      begin
 111        for c of s loop
 112          Put_Bits (Character'Pos (c), 8);
 113        end loop;
 114      end Put_Bits;
 115  
 116      level : constant Natural_32 :=
 117        (case option is
 118           when block_100k => 1,
 119           when block_400k => 4,
 120           when block_900k => 9);
 121  
 122      block_capacity : constant Natural_32 := sub_block_size * level;
 123  
 124      --  We use in this package 4 large heap-allocated arrays.
 125      --  It is possible to use Ada.Containers.Vectors but the run time
 126      --  is longer, possibly due to indirect access to data and various
 127      --  checks. For instance, replacing Buffer below with a Vector makes
 128      --  the encoding ~26% slower.
 129  
 130      type Buffer is array (Natural_32 range <>) of Byte;
 131      type Buffer_Access is access Buffer;
 132  
 133      procedure Unchecked_Free is new Ada.Unchecked_Deallocation (Buffer, Buffer_Access);
 134  
 135      combined_crc : Unsigned_32 := 0;
 136  
 137      block_counter : Natural := 0;
 138  
 139      quiet          : constant := 0;
 140      headlines      : constant := 1;
 141      detailed       : constant := 2;
 142      super_detailed : constant := 3;  --  Details down to symbols.
 143  
 144      verbosity_level : constant := quiet;
 145  
 146      procedure Trace (msg : String; verbosity : Natural) with Inline is
 147      begin
 148        if verbosity_level >= verbosity then
 149          Ada.Text_IO.Put_Line ("BZip2: " & msg);
 150        end if;
 151      end Trace;
 152  
 153      procedure Trace (prefix : String; b : Buffer; verbosity : Natural) with Inline is
 154      begin
 155        if verbosity_level >= verbosity then
 156          declare
 157            use Ada.Strings.Unbounded;
 158            msg : Unbounded_String;
 159          begin
 160            for bt of b loop
 161              if bt in 32 .. 126 then
 162                msg := msg & Character'Val (bt);
 163              else
 164                msg := msg & '(' & bt'Image & ')';
 165              end if;
 166            end loop;
 167            Trace (prefix & To_String (msg), verbosity);
 168          end;
 169        end if;
 170      end Trace;
 171  
 172      --  Each block is limited either by the data available
 173      --  by the block capacity.
 174      --  It means that each encoding step has an end and
 175      --  that we can theoretically go on with the next step,
 176      --  perhaps at the price of using more memory.
 177  
 178      procedure Encode_Block (raw_buf : Buffer) is
 179  
 180        -----------------------------------
 181        --  Initial Run-Length Encoding  --
 182        -----------------------------------
 183  
 184        rle_1_block_size : Natural_32 := 0;
 185        block_crc : Unsigned_32;
 186        in_use : array (Byte) of Boolean := (others => False);
 187  
 188        rle_1_data : Buffer_Access := new Buffer (1 .. block_capacity * 5 / 4);
 189        --  Worst case: all data consist of runs of 4 bytes -> 5 bytes with RLE_1.
 190  
 191        procedure RLE_1 is
 192          b_prev : Byte := 0;  --  Initialization is to reassure the compiler.
 193          run : Natural := 0;
 194  
 195          procedure Store_Run with Inline is
 196            procedure Store (x : Byte) with Inline is
 197            begin
 198              rle_1_block_size := rle_1_block_size + 1;
 199              rle_1_data (rle_1_block_size) := x;
 200              in_use (x) := True;
 201            end Store;
 202          begin
 203            for count in 1 .. Integer'Min (4, run) loop
 204              Store (b_prev);
 205            end loop;
 206            if run >= 4 then
 207              pragma Assert (run <= 259);
 208              Store (Byte (run - 4));
 209            end if;
 210            run := 1;
 211          end Store_Run;
 212  
 213          start : Boolean := True;
 214        begin
 215          CRC.Init (block_crc);
 216          for b of raw_buf loop
 217            CRC.Update (block_crc, b);
 218            if start or else b /= b_prev then
 219              --  Startup or Run break:
 220              Store_Run;
 221              start := False;
 222            elsif run = 259 then
 223              --  Force a run break, even though b = b_prev:
 224              Store_Run;
 225            else
 226              run := run + 1;
 227            end if;
 228            b_prev := b;
 229          end loop;
 230          Store_Run;
 231          Trace ("RLE_1: raw buffer length:  " & raw_buf'Length'Image,   headlines);
 232          Trace ("RLE_1-processed block size:" & rle_1_block_size'Image, headlines);
 233          if verbosity_level >= super_detailed then
 234            Trace ("RLE_1: ", rle_1_data (1 .. rle_1_block_size), super_detailed);
 235          end if;
 236        end RLE_1;
 237  
 238        ---------------------------------
 239        --  Burrows-Wheeler Transform  --
 240        ---------------------------------
 241  
 242        bwt_data : Buffer_Access;
 243        bwt_index : Natural_32 := 0;  --  0-based.
 244  
 245        procedure BWT is
 246  
 247          subtype Offset_Range is Integer_32 range 0 .. rle_1_block_size - 1;
 248  
 249          type Offset_Table is array (Offset_Range) of Offset_Range;
 250          type Offset_Table_Access is access Offset_Table;
 251  
 252          procedure Unchecked_Free is new Ada.Unchecked_Deallocation (Offset_Table, Offset_Table_Access);
 253  
 254          --  Compare the message, rotated with two different offsets.
 255          function Lexicographically_Smaller (left, right : Offset_Range) return Boolean with Inline is
 256            il, ir : Integer_32;
 257            l, r : Byte;
 258          begin
 259            pragma Assert (rle_1_data'First = 1);
 260            il := 1 + (if left  = 0 then 0 else rle_1_block_size - left);
 261            ir := 1 + (if right = 0 then 0 else rle_1_block_size - right);
 262            for i in Offset_Range loop
 263              l := rle_1_data (il);
 264              r := rle_1_data (ir);
 265              if l < r then
 266                return True;
 267              elsif l > r then
 268                return False;
 269              end if;
 270              il := il + 1;
 271              if il > rle_1_block_size then
 272                il := 1;
 273              end if;
 274              ir := ir + 1;
 275              if ir > rle_1_block_size then
 276                ir := 1;
 277              end if;
 278            end loop;
 279            --  Equality in contents.
 280            return left < right;  --  Ensures stable sorting.
 281          end Lexicographically_Smaller;
 282  
 283          procedure Offset_Sort is new Ada.Containers.Generic_Constrained_Array_Sort
 284            (Index_Type   => Offset_Range,
 285             Element_Type => Offset_Range,
 286             Array_Type   => Offset_Table,
 287             "<"          => Lexicographically_Smaller);
 288  
 289           offset : Offset_Table_Access := new Offset_Table;
 290  
 291        begin
 292          for i in Offset_Range loop
 293            offset (i) := i;
 294          end loop;
 295  
 296          Offset_Sort (offset.all);  --  <--- The BW Transform is done here.
 297  
 298          bwt_data := new Buffer (1 .. rle_1_block_size);
 299          for i in Offset_Range loop
 300            --  Copy last column of the matrix into transformed message:
 301            bwt_data (1 + i) := rle_1_data (1 + (rle_1_block_size - 1 - offset (i)) mod rle_1_block_size);
 302            if offset (i) = 0 then
 303              --  Found the row index of the original message.
 304              bwt_index := i;
 305            end if;
 306          end loop;
 307  
 308          if verbosity_level >= super_detailed then
 309            if rle_1_block_size = 0 then
 310              Trace ("BWT:   (empty block)", super_detailed);
 311            else
 312              Trace ("BWT:   ", bwt_data.all, super_detailed);
 313              Trace ("BWT index:" & bwt_index'Image, super_detailed);
 314            end if;
 315          end if;
 316          Unchecked_Free (offset);
 317          Unchecked_Free (rle_1_data);
 318        exception
 319          when others =>
 320            Unchecked_Free (offset);
 321            raise;
 322        end BWT;
 323  
 324        ----------------------------------------------------
 325        --  Move-to-Front and second Run-Length Encoding  --
 326        ----------------------------------------------------
 327  
 328        subtype Max_Alphabet is Integer range 0 .. max_alphabet_size - 1;
 329  
 330        type MTF_Array is array (Positive_32 range <>) of Max_Alphabet;
 331        type MTF_Array_Access is access MTF_Array;
 332  
 333        procedure Unchecked_Free is new Ada.Unchecked_Deallocation (MTF_Array, MTF_Array_Access);
 334  
 335        mtf_data : MTF_Array_Access;
 336        mtf_last : Natural_32 := 0;
 337  
 338        unseq_to_seq : array (Byte) of Byte;
 339  
 340        normal_symbols_in_use : Natural;
 341        last_symbol_in_use : Natural;
 342        EOB : Natural;
 343  
 344        procedure MTF_and_RLE_2 is
 345  
 346          procedure Prepare_Mapping is
 347          begin
 348            normal_symbols_in_use := 0;
 349            for i in Byte loop
 350              if in_use (i) then
 351                unseq_to_seq (i) := Byte (normal_symbols_in_use);
 352                normal_symbols_in_use := normal_symbols_in_use + 1;
 353              end if;
 354            end loop;
 355  
 356            last_symbol_in_use := normal_symbols_in_use + 3 - 1 - 1;
 357            --  ^ + 3 : the special symbols RUN_A, RUN_B, EOB
 358            --    - 1 : value 0 has no symbol (RUN_A and RUN_B are used for the runs of 0)
 359            --    - 1 : zero-based
 360  
 361            EOB := last_symbol_in_use;
 362  
 363            Trace ("Normal symbols in use:" & normal_symbols_in_use'Image, detailed);
 364          end Prepare_Mapping;
 365  
 366          procedure Store (a : Max_Alphabet) is
 367          begin
 368            mtf_last := mtf_last + 1;
 369            mtf_data (mtf_last) := a;
 370          end Store;
 371  
 372          run : Natural_32 := 0;
 373  
 374          procedure Store_Run with Inline is
 375            rc : Unsigned_32;
 376          begin
 377            if run > 0 then
 378              --  Output a binary representation of `run`
 379              --  using RUN_A for 0's RUN_B for 1's.
 380              rc := Unsigned_32 (run + 1);
 381              loop
 382                Store (Max_Alphabet (rc and 1));
 383                rc := Shift_Right (rc, 1);
 384                exit when rc < 2;
 385              end loop;
 386              --  Reset the run count.
 387              run := 0;
 388            end if;
 389          end Store_Run;
 390  
 391          mtf_symbol : array (0 .. 255) of Byte;
 392          idx : Natural;
 393          bt_seq : Byte;
 394  
 395        begin
 396          Prepare_Mapping;
 397  
 398          mtf_data := new MTF_Array (1 .. 1 + 2 * rle_1_block_size);
 399  
 400          for i in mtf_symbol'Range loop
 401            mtf_symbol (i) := Byte (i);
 402          end loop;
 403  
 404          Big_MTF_RLE_2_Loop :
 405          for bt of bwt_data.all loop
 406            bt_seq := unseq_to_seq (bt);
 407  
 408            --  MTF part:
 409  
 410            Search_Value :
 411            for search in mtf_symbol'Range loop
 412              if mtf_symbol (search) = bt_seq then
 413                idx := search;
 414                exit Search_Value;
 415              end if;
 416            end loop Search_Value;
 417  
 418            Move_Value_to_Front :
 419            for i in reverse 1 .. idx loop
 420              mtf_symbol (i) := mtf_symbol (i - 1);
 421            end loop Move_Value_to_Front;
 422            mtf_symbol (0) := bt_seq;
 423  
 424            --  RLE part:
 425  
 426            if idx = 0 then
 427              run := run + 1;
 428            else
 429              Store_Run;
 430              Store (1 + idx);  --  Value stored is >= 2. Values 0 and 1 are RUN_A, RUN_B.
 431            end if;
 432  
 433          end loop Big_MTF_RLE_2_Loop;
 434  
 435          Store_Run;
 436          Store (EOB);
 437  
 438          Unchecked_Free (bwt_data);
 439        end MTF_and_RLE_2;
 440  
 441        ----------------------
 442        --  Entropy Coding  --
 443        ----------------------
 444  
 445        subtype Entropy_Coder_Range is Integer range 1 .. max_entropy_coders;
 446  
 447        descr :
 448          array (Entropy_Coder_Range) of
 449            Huffman.Encoding.Descriptor (Max_Alphabet);
 450  
 451        entropy_coder_count : Entropy_Coder_Range;
 452        selector_count : Integer_32;
 453  
 454        selector : array (1 .. 1 + block_capacity / group_size) of Entropy_Coder_Range;
 455  
 456        procedure Entropy_Calculations is
 457  
 458          subtype Alphabet_in_Use is Integer range 0 .. last_symbol_in_use;
 459  
 460          type Count_Array is array (Alphabet_in_Use) of Natural_32;
 461  
 462          procedure Avoid_Zeros (freq : in out Count_Array) is
 463            zeroes : Natural := 0;
 464          begin
 465            for stat of freq loop
 466              if stat = 0 then
 467                zeroes := zeroes + 1;
 468              end if;
 469            end loop;
 470            case zeroes is
 471              when 0 =>
 472                --  Zero zeroes, zero problem :-)
 473                null;
 474              when 1 .. 100 =>
 475                --  Turn the "0"'s into "1"'s.
 476                for stat of freq loop
 477                  stat := Natural_32'Max (1, stat);
 478                end loop;
 479              when others =>
 480                --  Turn the "0"'s into an actual "1/2".
 481                for stat of freq loop
 482                  stat := (if stat = 0 then 1 else stat * 2);
 483                end loop;
 484            end case;
 485          end Avoid_Zeros;
 486  
 487          max_code_len : Positive;
 488  
 489          procedure Output_Frequency_Matrix is
 490            use Ada.Text_IO;
 491            f : File_Type;
 492            file_name : String := "freq" & block_counter'Image & ".csv";
 493            freq : Count_Array := (others => 0);
 494            symbol : Alphabet_in_Use;
 495            sep : constant Character := ';';
 496          begin
 497            --  In this file, rows represent groups of data,
 498            --  columns represent the frequencies of each symbol.
 499            file_name (file_name'First + 4) := '_';
 500            Create (f, Out_File, file_name);
 501            for mtf_idx in 1 .. mtf_last loop
 502              symbol := mtf_data (mtf_idx);
 503              freq (symbol) := freq (symbol) + 1;
 504              if mtf_idx rem group_size = 0 or else mtf_idx = mtf_last then
 505                --  Dump group's statistics:
 506                for s in Alphabet_in_Use loop
 507                  Put (f, freq (s)'Image & sep);
 508                end loop;
 509                New_Line (f);
 510                freq := (others => 0);
 511              end if;
 512            end loop;
 513            Close (f);
 514          end Output_Frequency_Matrix;
 515  
 516          type Huffman_Length_Array is array (Alphabet_in_Use) of Natural;
 517  
 518          procedure Define_Descriptor (freq : in out Count_Array; des : Entropy_Coder_Range) is
 519            procedure LLHCL is new
 520              Huffman.Encoding.Length_Limited_Coding
 521                (Alphabet     => Alphabet_in_Use,
 522                 Count_Type   => Natural_32,
 523                 Count_Array  => Count_Array,
 524                 Length_Array => Huffman_Length_Array,
 525                 max_bits     => max_code_len);
 526            len : Huffman_Length_Array;
 527            pragma Assert (max_code_len <= max_code_len_bzip2_1_0_2);
 528          begin
 529            Avoid_Zeros (freq);
 530            LLHCL (freq, len);
 531            for symbol in Alphabet_in_Use loop
 532              descr (des)(symbol).bit_length := len (symbol);
 533            end loop;
 534            Huffman.Encoding.Prepare_Codes
 535              (descr (des)(Alphabet_in_Use), max_code_len, False);
 536          end Define_Descriptor;
 537  
 538          -----------------------------------------------------
 539          --  Radically simple but functional entropy coder  --
 540          -----------------------------------------------------
 541  
 542          procedure Single_Entropy_Coder is
 543            freq : Count_Array := (others => 0);
 544          begin
 545            for symbol of mtf_data (1 .. mtf_last) loop
 546              freq (symbol) := freq (symbol) + 1;
 547            end loop;
 548            max_code_len := max_code_len_bzip2_1_0_3;
 549            Define_Descriptor (freq, 1);
 550            entropy_coder_count := 2;  --  The canonical BZip2 decoder requires >= 2 coders.
 551            descr (2) := descr (1);    --  We actually don't use the copy (psssht), but need to define it!
 552            for i in 1 .. selector_count loop
 553              selector (Integer_32 (i)) := 1;
 554            end loop;
 555          end Single_Entropy_Coder;
 556  
 557          --------------------------------------------------------------------------
 558          --  Define multiple entropy coders (max 6) and assign them to the       --
 559          --  various groups of data (max 18000, with 50 symbols in each group).  --
 560          --  The art is to gather the groups into meaningful clusters.           --
 561          --  Each cluster will use one of the entropy coders.                    --
 562          --------------------------------------------------------------------------
 563  
 564          procedure Multiple_Entropy_Coders is
 565  
 566            subtype Selector_Range is Positive_32 range 1 .. selector_count;
 567  
 568            --  Create an initial clustering depending on a ranking using a key
 569            --  computed on a subset of the alphabet (a mix of RUN_A, RUN_B,
 570            --  low-index MTF values).
 571            --  Look at the first few columns of some output
 572            --  of Output_Frequency_Matrix to find why.
 573            --
 574            procedure Initial_Clustering_Ranking_Method (sample_width : Positive) is
 575  
 576              type Pair is record
 577                key   : Natural_32;   --  Occurrences of symbols that are in the subset.
 578                index : Positive_32;  --  Group number.
 579              end record;
 580  
 581              type Ranking_Array is array (Selector_Range) of Pair;
 582  
 583              ranking : Ranking_Array;
 584  
 585              function Smaller_Key (left, right : Pair) return Boolean is (left.key < right.key);
 586  
 587              procedure Ranking_Sort is new Ada.Containers.Generic_Constrained_Array_Sort
 588                (Index_Type   => Selector_Range,
 589                 Element_Type => Pair,
 590                 Array_Type   => Ranking_Array,
 591                 "<"          => Smaller_Key);
 592  
 593              type Cluster_Attribution is array (Positive range <>) of Entropy_Coder_Range;
 594  
 595              procedure Initial_Clustering_by_Rank (attr : Cluster_Attribution) is
 596                na : constant Positive_32 := attr'Length;
 597                ns : constant Selector_Range := selector_count;
 598                a32 : Positive_32;
 599              begin
 600                for attr_idx in attr'Range loop
 601                  a32 := Integer_32 (attr_idx) - Integer_32 (attr'First) + 1;  --  a32 = 1, 2, 3, .. na.
 602                  for i in 1 + (a32 - 1) * ns / na .. a32 * ns / na loop
 603                    selector (ranking (i).index) := attr (attr_idx);
 604                  end loop;
 605                end loop;
 606              end Initial_Clustering_by_Rank;
 607  
 608              pos_countdown : Natural := group_size;
 609              sel_idx : Positive_32 := 1;
 610              key : Natural_32 := 0;
 611              last_symbol_sampled : constant Natural := Integer'Min (EOB - 1, run_a + sample_width - 1);
 612              symbol : Alphabet_in_Use;
 613            begin
 614              --  Populate the frequency stats for the ranking, grouped by data group.
 615              --
 616              for mtf_idx in 1 .. mtf_last loop
 617                symbol := mtf_data (mtf_idx);
 618                if symbol in run_a .. last_symbol_sampled then
 619                  key := key + 1;
 620                end if;
 621                pos_countdown := pos_countdown - 1;
 622                if pos_countdown = 0 then
 623                  ranking (sel_idx) := (key => key, index => sel_idx);
 624                  pos_countdown := group_size;
 625                  sel_idx := sel_idx + 1;
 626                  key := 0;
 627                end if;
 628              end loop;
 629              if pos_countdown < group_size then
 630                --  Finish last, incomplete group.
 631                ranking (sel_idx) := (key => key, index => sel_idx);
 632              end if;
 633  
 634              --  The construction of initial clusters can now be
 635              --  done easily using the following sorting:
 636              --
 637              Ranking_Sort (ranking);
 638  
 639              --  Example with two clusters:
 640              --   - Low values (more random data) for the cluster #2.
 641              --   - High values (more redundant data) for #1.
 642              --
 643              case entropy_coder_count is
 644                when 1 => null;  --  Not supported by canonical BZip2.
 645                when 2 => Initial_Clustering_by_Rank ((2, 1));
 646                when 3 => Initial_Clustering_by_Rank ((3, 1, 2));
 647                when 4 => Initial_Clustering_by_Rank ((4, 2, 1, 3));
 648                when 5 => Initial_Clustering_by_Rank ((5, 3, 1, 2, 4));
 649                when 6 => Initial_Clustering_by_Rank ((6, 4, 2, 1, 3, 5));
 650              end case;
 651  
 652            end Initial_Clustering_Ranking_Method;
 653  
 654            procedure Define_Descriptors is
 655              pos_countdown : Natural             := group_size;
 656              selector_idx  : Positive_32         := 1;
 657              cluster       : Entropy_Coder_Range := selector (1);
 658              symbol        : Alphabet_in_Use;
 659              freq_cluster : array (1 .. entropy_coder_count) of Count_Array :=  (others => (others => 0));
 660            begin
 661              --  Populate the frequency stats, grouped by cluster (= entropy coder choice):
 662              for mtf_idx in 1 .. mtf_last loop
 663                symbol := mtf_data (mtf_idx);
 664                freq_cluster (cluster)(symbol) := freq_cluster (cluster)(symbol) + 1;
 665                pos_countdown := pos_countdown - 1;
 666                if pos_countdown = 0 and then mtf_idx < mtf_last then
 667                  pos_countdown := group_size;
 668                  selector_idx := selector_idx + 1;
 669                  cluster := selector (selector_idx);
 670                end if;
 671              end loop;
 672              --  Create Huffman codes based on the said frequencies:
 673              for cl in 1 .. entropy_coder_count loop
 674                Define_Descriptor (freq_cluster (cl), cl);
 675              end loop;
 676            end Define_Descriptors;
 677  
 678            defector_groups : Natural;
 679  
 680            procedure Simulate_Entropy_Coding_Variants_and_Reclassify is
 681              pos_countdown : Natural             := group_size;
 682              selector_idx  : Positive_32         := 1;
 683              cluster       : Entropy_Coder_Range := selector (1);
 684              symbol        : Alphabet_in_Use;
 685              bit_count     : array (1 .. entropy_coder_count) of Natural := (others => 0);
 686  
 687              --  We simulate the encoding of selectors (for its cost).
 688              mtf_cluster_value : array (1 .. entropy_coder_count) of Positive;
 689              mtf_cluster_idx   : Positive;
 690  
 691              procedure Optimize_Group is
 692                min_bits : Natural := Natural'Last;
 693                best : Entropy_Coder_Range := cluster;
 694                cost : Natural;
 695              begin
 696                --  At this point we have computed the costs in bits for
 697                --  encoding the current group of data using various entropy coders.
 698                --  Now we look at the extra cost of switching entropy coders.
 699                for cl in 1 .. entropy_coder_count loop
 700                  cost := bit_count (cl);
 701                  --  Here we account the mtf encoding of the selectors.
 702                  for search in mtf_cluster_value'Range loop
 703                    if mtf_cluster_value (search) = cl then
 704                      mtf_cluster_idx := search;
 705                      exit;
 706                    end if;
 707                  end loop;
 708                  cost := cost + mtf_cluster_idx;
 709                  --
 710                  if cost < min_bits then
 711                    --  Encoder #cl is cheaper.
 712                    min_bits := cost;
 713                    best := cl;
 714                  end if;
 715                end loop;
 716  
 717                if best /= cluster then
 718                  --  We have found a cheaper encoding by switching to another cluster.
 719                  --  -> the group #sel_idx changes party (re-allocation).
 720                  selector (selector_idx) := best;
 721                  defector_groups := defector_groups + 1;
 722                end if;
 723  
 724                --  Now do the "definitive" (but still simulated)
 725                --  mtf for the chosen cluster index.
 726                for search in mtf_cluster_value'Range loop
 727                  if mtf_cluster_value (search) = selector (selector_idx) then
 728                    mtf_cluster_idx := search;
 729                    exit;
 730                  end if;
 731                end loop;
 732                --  Move the value to the first place.
 733                for j in reverse 2 .. mtf_cluster_idx loop
 734                  mtf_cluster_value (j) := mtf_cluster_value (j - 1);
 735                end loop;
 736                mtf_cluster_value (1) := selector (selector_idx);
 737              end Optimize_Group;
 738  
 739            begin
 740              --  Cost analysis by simulation and re-classification
 741              --  (or said otherwise, re-allocation).
 742              --
 743              for w in mtf_cluster_value'Range loop
 744                --  We start with 1, 2, 3, ...:
 745                mtf_cluster_value (w) := w;
 746              end loop;
 747  
 748              defector_groups := 0;
 749              pos_countdown := group_size;
 750              for mtf_idx in 1 .. mtf_last loop
 751                symbol := mtf_data (mtf_idx);
 752                for cl in 1 .. entropy_coder_count loop
 753                   --  For each cluster cl, simulate output assuming
 754                   --  the current group belongs to cluster cl.
 755                   bit_count (cl) := bit_count (cl) + descr (cl) (symbol).bit_length;
 756                end loop;
 757                pos_countdown := pos_countdown - 1;
 758                if pos_countdown = 0 then
 759                  Optimize_Group;
 760                  pos_countdown := group_size;
 761                  if mtf_idx < mtf_last then
 762                    bit_count := (others => 0);
 763                    selector_idx := selector_idx + 1;
 764                    cluster := selector (selector_idx);
 765                  end if;
 766                end if;
 767              end loop;
 768              if pos_countdown < group_size then
 769                --  Optimize last, incomplete group.
 770                Optimize_Group;
 771              end if;
 772            end Simulate_Entropy_Coding_Variants_and_Reclassify;
 773  
 774            low_cluster_usage : Boolean := False;
 775  
 776            procedure Cluster_Statistics is
 777              stat_cluster : array (Entropy_Coder_Range) of Natural_32 := (others => 0);
 778              cl : Entropy_Coder_Range;
 779              uniform_usage : constant Natural_32 := selector_count / Natural_32 (entropy_coder_count);
 780              threshold_denominator : constant := 2;
 781            begin
 782              low_cluster_usage := False;
 783              --  Compute cluster usage.
 784              for i in Selector_Range loop
 785                cl := selector (i);
 786                stat_cluster (cl) := stat_cluster (cl) + 1;
 787              end loop;
 788              for c in 1 .. entropy_coder_count loop
 789                Trace
 790                   ("          Cluster" & c'Image & " is used by" &
 791                    stat_cluster (c)'Image & " groups.", detailed);
 792                if stat_cluster (c) < uniform_usage / threshold_denominator then
 793                  low_cluster_usage := True;
 794                  Trace ("          ---> Low Cluster Usage!", detailed);
 795                end if;
 796              end loop;
 797            end Cluster_Statistics;
 798  
 799            procedure Construct (sample_width : Natural) is
 800              reclassification_iteration_limit : constant := 10;
 801            begin
 802              Initial_Clustering_Ranking_Method (sample_width);
 803              Trace
 804                ("   Construct with" & entropy_coder_count'Image & " coders", detailed);
 805  
 806              --  Compute the entropy coders based on the current
 807              --  clustering, then refine the (group -> cluster) attribution.
 808              --  A group can join another cluster if the number of bits
 809              --  in the output is smaller. However, it will influence the
 810              --  frequencies of both affected clusters.
 811              --
 812              for iteration in 1 .. reclassification_iteration_limit loop
 813                Cluster_Statistics;
 814                Define_Descriptors;
 815                Simulate_Entropy_Coding_Variants_and_Reclassify;
 816                Trace
 817                  ("   Iteration" & iteration'Image &
 818                   ". Defector groups:" & defector_groups'Image,
 819                   detailed);
 820                exit when defector_groups = 0;
 821              end loop;
 822              if defector_groups > 0 then
 823                --  The cluster optimization loop has exited before
 824                --  full stabilization (clusters have changed).
 825                Define_Descriptors;
 826              end if;
 827              Cluster_Statistics;
 828            end Construct;
 829  
 830            function Compute_Total_Entropy_Cost return Natural_32 is
 831              --  We simulate the sending of the whole block and
 832              --  look at the costs related to the entropy coding.
 833  
 834              function Compute_Selectors_Cost return Natural_32 is
 835                value : array (1 .. entropy_coder_count) of Positive;
 836                mtf_idx : Positive;
 837                bits : Natural_32 := 0;
 838              begin
 839                for w in value'Range loop
 840                  value (w) := w;
 841                end loop;
 842                for i in Selector_Range loop
 843                  for search in value'Range loop
 844                    if value (search) = selector (i) then
 845                      mtf_idx := search;
 846                      exit;
 847                    end if;
 848                  end loop;
 849                  for j in reverse 2 .. mtf_idx loop
 850                    value (j) := value (j - 1);
 851                  end loop;
 852                  value (1) := selector (i);
 853                  bits := bits + Integer_32 (mtf_idx);
 854                end loop;
 855                return bits;
 856              end Compute_Selectors_Cost;
 857  
 858              function Compute_Huffman_Bit_Lengths_Cost return Natural_32 is
 859                current_bit_length, new_bit_length : Natural;
 860                bits : Natural_32 := 0;
 861              begin
 862                for coder in 1 .. entropy_coder_count loop
 863                  current_bit_length := descr (coder)(0).bit_length;
 864                  bits := bits +  5;
 865                  for i in 0 .. last_symbol_in_use loop
 866                    new_bit_length := descr (coder)(i).bit_length;
 867                    Adjust_Bit_length :
 868                    loop
 869                      if current_bit_length = new_bit_length then
 870                        bits := bits + 1;
 871                        exit Adjust_Bit_length;
 872                      else
 873                        bits := bits + 2;
 874                        if current_bit_length < new_bit_length then
 875                          current_bit_length := current_bit_length + 1;
 876                        else
 877                          current_bit_length := current_bit_length - 1;
 878                        end if;
 879                      end if;
 880                    end loop Adjust_Bit_length;
 881                  end loop;
 882                end loop;
 883                return bits;
 884              end Compute_Huffman_Bit_Lengths_Cost;
 885  
 886              pos_countdown : Natural             := group_size;
 887              selector_idx  : Positive_32         := 1;
 888              cluster       : Entropy_Coder_Range := selector (1);
 889              bits          : Natural_32 := 0;
 890            begin
 891              --  Simulate the sending of the data itself:
 892              for mtf_idx in 1 .. mtf_last loop
 893                bits := bits + Natural_32 (descr (cluster)(mtf_data (mtf_idx)).bit_length);
 894                pos_countdown := pos_countdown - 1;
 895                if pos_countdown = 0 and then mtf_idx < mtf_last then
 896                  pos_countdown := group_size;
 897                  selector_idx := selector_idx + 1;
 898                  cluster := selector (selector_idx);
 899                end if;
 900              end loop;
 901              --  We add to the compressed data cost, the cost of switching coders
 902              --  over the whole block and the cost of sending the compression
 903              --  structures of the coders.
 904              bits := bits + Compute_Selectors_Cost + Compute_Huffman_Bit_Lengths_Cost;
 905              return bits;
 906            end Compute_Total_Entropy_Cost;
 907  
 908            cost              : Natural_32;
 909            best_cost         : Natural_32 := Natural_32'Last;
 910            best_ec_count     : Entropy_Coder_Range;
 911            best_max_code_len : Positive;
 912            best_sample_width : Natural;
 913  
 914            type Value_Array is array (Positive range <>) of Natural;
 915  
 916            -----------------------------------------------------------------
 917            --  Choices for brute-force search of the best entropy coding  --
 918            -----------------------------------------------------------------
 919  
 920            max_code_len_choices : constant Value_Array :=
 921            (case option is
 922               when block_100k => (1 => 16),
 923               when block_400k => (1 => 16),
 924               when block_900k => (15, 17));
 925  
 926            coder_choices : constant Value_Array :=
 927            (case option is
 928               when block_100k => (4, 6),
 929               when block_400k => (4, 6),
 930               when block_900k =>
 931                 (case mtf_last is
 932                    when     1 ..  5_000 => (2, 3, 6),
 933                    when 5_001 .. 10_000 => (3, 4, 6),
 934                    when others          => (3, 4, 5, 6)));
 935  
 936            sample_width_choices : constant Value_Array :=
 937            (case option is
 938               when block_100k => (1 => 4),
 939               when block_400k => (1 => 4),
 940               when block_900k => (3, 4));
 941  
 942            --  In a former version, we had 210 combinations of
 943            --  brute-force choices for option block_900k, making
 944            --  that option run 13x longer that with only 1 combination!
 945            --  See also timings in doc/za_work.xls, sheet BZip2.
 946  
 947          begin
 948            --  Brute-force: test some max code lengths:
 949            for max_code_len_test of max_code_len_choices loop
 950              max_code_len := max_code_len_test;
 951              --  Brute-force: test some sample widths:
 952              for sample_width_test of sample_width_choices loop
 953                --  Brute-force: test some amounts of entropy coders:
 954                for ec_test in reverse min_entropy_coders .. max_entropy_coders loop
 955                  if low_cluster_usage
 956                    --  ^ At least one cluster of previous iteration is not used much.
 957                    or else (for some value of coder_choices => value = ec_test)
 958                  then
 959                    entropy_coder_count := ec_test;
 960                    Construct (sample_width_test);
 961                    cost := Compute_Total_Entropy_Cost;
 962                    if cost < best_cost then
 963                      best_cost         := cost;
 964                      best_ec_count     := ec_test;
 965                      best_max_code_len := max_code_len;
 966                      best_sample_width := sample_width_test;
 967                    end if;
 968                  end if;
 969                end loop;
 970              end loop;
 971            end loop;
 972  
 973            max_code_len        := best_max_code_len;
 974            entropy_coder_count := best_ec_count;
 975            Trace
 976              ("Max len:" & max_code_len'Image &
 977               ", coders:" & entropy_coder_count'Image &
 978               ", sample width:" & best_sample_width'Image,
 979               detailed);
 980            Construct (best_sample_width);
 981          end Multiple_Entropy_Coders;
 982  
 983          trace_frequency_matrix : constant Boolean := False;
 984          use_single_coder : constant Boolean := False;
 985  
 986        begin
 987          selector_count := 1 + (mtf_last - 1) / group_size;
 988          if trace_frequency_matrix then
 989            Output_Frequency_Matrix;
 990          end if;
 991  
 992          if use_single_coder then
 993            Single_Entropy_Coder;
 994          else
 995            Multiple_Entropy_Coders;
 996          end if;
 997        end Entropy_Calculations;
 998  
 999        ---------------------------------
1000        --  Output of compressed data  --
1001        ---------------------------------
1002  
1003        procedure Put_Block_Header is
1004        begin
1005          Put_Bits (block_header_magic);
1006          block_crc := CRC.Final (block_crc);
1007          Put_Bits (block_crc, 32);
1008          Trace ("Block CRC:   " & block_crc'Image, detailed);
1009          combined_crc := Rotate_Left (combined_crc, 1) xor block_crc;
1010          Trace ("Combined CRC:" & combined_crc'Image, detailed);
1011          Put_Bits (0, 1);  --  Randomized flag, always False.
1012          Put_Bits (Unsigned_32 (bwt_index), 24);
1013        end Put_Block_Header;
1014  
1015        procedure Put_Block_Trees_Descriptors is
1016  
1017          procedure Put_Mapping_Table is
1018            in_use_16 : array (Byte range 0 .. 15) of Boolean := (others => False);
1019          begin
1020            for i in in_use_16'Range loop
1021              for j in in_use_16'Range loop
1022                if in_use (i * 16 + j) then
1023                  in_use_16 (i) := True;
1024                end if;
1025              end loop;
1026            end loop;
1027  
1028            --  Send the first 16 bits which tell which pieces are stored.
1029            for i in in_use_16'Range loop
1030              Put_Bits (in_use_16 (i));
1031            end loop;
1032            --  Send detail of the used pieces.
1033            for i in in_use_16'Range loop
1034              if in_use_16 (i) then
1035                for j in in_use_16'Range loop
1036                  Put_Bits (in_use (i * 16 + j));
1037                end loop;
1038              end if;
1039            end loop;
1040          end Put_Mapping_Table;
1041  
1042          procedure Put_Selectors is
1043            value : array (1 .. entropy_coder_count) of Positive;
1044            mtf_idx : Positive;
1045          begin
1046            Put_Bits (Unsigned_32 (selector_count), 15);
1047            for w in value'Range loop
1048              --  We start with 1, 2, 3, ...:
1049              value (w) := w;
1050            end loop;
1051            for i in 1 .. selector_count loop
1052              for search in value'Range loop
1053                if value (search) = selector (i) then
1054                  mtf_idx := search;
1055                  exit;
1056                end if;
1057              end loop;
1058              --  Move the value to the first place.
1059              for j in reverse 2 .. mtf_idx loop
1060                value (j) := value (j - 1);
1061              end loop;
1062              value (1) := selector (i);
1063              --  MTF-transformed index for the selected entropy coder.
1064              for bar in 1 .. mtf_idx  - 1 loop
1065                --  Output as many '1' bit as the value of mtf_idx - 1:
1066                Put_Bits (1, 1);
1067              end loop;
1068              Put_Bits (0, 1);
1069            end loop;
1070          end Put_Selectors;
1071  
1072          procedure Put_Huffman_Bit_Lengths is
1073            current_bit_length, new_bit_length : Natural;
1074          begin
1075            for coder in 1 .. entropy_coder_count loop
1076              current_bit_length := descr (coder)(0).bit_length;
1077              Put_Bits (Unsigned_32 (current_bit_length), 5);
1078              for i in 0 .. last_symbol_in_use loop
1079                new_bit_length := descr (coder)(i).bit_length;
1080                Adjust_Bit_length :
1081                loop
1082                  if current_bit_length = new_bit_length then
1083                    Put_Bits (0, 1);
1084                    exit Adjust_Bit_length;
1085                  else
1086                    Put_Bits (1, 1);
1087                    if current_bit_length < new_bit_length then
1088                      current_bit_length := current_bit_length + 1;
1089                      Put_Bits (0, 1);
1090                    else
1091                      current_bit_length := current_bit_length - 1;
1092                      Put_Bits (1, 1);
1093                    end if;
1094                  end if;
1095                end loop Adjust_Bit_length;
1096              end loop;
1097            end loop;
1098          end Put_Huffman_Bit_Lengths;
1099  
1100        begin
1101          Put_Mapping_Table;
1102          Put_Bits (Unsigned_32 (entropy_coder_count), 3);
1103          Put_Selectors;
1104          Put_Huffman_Bit_Lengths;
1105        end Put_Block_Trees_Descriptors;
1106  
1107        procedure Entropy_Output is
1108          pos_countdown : Natural             := group_size;
1109          selector_idx  : Positive_32         := 1;
1110          cluster       : Entropy_Coder_Range := selector (1);
1111          symbol        : Max_Alphabet;
1112        begin
1113          for mtf_idx in 1 .. mtf_last loop
1114            symbol := mtf_data (mtf_idx);
1115  
1116            Put_Bits
1117              (Unsigned_32
1118                (descr (cluster) (symbol).code),
1119                 descr (cluster) (symbol).bit_length);
1120  
1121            pos_countdown := pos_countdown - 1;
1122            if pos_countdown = 0 and then mtf_idx < mtf_last then
1123              pos_countdown := group_size;
1124              selector_idx := selector_idx + 1;
1125              cluster := selector (selector_idx);
1126            end if;
1127          end loop;
1128  
1129          Unchecked_Free (mtf_data);
1130        end Entropy_Output;
1131  
1132      begin
1133        block_counter := block_counter + 1;
1134        Trace ("Block" & block_counter'Image, headlines);
1135  
1136        --  Data transformation (no output):
1137        RLE_1;
1138        BWT;
1139        MTF_and_RLE_2;
1140        Entropy_Calculations;
1141  
1142        --  Now we output the block's compressed data:
1143        Put_Block_Header;
1144        Put_Block_Trees_Descriptors;
1145        Entropy_Output;
1146  
1147      exception
1148        when others =>
1149          Unchecked_Free (rle_1_data);
1150          Unchecked_Free (bwt_data);
1151          Unchecked_Free (mtf_data);
1152          raise;
1153      end Encode_Block;
1154  
1155      stream_rest : Stream_Size_Type := size_hint;
1156  
1157      --------------------------------------------
1158      --  Data acquisition and block splitting  --
1159      --------------------------------------------
1160  
1161      procedure Read_and_Split_Block (dyn_block_capacity : Natural_32) is
1162  
1163        --  In the cases RLE_1 compression is efficient, the
1164        --  input buffer can contain much more that the post RLE_1 block.
1165        --  Best case: all runs of 259 bytes, factor 259/5 = 51.8.
1166        --  The latter has to fit into the agreed capacity (a multiple of 100_000).
1167        --  So, we define a conveniently large input buffer.
1168  
1169        multiplier : constant := 10;
1170  
1171        raw_buf : Buffer_Access := new Buffer (1 .. multiplier * dyn_block_capacity);
1172  
1173        package Segmentation_for_BZip2 is
1174          new Data_Segmentation
1175            (Index                 => Natural_32,
1176             Alphabet              => Byte,
1177             Buffer_Type           => Buffer,
1178             discrepancy_threshold => 2.0,
1179             index_threshold       => 80_000,
1180             window_size           => 80_000);
1181  
1182        single_segment : constant Boolean := False;
1183        seg : Segmentation_for_BZip2.Segmentation;
1184  
1185        raw_buf_index : Natural_32 := 0;
1186        index_start   : Natural_32 := 1;
1187  
1188        --  We have to simulate RLE_1 to avoid block size overflows
1189        --  in the decoder.
1190        --  RLE_1 often expands the data (and sometimes does it
1191        --  considerably) when it meets runs of length 4: 5 bytes are
1192        --  stored in that case.
1193        --  So the worst case expansion is by a factor 5/4.
1194  
1195        rle_1_block_size : Natural_32 := 0;
1196        b : Byte;
1197        b_prev : Byte := 0;  --  Initialization is to reassure the compiler.
1198        run : Natural := 0;
1199  
1200        procedure Simulate_Store_Run with Inline is
1201        begin
1202          rle_1_block_size := rle_1_block_size + Integer_32 (Integer'Min (4, run));
1203          if run >= 4 then
1204            pragma Assert (run <= 259);
1205            rle_1_block_size := rle_1_block_size + 1;
1206          end if;
1207          run := 1;
1208        end Simulate_Store_Run;
1209  
1210        start : Boolean := True;
1211  
1212      begin
1213        --  Data acquisition:
1214        while More_Bytes
1215          and then rle_1_block_size + 5 < dyn_block_capacity
1216          --  ^ The +5 is because sometimes a pack of max 5 bytes is sent by Store_Run.
1217          and then raw_buf_index < raw_buf'Last
1218        loop
1219          b := Read_Byte;
1220          raw_buf_index := raw_buf_index + 1;
1221          raw_buf (raw_buf_index) := b;
1222          if stream_rest /= unknown_size then
1223            stream_rest := stream_rest - 1;
1224          end if;
1225          if start or else b /= b_prev then
1226            --  Startup or Run break:
1227            Simulate_Store_Run;
1228            start := False;
1229          elsif run = 259 then
1230            --  Force a run break, even though b = b_prev:
1231            Simulate_Store_Run;
1232          else
1233            run := run + 1;
1234          end if;
1235          b_prev := b;
1236        end loop;
1237        Simulate_Store_Run;
1238  
1239        if single_segment then
1240          --  No segmentation /splitting:
1241          Encode_Block (raw_buf (1 .. raw_buf_index));
1242        else
1243          Segmentation_for_BZip2.Segment_by_Entropy (raw_buf (1 .. raw_buf_index), seg);
1244  
1245          if seg.Is_Empty then
1246            Encode_Block (raw_buf (1 .. 0));
1247          else
1248            for s of seg loop
1249              Encode_Block (raw_buf (index_start .. s));
1250              index_start := s + 1;
1251            end loop;
1252          end if;
1253  
1254          if Integer (seg.Length) > 1 then
1255            Trace ("Segmentation into" & seg.Length'Image & " segments", headlines);
1256            for s of seg loop
1257              Trace ("  Segment limit at" & s'Image, headlines);
1258            end loop;
1259          end if;
1260  
1261        end if;
1262  
1263        Unchecked_Free (raw_buf);
1264      exception
1265        when others =>
1266          Unchecked_Free (raw_buf);
1267          raise;
1268      end Read_and_Split_Block;
1269  
1270      procedure Write_Stream_Header is
1271        magic : String := stream_header_magic;
1272      begin
1273        magic (magic'Last) := Character'Val (Character'Pos ('0') + level);
1274        Put_Bits (magic);
1275      end Write_Stream_Header;
1276  
1277      procedure Write_Stream_Footer is
1278      begin
1279        Put_Bits (stream_footer_magic);
1280        Put_Bits (combined_crc, 32);
1281        if bit_pos < 7 then
1282          Flush_Bit_Buffer;
1283        end if;
1284      end Write_Stream_Footer;
1285  
1286      --  Vertically challenged blocks.
1287      small_block_prop_min : constant := 0.05;  --  Below that, not worth the trouble.
1288      small_block_prop_max : constant := 0.30;  --  Above that, not worth the trouble either.
1289  
1290    begin
1291      Write_Stream_Header;
1292      loop
1293        if Float (stream_rest) in
1294          Float (block_capacity) * (1.0 + small_block_prop_min) ..
1295          Float (block_capacity) * (1.0 + small_block_prop_max)
1296        then
1297          --  Avoid encoding the last block as a "too" small one (poorer compression)
1298          --  if we can balance the last two blocks.
1299          --  NB: a more sophisticated balancing using (1.0 - small_block_prop_max)
1300          --  did not deliver convincing results.
1301          Read_and_Split_Block (Natural_32 (stream_rest) / 2);
1302        else
1303          Read_and_Split_Block (block_capacity);
1304        end if;
1305        exit when not More_Bytes;
1306      end loop;
1307      Write_Stream_Footer;
1308    end Encode;
1309  
1310  end BZip2.Encoding;

Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip). Ada programming.
Some news about Zip-Ada and other Ada projects on Gautier's blog.