Source file : zip-compress-shrink.adb
1 -- Legal licensing note:
2
3 -- Copyright (c) 2006 .. 2022 Gautier de Montmollin (see spec. for credits)
4 -- SWITZERLAND
5
6 -- Permission is hereby granted, free of charge, to any person obtaining a copy
7 -- of this software and associated documentation files (the "Software"), to deal
8 -- in the Software without restriction, including without limitation the rights
9 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 -- copies of the Software, and to permit persons to whom the Software is
11 -- furnished to do so, subject to the following conditions:
12
13 -- The above copyright notice and this permission notice shall be included in
14 -- all copies or substantial portions of the Software.
15
16 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 -- THE SOFTWARE.
23
24 -- NB: this is the MIT License, as found on the site
25 -- http://www.opensource.org/licenses/mit-license.php
26
27 with Ada.Unchecked_Deallocation;
28
29 procedure Zip.Compress.Shrink
30 (input,
31 output : in out Zip_Streams.Root_Zipstream_Type'Class;
32 input_size_known : Boolean;
33 input_size : Zip_64_Data_Size_Type; -- ignored if unknown
34 feedback : Feedback_Proc;
35 CRC : in out Interfaces.Unsigned_32; -- only updated here
36 crypto : in out CRC_Crypto.Crypto_pack;
37 output_size : out Zip_64_Data_Size_Type;
38 compression_ok : out Boolean) -- indicates compressed < uncompressed
39 is
40 use Interfaces;
41
42 ------------------
43 -- Buffered I/O --
44 ------------------
45
46 IO_buffers : IO_Buffers_Type;
47
48 procedure Put_byte (B : Unsigned_8) is
49 begin
50 IO_buffers.OutBuf (IO_buffers.OutBufIdx) := B;
51 IO_buffers.OutBufIdx := IO_buffers.OutBufIdx + 1;
52 if IO_buffers.OutBufIdx > IO_buffers.OutBuf.all'Last then
53 Write_Block (IO_buffers, input_size_known, input_size, output, output_size, crypto);
54 end if;
55 end Put_byte;
56
57 procedure Flush_output is
58 begin
59 if IO_buffers.OutBufIdx > 1 then
60 Write_Block (IO_buffers, input_size_known, input_size, output, output_size, crypto);
61 end if;
62 end Flush_output;
63
64 --------------------------------------------------------------------------
65
66 ------------------------------------------------------
67 -- Bit code buffer, for sending data at bit level --
68 ------------------------------------------------------
69
70 -- Output buffer. Bits are inserted starting at the right (least
71 -- significant bits). The width of bit_buffer must be at least 16 bits.
72 subtype U32 is Unsigned_32;
73 bit_buffer : U32 := 0;
74 -- Number of valid bits in bit_buffer. All bits above the last valid bit are always zero.
75 valid_bits : Integer := 0;
76
77 procedure Flush_bit_buffer is
78 begin
79 while valid_bits > 0 loop
80 Put_byte (Byte (bit_buffer and 16#FF#));
81 bit_buffer := Shift_Right (bit_buffer, 8);
82 valid_bits := Integer'Max (0, valid_bits - 8);
83 end loop;
84 bit_buffer := 0;
85 end Flush_bit_buffer;
86
87 Min_bits : constant := 9; -- Starting code size of 9 bits
88 Max_bits : constant := 13; -- Maximum code size of 13 bits
89
90 subtype Code_size_type is Integer range 1 .. Max_bits;
91 code_size : Code_size_type; -- Size of codes (in bits) currently being written
92
93 -- Send a value on a given number of bits.
94 procedure Put_code (code : Natural) is
95 pragma Inline (Put_code);
96 begin
97 -- Put bits from code at the left of existing ones. They might be shifted away
98 -- partially on the left side (or even entirely if valid_bits is already = 32).
99 bit_buffer := bit_buffer or Shift_Left (U32 (code), valid_bits);
100 valid_bits := valid_bits + code_size;
101 if valid_bits > 32 then
102 -- Flush 32 bits to output as 4 bytes
103 Put_byte (Byte (bit_buffer and 16#FF#));
104 Put_byte (Byte (Shift_Right (bit_buffer, 8) and 16#FF#));
105 Put_byte (Byte (Shift_Right (bit_buffer, 16) and 16#FF#));
106 Put_byte (Byte (Shift_Right (bit_buffer, 24) and 16#FF#));
107 valid_bits := valid_bits - 32;
108 -- Empty buffer and put on it the rest of the code
109 bit_buffer := Shift_Right (U32 (code), code_size - valid_bits);
110 end if;
111 end Put_code;
112
113 Table_full : Boolean; -- Flag indicating a full symbol table
114
115 -- Define data types needed to implement a code table for LZW compression
116 type CodeRec is record -- Code Table record format...
117 Child : Integer; -- Index of 1st suffix for this prefix
118 Sibling : Integer; -- Index of next suffix in chain
119 Suffix : Natural; -- Suffix
120 end record;
121
122 TABLESIZE : constant := 8191; -- We'll need 4K entries in table
123
124 -- PKZip's Shrink is a variant of the LZW algorithm in that the
125 -- compressor controls the code increase and the table clearing.
126 -- See appnote.txt, section 5.1.
127 Special_Code : constant := 256;
128 Code_for_increasing_code_size : constant := 1;
129 Code_for_clearing_table : constant := 2;
130
131 FIRSTENTRY : constant := 257; -- First available table entry
132 UNUSED : constant := -1; -- Prefix indicating an unused code table entry
133
134 type Code_array is array (0 .. TABLESIZE) of CodeRec;
135 -- Define the code table
136
137 type Table_access is access Code_array;
138 procedure Dispose is new Ada.Unchecked_Deallocation (Code_array, Table_access);
139
140 Code_table : Table_access := null; -- Points to code table for LZW compression
141
142 -- Define data types needed to implement a free node list
143 type Free_list_array is array (FIRSTENTRY .. TABLESIZE) of Natural;
144 type Free_list_access is access Free_list_array;
145
146 procedure Dispose is
147 new Ada.Unchecked_Deallocation (Free_list_array, Free_list_access);
148
149 Free_list : Free_list_access := null; -- Table of free code table entries
150 Next_free : Integer; -- Index into free list table
151
152 ----------------------------------------------------------------------------
153 -- The following routines are used to allocate, initialize, and de-allocate
154 -- various dynamic memory structures used by the LZW compression algorithm
155 ----------------------------------------------------------------------------
156
157 procedure Build_Data_Structures is
158 begin
159 Code_table := new Code_array;
160 Free_list := new Free_list_array;
161 end Build_Data_Structures;
162
163 ---------------------------------------------------------------------------
164 procedure Destroy_Data_Structures is
165 begin
166 Dispose (Code_table);
167 Dispose (Free_list);
168 end Destroy_Data_Structures;
169
170 ---------------------------------------------------------------------------
171
172 procedure Initialize_Data_Structures is
173 begin
174 for I in 0 .. TABLESIZE loop
175 Code_table (I).Child := UNUSED;
176 Code_table (I).Sibling := UNUSED;
177 if I <= 255 then
178 Code_table (I).Suffix := I;
179 end if;
180 if I >= 257 then
181 Free_list (I) := I;
182 end if;
183 end loop;
184 Next_free := FIRSTENTRY;
185 Table_full := False;
186 end Initialize_Data_Structures;
187
188 ---------------------------------------------------------------------------
189 -- The following routines handle manipulation of the LZW Code Table
190 ---------------------------------------------------------------------------
191
192 ClearList : array (0 .. 1023) of Unsigned_8;
193 -- Bit mapped structure used in during adaptive resets
194
195 procedure Prune (Parent : Integer) is
196 -- Prune leaves from a subtree - Note: this is a recursive procedure
197 CurrChild : Integer;
198 NextSibling : Integer;
199 begin
200 CurrChild := Code_table (Parent).Child;
201 -- Find first Child that has descendants .. clear any that don't
202
203 while CurrChild /= UNUSED and then
204 Code_table (CurrChild).Child = UNUSED
205 loop
206 Code_table (Parent).Child := Code_table (CurrChild).Sibling;
207 Code_table (CurrChild).Sibling := UNUSED;
208 -- Turn on ClearList bit to indicate a cleared entry
209 ClearList (CurrChild / 8) :=
210 ClearList (CurrChild / 8) or
211 (Shift_Left (1, CurrChild mod 8));
212 CurrChild := Code_table (Parent).Child;
213 end loop;
214
215 if CurrChild /= UNUSED then -- If there are any children left ...
216 Prune (CurrChild);
217 NextSibling := Code_table (CurrChild).Sibling;
218 while NextSibling /= UNUSED loop
219 if Code_table (NextSibling).Child = UNUSED then
220 Code_table (CurrChild).Sibling :=
221 Code_table (NextSibling).Sibling;
222 Code_table (NextSibling).Sibling := UNUSED;
223 -- Turn on ClearList bit to indicate a cleared entry
224
225 ClearList (NextSibling / 8) :=
226 ClearList (NextSibling / 8) or
227 (Shift_Left (1, NextSibling mod 8));
228 NextSibling := Code_table (CurrChild).Sibling;
229 else
230 CurrChild := NextSibling;
231 Prune (CurrChild);
232 NextSibling := Code_table (CurrChild).Sibling;
233 end if;
234 end loop;
235 end if;
236 end Prune;
237
238 ---------------------------------------------------------------------------
239
240 procedure Clear_Table is
241 begin
242 ClearList := (others => 0);
243 -- Remove all leaf nodes by recursively pruning subtrees
244 for Node in 0 .. 255 loop
245 Prune (Node);
246 end loop;
247 -- Next, re-initialize our list of free table entries
248 Next_free := TABLESIZE + 1;
249 for Node in reverse FIRSTENTRY .. TABLESIZE loop
250 if (ClearList (Node / 8) and (Shift_Left (1, Node mod 8))) /= 0 then
251 Next_free := Next_free - 1;
252 Free_list (Next_free) := Node;
253 end if;
254 end loop;
255 --
256 Table_full := Next_free > TABLESIZE;
257 end Clear_Table;
258
259 ---------------------------------------------------------------------------
260
261 procedure Table_Add (Prefix_0 : Natural; Suffix : Natural) is
262 FreeNode : Natural;
263 Prefix : Natural := Prefix_0;
264 begin
265 if Next_free <= TABLESIZE then
266 FreeNode := Free_list (Next_free);
267 Next_free := Next_free + 1;
268 Code_table (FreeNode).Child := UNUSED;
269 Code_table (FreeNode).Sibling := UNUSED;
270 Code_table (FreeNode).Suffix := Suffix;
271 if Code_table (Prefix).Child = UNUSED then
272 Code_table (Prefix).Child := FreeNode;
273 else
274 Prefix := Code_table (Prefix).Child;
275 while Code_table (Prefix).Sibling /= UNUSED loop
276 Prefix := Code_table (Prefix).Sibling;
277 end loop;
278 Code_table (Prefix).Sibling := FreeNode;
279 end if;
280 end if;
281 --
282 Table_full := Next_free > TABLESIZE;
283 end Table_Add;
284
285 ---------------------------------------------------------------------------
286
287 ---------------------------------------------------------------------------
288 -- Search for a Prefix:Suffix pair in our Symbol table. If found, return
289 -- the index value where found. If not found, return False and set
290 -- Found_at to UNUSED.
291 ---------------------------------------------------------------------------
292 procedure Table_Lookup (
293 TargetPrefix : Integer;
294 TargetSuffix : Natural;
295 Found_at : out Integer;
296 Found : out Boolean
297 )
298 is
299 -- Was in 16-bit ASM
300 idx : Natural := TargetPrefix;
301 begin
302 -- Lookup an entry in the Hash Table. If found, return TRUE and set
303 -- parameter Found_at with the index of the entry at which the match
304 -- was found. If not found, return False and plug an UNUSED into Found_at.
305 if Code_table (idx).Child = UNUSED then
306 Found_at := UNUSED;
307 Found := False;
308 else
309 idx := Code_table (idx).Child;
310 loop
311 if Code_table (idx).Suffix = TargetSuffix then
312 Found_at := idx;
313 Found := True;
314 return;
315 elsif Code_table (idx).Sibling = UNUSED then
316 Found_at := UNUSED;
317 Found := False;
318 return;
319 else
320 idx := Code_table (idx).Sibling;
321 end if;
322 end loop;
323 end if;
324 end Table_Lookup;
325
326 ---------------------------------------------------------------------------
327 -- The actual Crunching algorithm
328 ---------------------------------------------------------------------------
329
330 Last_code : Integer := 0;
331 First_atom : Boolean; -- Flag indicating the START of a shrink operation
332 Max_code : Natural; -- Largest code that can be written in Code_size bits
333
334 procedure Shrink_Atom (Suffix : Integer) is
335 WhereFound : Integer;
336 lookup_ok : Boolean;
337 begin
338 if First_atom then -- If just getting started ...
339 bit_buffer := 0;
340 valid_bits := 0;
341 code_size := Min_bits; -- Initialize code size to minimum
342 Max_code := 2 ** code_size - 1;
343 Last_code := Suffix; -- get first character from input,
344 First_atom := False; -- and reset the first char flag.
345 elsif Suffix = UNUSED then -- Nothing to crunch... must be EOF on input
346 Put_code (Last_code); -- Write last prefix code
347 Flush_bit_buffer;
348 Flush_output; -- Flush our output buffer
349 elsif Table_full then
350 Put_code (Last_code);
351 -- NB: PKZip does not necessarily clear the table when
352 -- it is full. Hence the need for the special code below.
353 Put_code (Special_Code);
354 Put_code (Code_for_clearing_table);
355 Clear_Table;
356 Table_Add (Last_code, Suffix);
357 Last_code := Suffix;
358 else
359 Table_Lookup (Last_code, Suffix, WhereFound, lookup_ok);
360 if lookup_ok then
361 -- If Last_code:Suffix pair is found in the code table, then ...
362 -- ... set Last_code to the entry where the pair is located
363 Last_code := WhereFound;
364 else
365 -- Not in table
366 Put_code (Last_code); -- Write current Last_code code
367 Table_Add (Last_code, Suffix); -- Attempt to add to code table
368 Last_code := Suffix; -- Reset Last_code code for new char
369 if (
370 code_size < Max_bits and
371 not Table_full
372 -- 12-Dec-2007: the Pascal code had an out-of-range access
373 -- with Free_list(Next_free) below when the table was full!
374 -- NB: according to tests, and surely it can be proven,
375 -- the case (Code_size < Max_bits and Table_Full) never happens,
376 -- so that
377 -- "Code_size < Max_bits and then Free_list(Next_free) > Max_code"
378 -- could be sufficient. But until it is proven, I prefer to
379 -- keep the "and not Table_Full"
380 )
381 and then
382 Free_list (Next_free) > Max_code
383 then
384 -- Time to increase the code size and change the max. code
385 Put_code (Special_Code);
386 Put_code (Code_for_increasing_code_size);
387 code_size := code_size + 1;
388 Max_code := 2 ** code_size - 1;
389 end if;
390 end if;
391 end if;
392 end Shrink_Atom;
393
394 feedback_milestone,
395 Bytes_in : Zip_Streams.ZS_Size_Type := 0; -- Count of input file bytes processed
396
397 procedure Process_Input (Source : Byte_Buffer) is
398 PctDone : Natural;
399 user_aborting : Boolean;
400 Last_processed : Integer := Source'First - 1;
401 use Zip_Streams;
402 begin
403 if Source'Length < 1 then
404 Shrink_Atom (UNUSED);
405 else
406 for I in Source'Range loop
407 Bytes_in := Bytes_in + 1;
408 if feedback /= null then
409 if Bytes_in = 1 then
410 feedback (0, False, user_aborting);
411 end if;
412 if feedback_milestone > 0 and then -- Bugfix GdM 23-Dec-2002
413 ((Bytes_in - 1) mod feedback_milestone = 0
414 or Bytes_in = ZS_Size_Type (input_size))
415 then
416 if input_size_known then
417 PctDone := Integer ((100.0 * Float (Bytes_in)) / Float (input_size));
418 feedback (PctDone, False, user_aborting);
419 else
420 feedback (0, False, user_aborting);
421 end if;
422 if user_aborting then
423 raise User_abort;
424 end if;
425 end if;
426 end if;
427 Shrink_Atom (Integer (Source (I)));
428 Last_processed := I;
429 if input_size_known and then Bytes_in >= ZS_Size_Type (input_size) then
430 -- The job is done, even though there are more in the buffer
431 IO_buffers.InputEoF := True;
432 exit;
433 end if;
434 end loop;
435 Zip.CRC_Crypto.Update (CRC, Source (Source'First .. Last_processed));
436 end if;
437 end Process_Input;
438
439 procedure Deallocation is
440 begin
441 Destroy_Data_Structures;
442 Deallocate_Buffers (IO_buffers);
443 end Deallocation;
444
445 Remaining : Natural;
446
447 begin
448 Allocate_Buffers (IO_buffers, input_size_known, input_size);
449 Build_Data_Structures;
450 Initialize_Data_Structures;
451 output_size := 0;
452 --
453 begin
454 Read_Block (IO_buffers, input); -- Prime the input buffer
455 First_atom := True; -- 1st character flag for Crunch procedure
456 if input_size_known then
457 feedback_milestone := Zip_Streams.ZS_Size_Type (input_size / feedback_steps);
458 end if;
459 while not IO_buffers.InputEoF loop
460 Remaining := IO_buffers.MaxInBufIdx - IO_buffers.InBufIdx + 1;
461 if Remaining = 0 then
462 Read_Block (IO_buffers, input);
463 else
464 Process_Input (IO_buffers.InBuf (IO_buffers.InBufIdx .. IO_buffers.InBufIdx + Remaining - 1));
465 IO_buffers.InBufIdx := IO_buffers.InBufIdx + Remaining;
466 end if;
467 end loop;
468 Process_Input (IO_buffers.InBuf (1 .. 0)); -- This forces EOF processing
469 compression_ok := Bytes_in > 0;
470 exception
471 when Compression_inefficient =>
472 compression_ok := False;
473 end;
474 --
475 Deallocation;
476 exception
477 when others =>
478 Deallocation;
479 raise;
480 end Zip.Compress.Shrink;
Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.