Source file : zip.ads
1 -- ________ ___ ______ ______ ___
2 -- /___..._/ |.| |.___.\ /. __ .\ __|.| ____
3 -- /../ |.| |.____/ |.|__|.| /....| __\..\
4 -- _/../___ |.| |.| === |..__..| |. = .| | = ..|
5 -- /_______/ |_| /__| /__| |_| \__\_| \__\_|
6
7 -- Zip library
8 ---------------
9 --
10 -- Library for manipulating archive files in the Zip format
11 --
12 -- Pure Ada 2012+ code, 100% portable: OS-, CPU- and compiler- independent.
13 --
14 -- Version / date / download info: see the version, reference, web strings
15 -- defined at the end of the public part of this package.
16
17 -- Legal licensing note:
18
19 -- Copyright (c) 1999 .. 2025 Gautier de Montmollin
20 -- SWITZERLAND
21
22 -- Permission is hereby granted, free of charge, to any person obtaining a copy
23 -- of this software and associated documentation files (the "Software"), to deal
24 -- in the Software without restriction, including without limitation the rights
25 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
26 -- copies of the Software, and to permit persons to whom the Software is
27 -- furnished to do so, subject to the following conditions:
28
29 -- The above copyright notice and this permission notice shall be included in
30 -- all copies or substantial portions of the Software.
31
32 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
38 -- THE SOFTWARE.
39
40 -- NB: this is the MIT License, as found 12-Sep-2007 on the site
41 -- http://www.opensource.org/licenses/mit-license.php
42
43 with Zip_Streams;
44 with Ada.Calendar, Ada.Finalization, Ada.Streams.Stream_IO, Ada.Text_IO;
45 with Interfaces;
46 with System;
47
48 package Zip is
49
50 -----------------------------------------------------------------
51 -- Zip_Info --
52 -----------------------------------------------------------------
53 -- Zip_Info contains the Zip file name (if it is a file) --
54 -- or its input stream access, and the archive's directory. --
55 -----------------------------------------------------------------
56
57 type Zip_Info is
58 new Ada.Finalization.Controlled with private;
59
60 -----------------------------------------------
61 -- Load the whole .zip directory contained --
62 -- in archive (from) for quick searching. --
63 -----------------------------------------------
64
65 type Duplicate_name_policy is
66 (admit_duplicates, -- two entries in the Zip archive may have the same full name
67 error_on_duplicate); -- raise exception on attempt to add twice the same entry name
68
69 -- Load from a file
70
71 procedure Load
72 (info : out Zip_Info;
73 from : in String; -- Zip file name
74 case_sensitive : in Boolean := False;
75 duplicate_names : in Duplicate_name_policy := error_on_duplicate);
76
77 -- Load from a stream
78
79 procedure Load
80 (info : out Zip_Info;
81 from : in out Zip_Streams.Root_Zipstream_Type'Class;
82 case_sensitive : in Boolean := False;
83 duplicate_names : in Duplicate_name_policy := error_on_duplicate);
84
85 Archive_corrupted,
86 Archive_open_error,
87 Duplicate_name : exception;
88
89 Zip_file_open_error : exception renames Archive_open_error; -- Archive is not always a file!
90 pragma Obsolescent (Zip_file_open_error, "Better use the name: Archive_open_error");
91
92 -- Zip_File_Error: exception renames Archive_corrupted; -- Now really obsolete.
93 -- pragma Obsolescent(Zip_File_Error); -- Now really obsolete.
94
95 function Is_loaded (info : in Zip_Info) return Boolean;
96
97 function Zip_Name (info : in Zip_Info) return String;
98
99 function Zip_Comment (info : in Zip_Info) return String;
100
101 function Zip_Stream (info : in Zip_Info) return Zip_Streams.Zipstream_Class_Access;
102
103 function Entries (info : in Zip_Info) return Natural;
104
105 procedure Delete (info : in out Zip_Info);
106 pragma Obsolescent (Delete, "Delete happens automatically since v.56.");
107
108 Forgot_to_load_zip_info : exception;
109
110 -- Data sizes in archive
111 subtype Zip_32_Data_Size_Type is Interfaces.Unsigned_32;
112 subtype Zip_64_Data_Size_Type is Interfaces.Unsigned_64;
113
114 ---------
115
116 -- Compression "methods" - actually, *formats* - in the "official" PKWARE Zip format.
117 -- Details in appnote.txt, part V.J
118 --
119 -- C : supported by Zip-Ada for compressing
120 -- D : supported by Zip-Ada for decompressing
121
122 type PKZip_method is
123 (store, -- C, D
124 shrink, -- C, D
125 reduce_1, -- C, D
126 reduce_2, -- C, D
127 reduce_3, -- C, D
128 reduce_4, -- C, D
129 implode, -- D
130 tokenize,
131 deflate, -- C, D
132 deflate_e, -- D - "Enhanced deflate" or "Deflate64"
133 bzip2_meth, -- C, D
134 lzma_meth, -- C, D
135 zstandard,
136 mp3_recomp,
137 xz_recomp,
138 jpeg_recomp,
139 wavpack,
140 ppmd,
141 unknown);
142
143 subtype Reduce_Format is PKZip_method range reduce_1 .. reduce_4;
144
145 -- Return a String image, nicer than the 'Image attribute.
146 function Image (m : PKZip_method) return String;
147
148 -- Technical: translates the method code as set in zip archives
149 function Method_from_Code (x : Interfaces.Unsigned_16) return PKZip_method;
150 function Method_from_Code (x : Natural) return PKZip_method;
151
152 -- Internal time definition
153 subtype Time is Zip_Streams.Time;
154 function Convert (date : in Ada.Calendar.Time) return Time
155 renames Zip_Streams.Calendar.Convert;
156 function Convert (date : in Time) return Ada.Calendar.Time
157 renames Zip_Streams.Calendar.Convert;
158
159 -- Entry names within Zip archives are encoded either with
160 -- * the IBM PC (the one with a monochrome screen, only text mode)'s
161 -- character set: IBM 437
162 -- or
163 -- * Unicode UTF-8
164 --
165 -- Documentation: PKWARE's Appnote.txt, APPENDIX D - Language Encoding (EFS)
166
167 type Zip_Name_Encoding is (IBM_437, UTF_8);
168
169 -- Traverse a whole Zip_Info directory in sorted order, giving the
170 -- name for each entry to an user-defined "Action" procedure.
171 -- Concretely, you can process a whole Zip file that way, by extracting data
172 -- with Extract, or open a reader stream with UnZip.Streams.
173 -- See the Comp_Zip or Find_Zip tools as application examples.
174 generic
175 with procedure Action (name : String); -- 'name' is compressed entry's name
176 procedure Traverse (z : Zip_Info);
177
178 -- Same as Traverse, but Action gives also full name information.
179 -- The pair (name, name_encoding) allows for an unambiguous Unicode
180 -- name decoding. See the AZip project for an implementation.
181 generic
182 with procedure Action
183 (name : String; -- 'name' is compressed entry's name
184 name_encoding : Zip_Name_Encoding);
185 --
186 procedure Traverse_Unicode (z : Zip_Info);
187
188 -- Same as Traverse, but Action gives also full technical informations
189 -- about the compressed entry.
190 generic
191 with procedure Action
192 (name : String; -- 'name' is compressed entry's name
193 file_index : Zip_Streams.ZS_Index_Type;
194 comp_size : Zip_64_Data_Size_Type;
195 uncomp_size : Zip_64_Data_Size_Type;
196 crc_32 : Interfaces.Unsigned_32;
197 date_time : Time;
198 method : PKZip_method;
199 name_encoding : Zip_Name_Encoding;
200 read_only : Boolean;
201 encrypted_2_x : Boolean; -- PKZip 2.x encryption
202 user_code : in out Integer);
203 --
204 procedure Traverse_verbose (z : Zip_Info);
205
206 -- Academic: see how well the name tree is balanced
207 procedure Tree_Stat
208 (z : in Zip_Info;
209 total : out Natural;
210 max_depth : out Natural;
211 avg_depth : out Float);
212
213 --------------------------------------------------------------------------
214 -- Offsets - various procedures giving 1-based indexes to local headers --
215 --------------------------------------------------------------------------
216
217 -- Find 1st offset in a Zip stream (i.e. the first's archived entry's offset)
218
219 procedure Find_first_Offset
220 (file : in out Zip_Streams.Root_Zipstream_Type'Class;
221 file_index : out Zip_Streams.ZS_Index_Type);
222
223 -- If the archive is empty (the 22 byte .zip file), there is no first entry or offset.
224 Archive_is_empty : exception;
225
226 -- Find offset of a certain compressed file
227 -- in a Zip file (file opened and kept open)
228
229 procedure Find_Offset
230 (file : in out Zip_Streams.Root_Zipstream_Type'Class;
231 name : in String;
232 case_sensitive : in Boolean;
233 file_index : out Zip_Streams.ZS_Index_Type;
234 comp_size : out Zip_64_Data_Size_Type;
235 uncomp_size : out Zip_64_Data_Size_Type;
236 crc_32 : out Interfaces.Unsigned_32);
237
238 -- Find offset of a certain compressed file in a pre-loaded Zip_Info data
239
240 procedure Find_Offset
241 (info : in Zip_Info;
242 name : in String;
243 name_encoding : out Zip_Name_Encoding;
244 file_index : out Zip_Streams.ZS_Index_Type;
245 comp_size : out Zip_64_Data_Size_Type;
246 uncomp_size : out Zip_64_Data_Size_Type;
247 crc_32 : out Interfaces.Unsigned_32);
248
249 -- Find offset of a certain compressed file in a pre-loaded Zip_Info data.
250 -- This version scans the whole catalogue and returns the index of the first
251 -- entry with a matching name, ignoring directory information.
252 -- For instance, if the Zip archive contains "zip-ada/zip_lib/zip.ads",
253 -- "zip.ads" will match - or even "ZIP.ads" if info has been loaded in case-insensitive mode.
254 -- Caution: this may be much slower than the exact search with Find_offset.
255
256 procedure Find_Offset_without_Directory
257 (info : in Zip.Zip_Info;
258 name : in String;
259 name_encoding : out Zip.Zip_Name_Encoding;
260 file_index : out Zip_Streams.ZS_Index_Type;
261 comp_size : out Zip_64_Data_Size_Type;
262 uncomp_size : out Zip_64_Data_Size_Type;
263 crc_32 : out Interfaces.Unsigned_32);
264
265 Entry_name_not_found : exception;
266 File_name_not_found : exception renames Entry_name_not_found;
267 pragma Obsolescent (File_name_not_found, "Better use the name: Entry_name_not_found");
268
269 function Exists (info : Zip_Info; name : String) return Boolean;
270
271 -- User code: any information e.g. as a result of a string search,
272 -- archive comparison, archive update, recompression,...
273
274 procedure Set_User_Code (info : Zip_Info; name : String; code : Integer);
275
276 function User_Code (info : Zip_Info; name : String) return Integer;
277
278 procedure Get_Sizes
279 (info : in Zip_Info;
280 name : in String;
281 comp_size : out Zip_64_Data_Size_Type;
282 uncomp_size : out Zip_64_Data_Size_Type);
283
284 -- User-defined procedure for feedback occuring during
285 -- compression or decompression (entry_skipped meaningful
286 -- only for the latter)
287
288 type Feedback_Proc is access
289 procedure
290 (percents_done : in Natural; -- %'s completed
291 entry_skipped : in Boolean; -- indicates one can show "skipped", no %'s
292 user_abort : out Boolean); -- e.g. transmit a "click on Cancel" here
293
294 -------------------------------------------------------------------------
295 -- Goodies - things used internally by Zip-Ada but are not bound to --
296 -- Zip archive purposes and that might be generally useful. --
297 -------------------------------------------------------------------------
298
299 -- Block_Read: general-purpose procedure (nothing really specific to Zip /
300 -- UnZip): reads either the whole buffer from a file, or if the end of
301 -- the file lays inbetween, a part of the buffer.
302 --
303 -- The procedure's names and parameters corresponds to Borland / Turbo
304 -- Pascal / Delphi's BlockRead's.
305
306 subtype Byte is Interfaces.Unsigned_8;
307 type Byte_Buffer is array (Integer range <>) of aliased Byte;
308 type p_Byte_Buffer is access Byte_Buffer;
309
310 procedure Block_Read
311 (file : in Ada.Streams.Stream_IO.File_Type;
312 buffer : out Byte_Buffer;
313 actually_read : out Natural);
314 -- ^ = buffer'Length if no end of file occurred
315 -- before last buffer element.
316
317 -- Same for general streams
318 --
319 procedure Block_Read
320 (stream : in out Zip_Streams.Root_Zipstream_Type'Class;
321 buffer : out Byte_Buffer;
322 actually_read : out Natural);
323 -- ^ = buffer'Length if no end of stream occurred
324 -- before last buffer element.
325
326 -- Same, but instead of giving actually_read, raises End_Error if
327 -- the buffer cannot be fully read.
328 -- This mimics the 'Read stream attribute; can be a lot faster, depending
329 -- on the compiler's run-time library.
330 procedure Block_Read
331 (stream : in out Zip_Streams.Root_Zipstream_Type'Class;
332 buffer : out Byte_Buffer);
333
334 -- This mimics the 'Write stream attribute; can be a lot faster, depending
335 -- on the compiler's run-time library.
336 -- NB: here we can use the root stream type: no question of size, index,...
337 procedure Block_Write
338 (stream : in out Ada.Streams.Root_Stream_Type'Class;
339 buffer : in Byte_Buffer);
340
341 -- Copy a chunk from a stream into another one, using a temporary buffer
342 procedure Copy_Chunk
343 (from : in out Zip_Streams.Root_Zipstream_Type'Class;
344 into : in out Ada.Streams.Root_Stream_Type'Class;
345 bytes : Natural;
346 buffer_size : Positive := 1024 * 1024;
347 Feedback : Feedback_Proc := null);
348
349 -- Copy a whole file into a stream, using a temporary buffer
350 procedure Copy_File
351 (file_name : String;
352 into : in out Ada.Streams.Root_Stream_Type'Class;
353 buffer_size : Positive := 1024 * 1024);
354
355 -- This does the same as Ada 2005's Ada.Directories.Exists
356 -- Just there as helper for Ada 95 only systems
357 --
358 function Exists (file_name : String) return Boolean;
359
360 -- Write a string containing line endings (possibly from another system)
361 -- into a text file, with the "correct", native line endings.
362 -- Works for displaying/saving correctly
363 -- CR&LF (DOS/Win), LF (UNIX), CR (Mac OS < 9)
364 --
365 procedure Put_Multi_Line
366 (out_file : Ada.Text_IO.File_Type;
367 text : String);
368
369 procedure Write_as_Text
370 (out_file : Ada.Text_IO.File_Type;
371 buffer : Byte_Buffer;
372 last_char : in out Character); -- track line-ending characters between writes
373
374 function Hexadecimal (x : Interfaces.Unsigned_32) return String;
375
376 -----------------------------------------------------------------
377 -- Information about this package - e.g., for an "about" box --
378 -----------------------------------------------------------------
379
380 version : constant String := "61";
381 reference : constant String := "29-Mar-2025";
382 -- Hopefully the latest version can be acquired from one of those URLs:
383 web : constant String := "https://unzip-ada.sourceforge.io/";
384 web2 : constant String := "https://sourceforge.net/projects/unzip-ada/";
385 web3 : constant String := "https://github.com/zertovitch/zip-ada";
386 web4 : constant String := "https://alire.ada.dev/crates/zipada";
387
388 ---------------------
389 -- Private items --
390 ---------------------
391
392 private
393
394 -- Zip_Info, 23.VI.1999.
395 --
396 -- The PKZIP central directory is coded here as a binary tree
397 -- to allow a fast retrieval of the searched offset in zip file.
398 -- E.g. for a 1000-file archive, the offset will be found in less
399 -- than 11 moves: 2**10=1024 (balanced case), without any read
400 -- in the archive.
401 --
402 -- Notes on search dictionary
403 ------------------------------
404 -- 19-Oct-2018: rev. 670 to 683 used a Vector and a Hashed Map
405 -- from Ada.Containers. The loading of the dictionary was
406 -- much faster (2x), but there were performance bottlenecks elsewhere,
407 -- not solved by profiling. On an archive with 18000 small entries of
408 -- around 1 KiB each, comp_zip ran 100x slower!
409 -- Neither the restricted use of Unbounded_String, nor the replacement
410 -- of the Vector by an array helped solving the performance issue.
411 -- 2022: second attempt with Vectors & Indefinite_Hashed_Maps (both a vector
412 -- and a map are needed because a Zip archive may contain entries with
413 -- duplicate keys; otherwise a map would be sufficient).
414 -- - Test_Zip_Info_Timing: load time on many_65535.zip:
415 -- 0.75 seconds (binary tree) -> 0.44 seconds (vector & map)
416 -- - But... comp_zip many_4096.zip many_4096.zip -q2:
417 -- 5.5 seconds (binary tree) -> 13.2 seconds (vector & map) !
418
419 type Dir_node;
420 type p_Dir_node is access Dir_node;
421
422 type Dir_node (name_len : Natural) is record
423 left, right : p_Dir_node;
424 dico_name : String (1 .. name_len); -- UPPER if case-insensitive search
425 file_name : String (1 .. name_len);
426 file_index : Zip_Streams.ZS_Index_Type;
427 comp_size : Zip_64_Data_Size_Type;
428 uncomp_size : Zip_64_Data_Size_Type;
429 crc_32 : Interfaces.Unsigned_32;
430 date_time : Time;
431 method : PKZip_method;
432 name_encoding : Zip_Name_Encoding;
433 read_only : Boolean; -- TBD: attributes of most supported systems
434 encrypted_2_x : Boolean;
435 user_code : Integer;
436 end record;
437
438 type Zip_archive_format_type is (Zip_32, Zip_64);
439
440 type p_String is access String;
441
442 type Zip_Info is new Ada.Finalization.Controlled with record
443 loaded : Boolean := False;
444 case_sensitive : Boolean;
445 zip_file_name : p_String; -- a file name...
446 zip_input_stream : Zip_Streams.Zipstream_Class_Access; -- ...or an input stream
447 -- ^ when not null, we use this, and not zip_file_name
448 dir_binary_tree : p_Dir_node;
449 total_entries : Natural;
450 zip_file_comment : p_String;
451 zip_archive_format : Zip_archive_format_type := Zip_32;
452 end record;
453
454 -- After a copy, need to clone a few things.
455 overriding procedure Adjust (info : in out Zip_Info);
456
457 -- Free heap-allocated memory.
458 overriding procedure Finalize (info : in out Zip_Info);
459
460 -- System.Word_Size: 13.3(8): A word is the largest amount of storage
461 -- that can be conveniently and efficiently manipulated by the hardware,
462 -- given the implementation's run-time model.
463 --
464 min_bits_32 : constant := Integer'Max (32, System.Word_Size);
465 min_bits_16 : constant := Integer'Max (16, System.Word_Size);
466
467 -- We define an Integer type which is at least 32 bits, but n bits
468 -- on a native n (> 32) bits architecture.
469 -- Integer_M16 is not needed: Integer already guarantees 16 bits
470 --
471 type Integer_M32 is range -2**(min_bits_32 - 1) .. 2**(min_bits_32 - 1) - 1;
472 subtype Natural_M32 is Integer_M32 range 0 .. Integer_M32'Last;
473 subtype Positive_M32 is Integer_M32 range 1 .. Integer_M32'Last;
474
475 type Unsigned_M16 is mod 2**min_bits_16;
476 type Unsigned_M32 is mod 2**min_bits_32;
477
478 -- Codes for compression formats in Zip archives
479 -- See PKWARE's Appnote, "4.4.5 compression method"
480 --
481 package Compression_format_code is
482 store_code : constant := 0;
483 shrink_code : constant := 1;
484 reduce_code : constant := 2;
485 implode_code : constant := 6;
486 tokenize_code : constant := 7;
487 deflate_code : constant := 8;
488 deflate_e_code : constant := 9;
489 bzip2_code : constant := 12;
490 lzma_code : constant := 14;
491 zstandard_code : constant := 93;
492 mp3_code : constant := 94;
493 xz_code : constant := 95;
494 jpeg_code : constant := 96;
495 wavpack_code : constant := 97;
496 ppmd_code : constant := 98;
497 end Compression_format_code;
498
499 end Zip;
Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.