Source file : data_segmentation.ads
1 -- Data_Segmentation
2 ---------------------
3 --
4 -- Pure Ada 2012+ code, 100% portable: OS-, CPU- and compiler-independent.
5 --
6 -- The Data_Segmentation package offers tools for splitting data into more
7 -- homogeneous blocks, with the hope of compressing those blocks better.
8 --
9 -- Legal licensing note:
10 --
11 -- Copyright (c) 2025 Gautier de Montmollin
12 -- SWITZERLAND
13 --
14 -- Permission is hereby granted, free of charge, to any person obtaining a copy
15 -- of this software and associated documentation files (the "Software"), to deal
16 -- in the Software without restriction, including without limitation the rights
17 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 -- copies of the Software, and to permit persons to whom the Software is
19 -- furnished to do so, subject to the following conditions:
20 --
21 -- The above copyright notice and this permission notice shall be included in
22 -- all copies or substantial portions of the Software.
23 --
24 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 -- THE SOFTWARE.
31 --
32 -- NB: this is the MIT License, as found 21-Aug-2016 on the site
33 -- http://www.opensource.org/licenses/mit-license.php
34
35 with Ada.Containers.Vectors;
36
37 generic -- Generic unit: the instantiator supplies the data types and the tuning parameters below.
38 -- Formal types: the buffer's index type, its element type, and the buffer (array) type itself.
39 type Index is range <>; -- Any signed integer type.
40 type Alphabet is (<>); -- Any discrete type (usually: Byte, Unsigned_8).
41 type Buffer_Type is array (Index range <>) of Alphabet; -- Unconstrained array of Alphabet, indexed by Index.
42 -- Formal objects (mode "in" by default): tuning values fixed at instantiation time.
43 discrepancy_threshold : Float; -- Discrepancy detection threshold (e.g.: 1.9).
44 index_threshold : Index; -- Do segmentation only above a certain distance (e.g.: 20_000).
45 window_size : Index; -- Sliding window size (e.g.: 10_000).
46 -- NOTE(review): distances and sizes above are presumably counted in buffer elements - confirm in the package body.
47 package Data_Segmentation is
48 -- Result type: a vector of Index values constrained to 1 .. Index'Last.
49 subtype Positive_Index is Index range 1 .. Index'Last; -- NOTE(review): needs 1 in Index's range unless this range is null - confirm intended Index'First.
50 package Index_Vectors is new Ada.Containers.Vectors (Positive, Positive_Index); -- Vector of Positive_Index, indexed by Positive.
51 subtype Segmentation is Index_Vectors.Vector; -- Type of the "out" parameter of Segment_by_Entropy.
52 -- Segment_by_Entropy: takes the data in buffer (read-only) and delivers the segmentation through seg.
53 procedure Segment_by_Entropy (buffer : in Buffer_Type; seg : out Segmentation);
54 -- NOTE(review): seg presumably lists the buffer positions delimiting the segments - TODO confirm (start or end positions) in the body.
55 end Data_Segmentation;
Web view of Ada source code generated by GNATHTML, project: ALI_Parse version 1.0.
Zip-Ada: Ada library for zip archive files (.zip).
Ada programming.
Some news about Zip-Ada and other Ada projects
on Gautier's blog.