1
0
mirror of https://github.com/madler/zlib synced 2025-03-28 21:13:15 +00:00

Merge 8e55222daedd73b574f8f60b381f1b6cd78efe86 into 5a82f71ed1dfc0bec044d9702463dbdf84ea3b71

This commit is contained in:
Bulent Abali 2025-03-10 10:43:00 +00:00 committed by GitHub
commit 9bcf84ead3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 459 additions and 0 deletions

View File

@ -2150,3 +2150,35 @@ local block_state deflate_huff(deflate_state *s, int flush) {
FLUSH_BLOCK(s, 0);
return block_done;
}
/* ===========================================================================
 * Build the dynamic Huffman table (DHT) described by the literal/length
 * histogram lhistg and the distance histogram dhistg, in the form used at the
 * front of a deflate dynamic (type 2) block, and deliver it through
 * strm->next_out.
 *
 * make_trees() reads L_CODES counts from lhistg and D_CODES counts from
 * dhistg and assembles the table in the pending buffer of the deflate state;
 * flush_pending() then moves it to the output buffer.  The caller learns how
 * many bytes were produced from the change in strm->avail_out.
 *
 * *bits_valid receives the value returned by make_trees(): the number of bits
 * used in the last output byte, where 0 means that the last byte is full.
 *
 * Returns Z_OK on success; Z_STREAM_ERROR if strm, strm->state,
 * strm->next_out or either histogram is Z_NULL; Z_BUF_ERROR if avail_out is
 * zero, if bits_valid is Z_NULL, or if the output buffer was too small to
 * hold the complete table (in which case only a prefix has been written).
 *
 * NOTE(review): make_trees() calls init_block(), so this looks intended for a
 * stream that has no block in progress -- confirm before using it mid-stream.
 */
extern int make_trees(deflate_state *s, int *l_hist, int *d_hist);

int ZEXPORT deflate_make_dht(z_streamp strm, int *lhistg, int *dhistg,
                             int *bits_valid) {
    deflate_state *s;

    if (strm == Z_NULL || strm->state == Z_NULL) {
        return Z_STREAM_ERROR;
    }
    if (strm->next_out == Z_NULL || lhistg == Z_NULL || dhistg == Z_NULL) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    if (strm->avail_out == 0 || bits_valid == Z_NULL) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }
    s = strm->state;

    *bits_valid = make_trees(s, lhistg, dhistg);
    flush_pending(strm);

    /* Anything still pending did not fit in the output buffer; reporting
     * Z_OK here would hand the caller a silently truncated table. */
    if (s->pending != 0) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }
    return Z_OK;
}

View File

@ -52,3 +52,7 @@ zran.h
index a zlib or gzip stream and randomly access it
- illustrates the use of Z_BLOCK, inflatePrime(), and
inflateSetDictionary() to provide random access
makedht.c
makes a dynamic huffman table given lit/len and distance histograms
-- illustrates the proper use of deflate_make_dht()

39
examples/jabber1.lzcount Normal file
View File

@ -0,0 +1,39 @@
32 : 2
39 : 1
84 : 1
97 : 1
98 : 1
114 : 1
115 : 1
119 : 1
256 : 1
0 : 0
1 : 0
2 : 0
3 : 0
4 : 0
5 : 0
6 : 0
7 : 0
8 : 0
9 : 0
10 : 0
11 : 0
12 : 0
13 : 0
14 : 0
15 : 0
16 : 0
17 : 0
18 : 0
19 : 0
20 : 0
21 : 0
22 : 0
23 : 0
24 : 0
25 : 0
26 : 0
27 : 0
28 : 0
29 : 0

314
examples/makedht.c Normal file
View File

@ -0,0 +1,314 @@
/*
Makes a dynamic huffman table given the symbol counts.
Based on zlib/examples/zpipe.c in zlib 1.2.8
cd to zlib root directory
./configure
make
cd examples
cc -O -I.. -o makedht makedht.c ../libz.a
deflate_make_dht( (z_stream *)strm, (int *)lhist, (int *)dhist, (int *)valid_bits );
Caller provides lhist and dhist int arrays. A dynamic huffman
table (DHT) formatted in the manner of Deflate Type 2 block is
returned in strm. Number of valid bits in the last byte is
returned in valid_bits.
format_cpb(char *cpbtxt, char *zbuf, int have, int valid_bits )
Pretty formats the DHT.
*/
/*
From command line, supply the Literal/Length/Distance symbols and
their counts in the *lzcount file. makedht then calls zlib to make
the dynamic huffman table (DHT). Makedht then writes human
readable DHT to stdout and binary DHT to <fname>.
[abali@hahn examples]$ ./makedht jabber1.lzcount jabber1.dht
bytes: 19 invalid bits: 4
--------------------------------
00000000000000000000000000000094
203826000000220058c5a6900244f0c3
d7770700000000000000000000000000
--------------------------------
Hex dump of the same:
[abali@hahn examples]$ xxd jabber1.dht
0000000: 2038 2600 0000 2200 58c5 a690 0244 f0c3 8&...".X....D..
0000010: d777 07 .w.
Notes:
Invalid bit count is the number of unused **left-most** bits in the
last byte. Bit endianness is due to the Deflate specification.
When DEBUG is enabled in zlib, Huffman codes assigned to each
symbol are also printed to stderr.
The -f flag asks zlib to produce a Huffman code for every Lit/Len
(0-285) and Dist (0-29) symbol. It does so by replacing each symbol
count of 0 with 1, which forces a code to be generated for all the
symbols. In the example below, you can see that the result is larger
than in the previous example.
[abali@hahn examples]$ ./makedht -f jabber1.lzcount jabber1.dht
bytes: 56 invalid bits: 4
--------------------------------
000000000000000000000000000001bc
bde300040208da443232b3f7cedeca48
56943d92ec952dbbec19d9ab4284ca4e
43c8deca56b2f7cc2a65454564af9292
f0fbff7e8ffbfd0f0000000000000000
--------------------------------
Sample *.lzcount file
[abali@hahn examples]$ cat jabber1.lzcount
32 : 2
39 : 1
84 : 1
97 : 1
98 : 1
114 : 1
115 : 1
119 : 1
256 : 1
0 : 0
1 : 0
29 : 0
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "zlib.h"
#define CHUNK 16384
/*
   Cpb stands for compression parameter block.  format_cpb converts the
   zlib produced DHT in zbuf to ASCII text and writes it to cpbtxt.

   cpbtxt     receives a NUL-terminated string: a first line of 32 hex
              digits holding the zero-padded bit count of the DHT, then
              the DHT bytes as hex, 16 bytes (32 digits) per line, with
              the last line padded with "00", then a final newline.
              The caller must provide at least 34 + 33 * ((have + 15) / 16)
              bytes; no bounds checking is possible here.
   zbuf       the binary DHT.
   have       number of bytes in zbuf.
   valid_bits the value returned from deflate_make_dht(): bits used in
              the last byte, 0 meaning that the last byte is full.

   The byte count and the number of unused bits are also reported on
   stderr. */
void format_cpb(char *cpbtxt, const char *zbuf, int have, int valid_bits )
{
    static const char hex[] = "0123456789abcdef";
    unsigned bit_count;
    int invalid_bits;
    int i;
    char *ptr;

    /* unused bits in the last byte */
    invalid_bits = ( valid_bits ) ? 8 - valid_bits : 0 ;
    fprintf(stderr,"bytes: %d invalid bits: %d\n", have, invalid_bits );

    /* first line: the bit count in hex, right aligned in 32 '0' digits;
       written digit by digit so no scratch buffer is needed */
    bit_count = (unsigned)(8 * have - invalid_bits);
    memset( cpbtxt, '0', 32 );
    for (i = 31; bit_count != 0 && i >= 0; i--) {
        cpbtxt[i] = hex[bit_count & 0xf];
        bit_count >>= 4;
    }

    /* continue from the next line */
    ptr = cpbtxt + 32;
    for (i = 0; i < have; i++) {
        unsigned char byte = (unsigned char) zbuf[i];

        if (i % 16 == 0)
            *ptr++ = '\n';              /* 16 bytes per line */
        *ptr++ = hex[(byte >> 4) & 0xf];
        *ptr++ = hex[byte & 0xf];
    }

    /* pad the last line to a full 16 bytes */
    for (i = have; i < 16 * ((have + 15) / 16); i++) {
        *ptr++ = '0';
        *ptr++ = '0';
    }
    *ptr++ = '\n';
    *ptr = 0;
}
/* Build a DHT from the histograms lhist (Lit/Len) and dhist (Distance),
   write the binary table to the file fname and a human readable dump to
   stdout.

   Returns Z_OK on success, 1 if fname cannot be opened, Z_ERRNO if the
   output file cannot be written, or the error code returned by
   deflateInit() or deflate_make_dht(). */
int makedht(char *fname, int *lhist, int *dhist)
{
    int ret;
    unsigned have;
    z_stream strm;
    char zbuf[CHUNK];
    char cpbtxt[CHUNK];
    FILE *cpbbin;
    int valid_bits = 0;

    /* file for the CPB binary output; "wb" so that no newline translation
       is applied to the table on platforms that distinguish text mode */
    cpbbin = fopen( fname, "wb" );
    if (cpbbin == NULL) {
        fprintf( stderr, "error: cannot open %s\n", fname );
        return 1;
    }

    /* allocate deflate state */
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    ret = deflateInit(&strm, Z_DEFAULT_COMPRESSION);
    if (ret != Z_OK) {
        fclose( cpbbin );
        return ret;
    }

    strm.avail_out = CHUNK;
    strm.next_out = (unsigned char *)zbuf;
    ret = deflate_make_dht( &strm, lhist, dhist, &valid_bits );
    if (ret != Z_OK) {
        /* valid_bits and the buffer contents are not usable on failure */
        (void)deflateEnd(&strm);
        fclose( cpbbin );
        return ret;
    }
    have = CHUNK - strm.avail_out;

    if (fwrite(zbuf, 1, have, cpbbin) != have || ferror(cpbbin)) {
        fclose( cpbbin );
        (void)deflateEnd(&strm);
        return Z_ERRNO;
    }
    /* fclose flushes, so a failure here also means the file is incomplete */
    if (fclose( cpbbin ) != 0) {
        (void)deflateEnd(&strm);
        return Z_ERRNO;
    }

    format_cpb( cpbtxt, zbuf, (int)have, valid_bits );
    fflush(stdout);
    fputs("--------------------------------\n", stdout );
    fputs( cpbtxt, stdout );
    fputs("--------------------------------\n", stdout );
    fflush(stdout);

    (void)deflateEnd(&strm);
    return Z_OK;
}
/* report a zlib or i/o error */
void zerr(int ret)
{
fputs("zpipe: ", stderr);
switch (ret) {
case Z_ERRNO:
if (ferror(stdin))
fputs("error reading stdin\n", stderr);
if (ferror(stdout))
fputs("error writing stdout\n", stderr);
break;
case Z_STREAM_ERROR:
fputs("invalid compression level\n", stderr);
break;
case Z_DATA_ERROR:
fputs("invalid or incomplete deflate data\n", stderr);
break;
case Z_MEM_ERROR:
fputs("out of memory\n", stderr);
break;
case Z_VERSION_ERROR:
fputs("zlib version mismatch!\n", stderr);
}
}
/* Replace every zero count in the Lit/Len histogram llhist (286 entries)
   and in the Distance histogram dhist (30 entries) with val; nonzero
   counts are left alone.  A DHT that is reused for different input data
   needs a code for every symbol that may occur, and a nonzero count is
   what makes a symbol receive a code.  The price is a larger DHT. */
void fill_zero_lzcounts(int *llhist, int *dhist, int val)
{
    int *slot;

    for (slot = llhist; slot != llhist + 286; ++slot) {
        if (*slot == 0)
            *slot = val;
    }
    for (slot = dhist; slot != dhist + 30; ++slot) {
        if (*slot == 0)
            *slot = val;
    }
}
/* Read lzcounts from the file fname and store them in the int arrays
   llhist (286 entries, Lit/Len) and dhist (30 entries, Distance).

   Each non-blank line has the form "symbol : count".  Lit/Len symbols
   come first; the first line whose symbol is smaller than the symbol on
   the previous line starts the Distance symbols.  Symbols that are not
   listed keep a count of 0, and the count of the end-of-block symbol
   (256) is always set to 1.

   Returns 0 on success.  Returns 1, after printing a message to stderr,
   if the file cannot be opened or read, or if a line is malformed, names
   a symbol outside 0..285 (Lit/Len) or 0..29 (Distance), or has a
   negative count. */
int get_lzcounts(const char *fname, int *llhist, int *dhist)
{
    int i, lz, prev_lz, count, doll;
    int lineno;
    FILE *lzf;
    char buf[1024];

    lzf = fopen( fname, "r" );
    if (lzf == NULL) {
        fprintf( stderr, "error: cannot open %s\n", fname );
        return 1;
    }

    for (i = 0; i < 286; i++)
        llhist[i] = 0;
    for (i = 0; i < 30; i++)
        dhist[i] = 0;

    prev_lz = 0;
    doll = 1;
    lineno = 0;
    while (fgets( buf, sizeof buf, lzf ) != NULL) {
        lineno++;

        /* blank lines carry no data */
        if (buf[strspn( buf, " \t\r\n" )] == '\0')
            continue;

        /* without this check lz and count would be used uninitialized */
        if (sscanf( buf, "%d : %d", &lz, &count ) != 2) {
            fprintf( stderr, "error: %s:%d: expected \"symbol : count\"\n",
                     fname, lineno );
            fclose( lzf );
            return 1;
        }

        if (prev_lz > lz)       /* detect LL to D transition */
            doll = 0;

        /* validate at run time: an assert() would vanish under NDEBUG and
           let a bad symbol index write outside the histogram */
        if (lz < 0 || lz > (doll ? 285 : 29) || count < 0) {
            fprintf( stderr, "error: %s:%d: symbol or count out of range\n",
                     fname, lineno );
            fclose( lzf );
            return 1;
        }
        prev_lz = lz;

        if (doll)
            llhist[lz] = count;
        else
            dhist[lz] = count;
    }

    if (ferror( lzf )) {
        fprintf( stderr, "error: cannot read %s\n", fname );
        fclose( lzf );
        return 1;
    }

    llhist[256] = 1;    /* The EOB symbol is always present */
    fclose( lzf );
    return 0;
}
/* Command line driver: makedht [-f] <lzcount> <dht.bin>

   Reads the symbol counts from <lzcount>, optionally (-f) raises every
   zero count to one, then builds the DHT and writes it to <dht.bin> and
   to stdout.  Returns 1 for a usage error or an unreadable lzcount file,
   otherwise the value returned by makedht(). */
int main(int argc, char **argv)
{
    int lhist[286];
    int dhist[30];
    char **files;
    int force_all;
    int ret;

    /* -f is only recognized as the first of exactly three arguments */
    force_all = (argc == 4 && strcmp(argv[1], "-f") == 0);

    /* when argument count is wrong, report usage */
    if (!force_all && argc != 3) {
        fprintf( stderr, "usage:\n");
        fprintf( stderr, "%s [-f] <lzcount> <dht.bin>\n", argv[0]);
        fprintf( stderr, " <Lzcount> contains a symbol : count pair per line of input.\n");
        fprintf( stderr, " Lit/Len symbols 0..285 must be followed by Distance symbols 0..29.\n");
        fprintf( stderr, " Missing symbols have a count of 0 by default.\n");
        fprintf( stderr, " The optional -f changes 0 counts to 1.\n");
        fprintf( stderr, " Human readable output is printed to stdout.\n");
        fprintf( stderr, " Number of bits in the DHT is printed in the first 16 bytes.\n");
        fprintf( stderr, " Number of unused bits in the DHT tail byte is also printed.\n");
        fprintf( stderr, " Binary output is dumped to dht.bin.\n");
        return 1;
    }

    /* files[0] is the lzcount input, files[1] the binary DHT output */
    files = argv + (force_all ? 2 : 1);

    /* read LZ counts from file */
    if (get_lzcounts( files[0], lhist, dhist ))
        return 1;

    /* change zero counts to one */
    if (force_all)
        fill_zero_lzcounts( lhist, dhist, 1 );

    /* make the dht */
    ret = makedht( files[1], lhist, dhist );
    if (ret != Z_OK)
        zerr(ret);
    fflush(stderr);
    return ret;
}

60
trees.c
View File

@ -1117,3 +1117,63 @@ int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
}
return (s->sym_next == s->sym_end);
}
/* ===========================================================================
 * Return a histogram count prepared for a 16-bit frequency field: negative
 * counts are treated as zero and positive counts are shifted right by shift
 * bits, except that a positive count never becomes zero, so a symbol that
 * occurs keeps its code.
 */
static unsigned long dht_scaled_count(int count, unsigned shift) {
    unsigned long scaled;

    if (count <= 0)
        return 0;
    scaled = (unsigned long)count >> shift;
    return scaled != 0 ? scaled : 1;
}

/* ===========================================================================
 * Return the smallest right shift for which the scaled counts of hist[0] to
 * hist[count-1] add up to no more than 0xffff, the largest value a 16-bit
 * frequency can hold.  count must not exceed 0xffff, which guarantees that
 * the search ends at the latest when every positive count has become 1.
 */
static unsigned dht_hist_shift(const int *hist, int count) {
    unsigned shift = 0;

    for (;;) {
        unsigned long total = 0;
        int fits = 1;
        int n;

        for (n = 0; n < count; n++) {
            unsigned long f = dht_scaled_count(hist[n], shift);

            /* compare before adding so that total itself cannot wrap */
            if (f > 0xffffUL - total) {
                fits = 0;
                break;
            }
            total += f;
        }
        if (fits)
            return shift;
        shift++;
    }
}

/* ===========================================================================
 * Make deflate trees for the given literal/length and distance histograms
 * and write their description to the pending buffer.
 *
 * l_hist supplies L_CODES counts and d_hist supplies D_CODES counts.  The
 * tree frequencies are 16 bits wide, so each histogram is scaled down by a
 * common power of two when its total does not fit; zero counts stay zero and
 * positive counts stay positive.  The count of the end-of-block symbol is
 * always forced to 1.
 *
 * The 3-bit block header is not written; the output starts with the tree
 * description.  The bit buffer is flushed so that the output ends on a byte
 * boundary.  Returns the number of bits that were in use in the last byte
 * before that padding, 0 meaning that the last byte is full.
 */
int make_trees(deflate_state *s, int *l_hist, int *d_hist) {
    unsigned shift;
    int max_blindex;
    int bits_valid;
    int n;

    init_block(s);

    /* copy in histograms, scaled to fit the 16-bit Freq field */
    shift = dht_hist_shift(d_hist, D_CODES);
    for (n = 0; n < D_CODES; n++)
        s->dyn_dtree[n].Freq =
            (unsigned short)dht_scaled_count(d_hist[n], shift);

    shift = dht_hist_shift(l_hist, L_CODES);
    for (n = 0; n < L_CODES; n++)
        s->dyn_ltree[n].Freq =
            (unsigned short)dht_scaled_count(l_hist[n], shift);

    /* EOB symbol always present in a dynamic block */
    s->dyn_ltree[END_BLOCK].Freq = 1;

    build_tree(s, &s->l_desc);          /* Lit/Len tree */
    build_tree(s, &s->d_desc);          /* Distance tree */
    max_blindex = build_bl_tree(s);     /* Code length codes */

    /* write trees to the pending buffer; the 3-bit block header
     * (DYN_TREES) is deliberately not sent ahead of them */
    send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1,
                   max_blindex + 1);

    /* flush the bit buffer and align output tail to the byte boundary */
    bits_valid = s->bi_valid % 8;
    bi_windup(s);

#ifdef ZLIB_DEBUG
    fprintf(stderr, "BL_CODES:\n");
    for (n = 0; n < BL_CODES; n++)
        if (s->bl_tree[n].Len != 0 && n <= s->bl_desc.max_code)
            fprintf(stderr, "bl: %3d l: %2d c: 0x%X\n", n, s->bl_tree[n].Len, s->bl_tree[n].Code );
    fprintf(stderr, "L_CODES:\n");
    for (n = 0; n < L_CODES; n++)
        if (s->dyn_ltree[n].Len != 0 && n <= s->l_desc.max_code)
            fprintf(stderr, "ll: %3d l: %2d c: 0x%X\n", n, s->dyn_ltree[n].Len, s->dyn_ltree[n].Code );
    fprintf(stderr, "D_CODES:\n");
    for (n = 0; n < D_CODES; n++)
        if (s->dyn_dtree[n].Len != 0 && n <= s->d_desc.max_code)
            fprintf(stderr, "di: %3d l: %2d c: 0x%X\n", n, s->dyn_dtree[n].Len, s->dyn_dtree[n].Code );
    fprintf(stderr, "\n");
    fprintf(stderr, "valid bits in the last byte: %d\n", bits_valid );
#endif

    return bits_valid;
}

10
zlib.h
View File

@ -1950,6 +1950,16 @@ ZEXTERN int ZEXPORTVA gzvprintf(gzFile file,
# endif
#endif
ZEXTERN int ZEXPORT deflate_make_dht OF(( z_streamp strm, int *lhistg, int *dhistg, int *bits_valid ));
/*
     Given the literal/length histogram lhistg and the distance histogram
   dhistg, builds the dynamic Huffman table (DHT) that can be used as the
   front matter of Type 2 (dynamic) deflate blocks.  lhistg supplies one count
   for each literal/length symbol (0..285) and dhistg one count for each
   distance symbol (0..29).

     The DHT is written to the buffer at strm->next_out, which has
   strm->avail_out bytes available; the number of bytes produced is the amount
   by which avail_out has decreased.  The 3-bit block header is not part of
   the output.

     *bits_valid is set to the number of valid bits in the last valid byte of
   the buffer, where 0 means that all eight bits of the last byte are valid.

     deflate_make_dht returns Z_OK on success, Z_STREAM_ERROR if strm, its
   state or next_out is Z_NULL, and Z_BUF_ERROR if avail_out is zero or
   bits_valid is NULL.
*/
#ifdef __cplusplus
}
#endif