summaryrefslogtreecommitdiff
path: root/base/slzwe.c
blob: 86ed6f5b70c8c9af4f4bc5c99ca946e011cc534c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/* Copyright (C) 2001-2023 Artifex Software, Inc.
   All Rights Reserved.

   This software is provided AS-IS with no warranty, either express or
   implied.

   This software is distributed under license and may not be copied,
   modified or distributed except as expressly authorized under the terms
   of the license contained in the file LICENSE in this distribution.

   Refer to licensing information at http://www.artifex.com or contact
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
   CA 94129, USA, for further information.
*/


/* LZW encoding filter */
#include "stdio_.h"	/* includes std.h */
#include "gdebug.h"
#include "strimpl.h"
#include "slzwx.h"

/********************************************************/
/* LZW routines are based on:				*/
/* Dr. Dobbs Journal --- Oct. 1989. 			*/
/* Article on LZW Data Compression by Mark R. Nelson 	*/
/********************************************************/

/* Define the special codes */
/* code_reset is emitted at the start of the data and whenever the */
/* encoder discards its table (see s_LZWE_process). */
#define code_reset 256
/* code_eod is emitted once at the end of the data.  Inside the encoder */
/* it also serves as the "no prefix" code for single-byte strings and */
/* as the marker for an empty hash slot (see lzw_reset_encode). */
#define code_eod 257
#define code_0 258			/* first assignable code */

/* Import the stream closing procedure */
/* It is defined outside this file and is installed as the release */
/* procedure in s_LZWE_template at the end of this file. */
extern stream_proc_release(s_LZW_release);

/* One entry of the encoder's string table, indexed by code. */
/* The string for a code is the string for `prefix` followed by `datum`. */
/* Single-byte strings are stored with prefix == code_eod */
/* (see lzw_reset_encode). */
typedef struct lzw_encode_s {
        byte datum;			/* last byte of this code */
        ushort prefix;			/* code for prefix of this code */
} lzw_encode;

#define encode_max 4095		/* max # of codes, must be */
                                /* > code_0 and <= 4095 */
/* The hash table has a quarter more slots than there are codes, so the */
/* linear probing used in this file always has empty slots to stop on. */
#define hash_size (encode_max + encode_max / 4)

/* The complete encoder table: */
/*   encode[] holds the (prefix, datum) pair for each code; */
/*   hashed[] is an open-addressed hash table whose slots hold codes, */
/*   with code_eod marking an empty slot. */
struct lzw_encode_table_s {
        lzw_encode encode[encode_max];
        ushort hashed[hash_size];
};
/* st_lzwe_table is the structure descriptor that s_LZWE_init passes to */
/* gs_alloc_struct when allocating the table. */
gs_private_st_simple(st_lzwe_table, lzw_encode_table, "lzw_encode_table");

/* Hashing function */
#define encode_hash(code, chr)\
  ((uint)((code) * 59 + (chr) * ((hash_size / 256) | 1)) % hash_size)

/*
 * Internal routine to append one code to the output buffer.
 *
 * Let S = ss->code_size, M = ss->next_code, N = 1 << S.
 * Relevant invariants: 9 <= S <= 12; N / 2 <= M < N; 0 <= code < N;
 * 1 <= ss->bits_left <= 8; only the rightmost (8 - ss->bits_left)
 * bits of ss->bits are still waiting to be written.
 *
 * The code is written high-order bits first.  q points just before the
 * next free output byte, so every store pre-increments it.  One byte is
 * always written and a second one when enough bits have accumulated.
 * Returns the advanced output pointer.
 */
static byte *
lzw_put_code(register stream_LZW_state *ss, byte *q, uint code)
{
    uint width = ss->code_size;
    /* Pending bits go in the high end of the byte; */
    /* the top of the new code fills the remainder. */
    byte lead = (ss->bits << ss->bits_left) +
                (code >> (width - ss->bits_left));

    if_debug2m('W', ss->memory, "[w]writing 0x%x,%d\n", code, ss->code_size);
    q++;
    *q = lead;
    ss->bits_left += 8 - width;
    if (ss->bits_left <= 0) {
        /* Another full byte of this code is ready to go out. */
        q++;
        *q = code >> -ss->bits_left;
        ss->bits_left += 8;
    }
    /* Remember the code; its low-order bits are the new pending bits. */
    ss->bits = code;
    return q;
}

/*
 * Internal routine to reset the encoding table.
 *
 * Restores the initial code size (9 bits) and the first assignable code,
 * clears the current prefix, empties the hash table, and then re-enters
 * the 256 single-byte strings, each stored with code_eod as its prefix.
 */
static void
lzw_reset_encode(stream_LZW_state *ss)
{
    lzw_encode_table *tbl = ss->table.encode;
    ushort *hash_begin = &tbl->hashed[0];
    ushort *hash_end = &tbl->hashed[hash_size];
    ushort *slot;
    int ch;

    ss->next_code = code_0;
    ss->code_size = 9;
    ss->prev_code = code_eod;

    /* code_eod marks a hash slot as unused. */
    for (slot = hash_begin; slot != hash_end; slot++)
        *slot = code_eod;

    for (ch = 0; ch < 256; ch++) {
        /* Probe linearly, wrapping at the end, until a free slot turns up. */
        slot = &tbl->hashed[encode_hash(code_eod, ch)];
        while (*slot != code_eod) {
            slot++;
            if (slot == hash_end)
                slot = hash_begin;
        }
        *slot = ch;
        tbl->encode[ch].datum = ch;
        tbl->encode[ch].prefix = code_eod;
    }

    /* A lookup that lands on an empty slot reads encode[code_eod]; */
    /* giving it this prefix guarantees that it never matches. */
    tbl->encode[code_eod].prefix = code_reset;
}

/* In the filter procedures below, `ss` views their generic stream_state */
/* argument `st` as the LZW-specific state.  It is #undef'd after them. */
#define ss ((stream_LZW_state *)st)

/*
 * Initialize the LZWEncode filter.
 *
 * Empties the output bit buffer, allocates the encoding table from the
 * stream's allocator and resets it, and arranges for the first call of
 * the process procedure to emit the initial reset code.
 * Returns 0 on success, or ERRC if the table cannot be allocated.
 */
static int
s_LZWE_init(stream_state *st)
{
    stream_LZW_state *lzw = (stream_LZW_state *)st;

    lzw->bits_left = 8;
    /* Set only for Purify: with bits_left == 8 the value is never used. */
    lzw->bits = 0;

    lzw->table.encode = gs_alloc_struct(st->memory, lzw_encode_table,
                                        &st_lzwe_table, "LZWEncode init");
    if (lzw->table.encode == 0) {
        /* NOTE: the original author flagged this return value as WRONG. */
        return ERRC;
    }

    lzw->first = true;
    lzw_reset_encode(lzw);
    return 0;
}

/*
 * Process a buffer: LZW-encode bytes from the read cursor pr into the
 * write cursor pw.
 *
 * st is the filter state (accessed through the file-level `ss` macro).
 * Both cursors point one byte before the next byte to read or write,
 * which is why input is fetched as p[1] and output is stored with *++q.
 * last is true when no more input will follow; in that case, once all
 * input has been consumed, the pending code and the EOD code are written
 * and the final partial byte is flushed.
 *
 * Returns 0 when all the input given was consumed (and, if last, the
 * trailer was written), or 1 when it stopped for lack of output space.
 * The prefix code in progress is saved in ss->prev_code, so a string may
 * span calls.
 */
static int
s_LZWE_process(stream_state *st, stream_cursor_read *pr,
  stream_cursor_write *pw, bool last)
{	register const byte *p = pr->ptr;
        const byte *rlimit = pr->limit;
        register byte *q = pw->ptr;
        byte *wlimit = pw->limit;
        /* Resume with the prefix code left over from the previous call. */
        int code = ss->prev_code;
        lzw_encode_table *table = ss->table.encode;
        ushort *table_end = &table->hashed[hash_size];
        int status = 0;
        /* limit_code is the value of ss->next_code at which either the */
        /* code size must grow or the table is full and must be reset. */
        int limit_code;
#define set_limit_code()\
  limit_code = (1 << ss->code_size) - ss->EarlyChange;\
  if ( limit_code > encode_max ) limit_code = encode_max
        set_limit_code();
        if ( ss->first )
        {	/* Emit the initial reset code. */
                /* Nothing has been changed yet, so it is safe to return */
                /* without storing the cursors or the prefix code. */
                if ( wlimit - q < 2 )
                        return 1;
                q = lzw_put_code(ss, q, code_reset);
                ss->first = false;
        }
        while ( p < rlimit )
        {	byte c = p[1];
                ushort *tp;
                /* Look up the string (code, c) by linear probing, */
                /* starting at its hash slot. */
                for ( tp = &table->hashed[encode_hash(code, c)]; ; )
                {	lzw_encode *ep = &table->encode[*tp];
                        if ( ep->prefix == code && ep->datum == c )
                        {	/* Found: extend the current string and */
                                /* consume the input byte. */
                                code = *tp;
                                p++;
                                break;
                        }
                        else if ( *tp != code_eod )
                        {	/* Slot holds a different string: try the next. */
                                if ( ++tp == table_end )
                                  tp = &table->hashed[0]; /* wrap around */
                        }
                        else
                        {	/* end of recognized sequence */
                                /* Make sure there is room first: up to two */
                                /* codes (the string's code and a reset) may */
                                /* be written, each taking one or two bytes. */
                                if ( wlimit - q <= 4 )
                                {	status = 1;
                                        goto out;
                                }
                                q = lzw_put_code(ss, q, code);
                                if ( ss->next_code == limit_code )
                                {	/* Reached power of 2 or limit. */
                                        /* Determine which. */
                                        if ( ss->next_code == encode_max )
                                        {	/* Table full: tell the decoder to */
                                                /* reset, start over, and add no */
                                                /* new entry. */
                                                q = lzw_put_code(ss, q, code_reset);
                                                lzw_reset_encode(ss);
                                                set_limit_code();
                                                goto cx;
                                        }
                                        ss->code_size++;
                                        set_limit_code();
                                }
                                if_debug3m('W', ss->memory, "[W]encoding 0x%x=0x%x+%c\n",
                                          ss->next_code, code, c);
                                /* Enter (code, c) as a new string in the */
                                /* empty slot where the probe ended. */
                                *tp = ss->next_code++;
                                ep = &table->encode[*tp];
                                ep->datum = c;
                                ep->prefix = code;
                                /* Start a new string.  The byte c was not */
                                /* consumed: the next iteration looks it up */
                                /* again with an empty prefix. */
cx:				code = code_eod;
                                break;
                        }
                }
        }
        if ( last && status == 0 )
        {	/* Trailer: final code (if any), EOD, and a padding byte. */
                if ( wlimit - q < 4 )
                        status = 1;
                else
                  {	if ( code != code_eod )
                          {	q = lzw_put_code(ss, q, code);	/* put out final code */
                                /* Widen the code size if the final code */
                                /* brought next_code to the growth point, */
                                /* so that EOD is written at that width. */
                                if (ss->next_code == limit_code && ss->next_code != encode_max)
                                    ss->code_size++;
                          }
                        q = lzw_put_code(ss, q, code_eod);
                        /* Flush any pending bits, left-aligned in the byte. */
                        if ( ss->bits_left < 8 )
                          *++q = ss->bits << ss->bits_left;  /* final byte */
                  }
        }
        /* Save the prefix in progress and publish the cursor positions. */
out:	ss->prev_code = code;
        pr->ptr = p;
        pw->ptr = q;
        return status;
}

/* End of the filter procedures that rely on the `ss` cast macro. */
#undef ss

/* Stream template */
/* Ties together the state descriptor, the init and process procedures */
/* defined in this file, and the release and set_defaults procedures */
/* that are defined elsewhere. */
/* NOTE(review): the constants 1 and 4 are presumably the minimum input */
/* and output buffer sizes -- confirm against the stream_template */
/* declaration. */
const stream_template s_LZWE_template =
{	&st_LZW_state, s_LZWE_init, s_LZWE_process, 1, 4, s_LZW_release,
        s_LZW_set_defaults
};