forked from sysprog21/dict
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tst.c
414 lines (376 loc) · 15.4 KB
/
tst.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#include "tst.h"
/** max word length to store in ternary search tree, stack size */
#define WRDMAX 128
#define STKMAX (WRDMAX * 2)
/* This macro is used to replace some repeating pattern for rotating and
* deleting the node on ternary search tree. It will append kid of victim on
* suitable position, then free victim itself. Note that 'kid' should be
* victim->lokid or victim->hikid
*/
#define del_node(parent, victim, root, kid) \
do { \
if (!parent) { \
*root = kid; \
} else { \
if (victim == parent->lokid) \
parent->lokid = kid; \
else if (victim == parent->hikid) \
parent->hikid = kid; \
else \
parent->eqkid = kid; \
} \
free(victim); \
victim = NULL; \
} while (0)
/** ternary search tree node. */
typedef struct tst_node {
char key; /* char key for node (null for node with string) */
unsigned refcnt; /* refcnt tracks occurrence of word (for delete) */
struct tst_node *lokid; /* ternary low child pointer */
struct tst_node *eqkid; /* ternary equal child pointer */
struct tst_node *hikid; /* ternary high child pointer */
} tst_node;
/** struct to use for static stack to remove nodes. */
typedef struct tst_stack {
void *data[STKMAX];
size_t idx;
} tst_stack;
/** stack push/pop to store node pointers to delete word from tree.
* on delete, store all nodes from root to leaf containing word to
* allow word removal and reordering of tree.
*/
static void *tst_stack_push(tst_stack *s, void *node)
{
if (s->idx >= STKMAX)
return NULL;
return (s->data[(s->idx)++] = node);
}
static void *tst_stack_pop(tst_stack *s)
{
if (!s->idx)
return NULL;
void *node = s->data[--(s->idx)];
s->data[s->idx] = NULL;
return node;
}
/** delete current data-node and parent, update 'node' to new parent.
* before delete the current refcnt is checked, if non-zero, occurrences
* of the word remain in buffer the node is not deleted, if refcnt zero,
* the node is deleted. if 'freeword = 1' the copy of word allocated and
* stored as the node->eqkid is freed, if 'freeword = 0', node->eqkid is
* stored elsewhere and not freed, root node updated if changed. returns
* NULL on success (deleted), otherwise returns the address of victim
* if refcnt non-zero.
*/
static void *tst_del_word(tst_node **root,
tst_node *node,
tst_stack *stk,
const int freeword)
{
tst_node *victim = node; /* begin deletion w/victim */
tst_node *parent = tst_stack_pop(stk); /* parent to victim */
if (!victim->refcnt) { /* if last occurrence */
if (!victim->key && freeword) /* check key is nul */
free(victim->eqkid); /* free string (data) */
/* remove unique suffix chain - parent & victim nodes
* have no children. simple remove until the first parent
* found with children.
*/
while (!parent->lokid && !parent->hikid && !victim->lokid &&
!victim->hikid) {
parent->eqkid = NULL;
free(victim);
victim = parent;
parent = tst_stack_pop(stk);
if (!parent) { /* last word & root node */
free(victim);
return (void *) (*root = NULL);
}
}
/* check if victim is prefix for others (victim has lo/hi node).
* if both lo & hi children, check if lokid->hikid present, if not,
* move hikid to lokid->hikid, replace node with lokid and free node.
* if lokid->hikid present, check hikid->lokid. If not present, then
* move lokid to hikid->lokid, replace node with hikid free node.
*/
if (victim->lokid && victim->hikid) { /* victim has both lokid/hikid */
if (!victim->lokid->hikid) { /* check for hikid in lo tree */
/* rotate victim->hikid to victim->lokid->hikid, and
* rotate victim->lokid to place of victim.
*/
victim->lokid->hikid = victim->hikid;
del_node(parent, victim, root, victim->lokid);
} else if (!victim->hikid->lokid) { /* check for lokid in hi tree */
/* opposite rotation */
victim->hikid->lokid = victim->lokid;
del_node(parent, victim, root, victim->hikid);
} else /* can't rotate, return, leaving victim->eqkid NULL */
return NULL;
} else if (victim->lokid) { /* only lokid, replace victim with lokid */
del_node(parent, victim, root, victim->lokid);
} else if (victim->hikid) { /* only hikid, replace victim with hikid */
del_node(parent, victim, root, victim->hikid);
} else { /* victim - no children, but parent has other children */
if (victim == parent->lokid) { /* if parent->lokid - trim */
parent->lokid = NULL;
free(victim);
victim = NULL;
} else if (victim == parent->hikid) { /* if parent->hikid - trim */
parent->hikid = NULL;
free(victim);
victim = NULL;
} else { /* victim was parent->eqkid, but parent->lo/hikid exists */
parent->eqkid = NULL; /* set eqkid NULL */
free(victim); /* free current victim */
victim = parent; /* set parent = victim */
parent = tst_stack_pop(stk); /* get new parent */
/* if both victim hi/lokid are present */
if (victim->lokid && victim->hikid) {
/* same checks and rotations as above */
if (!victim->lokid->hikid) {
victim->lokid->hikid = victim->hikid;
del_node(parent, victim, root, victim->lokid);
} else if (!victim->hikid->lokid) {
victim->hikid->lokid = victim->lokid;
del_node(parent, victim, root, victim->hikid);
} else
return NULL;
}
/* if only lokid, rewire to parent */
else if (victim->lokid) {
del_node(parent, victim, root, victim->lokid);
}
/* if only hikid, rewire to parent */
else if (victim->hikid) {
del_node(parent, victim, root, victim->hikid);
}
}
}
} else /* node->refcnt non-zero */
printf(" %s (refcnt: %u) not removed.\n", (char *) node->eqkid,
node->refcnt);
return victim; /* return NULL on successful free, *node otherwise */
}
/** tst_ins_del() ins/del copy or reference of 's' from ternary search tree.
* insert all nodes required for 's' in tree at eqkid node of leaf. if 'del'
* is non-zero deletes 's' from tree, otherwise insert 's' at node->eqkid
* with node->key set to the nul-character after final node in search path. if
* 'cpy' is non-zero allocate storage for 's', otherwise save pointer to 's'.
* if 's' already exists in tree, increment node->refcnt. (to be used for del).
* returns address of 's' in tree on successful insert (or on delete if refcnt
* non-zero), NULL on allocation failure on insert, or on successful removal
* of 's' from tree.
*/
void *tst_ins_del(tst_node **root, const char *s, const int del, const int cpy)
{
int diff;
const char *p = s;
tst_stack stk = {.data = {NULL}, .idx = 0};
tst_node *curr, **pcurr;
if (!root || !s)
return NULL; /* validate parameters */
if (strlen(s) + 1 > STKMAX / 2) /* limit length to 1/2 STKMAX */
return NULL; /* 128 char word length is plenty */
pcurr = root; /* start at root */
while ((curr = *pcurr)) { /* iterate to insertion node */
diff = *p - curr->key; /* get ASCII diff for >, <, = */
if (diff == 0) { /* if char equal to node->key */
if (*p++ == 0) { /* check if word is duplicate */
if (del) { /* delete instead of insert */
(curr->refcnt)--; /* decrement reference count */
/* chk refcnt, del 's', return NULL on successful del */
return tst_del_word(root, curr, &stk, cpy);
} else
curr->refcnt++; /* increment refcnt if word exists */
return (void *) curr->eqkid; /* pointer to word / NULL on del */
}
pcurr = &(curr->eqkid); /* get next eqkid pointer address */
} else if (diff < 0) { /* if char less than node->key */
pcurr = &(curr->lokid); /* get next lokid pointer address */
} else { /* if char greater than node->key */
pcurr = &(curr->hikid); /* get next hikid pointer address */
}
if (del)
tst_stack_push(&stk, curr); /* push node on stack for del */
}
if (del)
return (void *) -1;
/* if not duplicate, insert remaining chars into tree rooted at curr */
for (;;) {
/* allocate memory for node, and fill. use calloc (or include
* string.h and initialize w/memset) to avoid valgrind warning
* "Conditional jump or move depends on uninitialised value(s)"
*/
if (!(*pcurr = calloc(1, sizeof **pcurr))) {
fprintf(stderr, "error: tst_insert(), memory exhausted.\n");
return NULL;
}
curr = *pcurr;
curr->key = *p;
curr->refcnt = 1;
curr->lokid = curr->hikid = curr->eqkid = NULL;
if (!*root) /* handle assignment to root if no root */
*root = *pcurr;
/* Place nodes until end of the string, at end of stign allocate
* space for data, copy data as final eqkid, and return.
*/
if (*p++ == 0) {
if (cpy) { /* allocate storage for 's' */
const char *eqdata = strdup(s);
if (!eqdata)
return NULL;
curr->eqkid = (tst_node *) eqdata;
return (void *) eqdata;
} else { /* save pointer to 's' (allocated elsewhere) */
curr->eqkid = (tst_node *) s;
return (void *) s;
}
}
pcurr = &(curr->eqkid);
}
}
/** tst_search(), non-recursive find of a string internary tree.
* returns pointer to 's' on success, NULL otherwise.
*/
void *tst_search(const tst_node *p, const char *s)
{
const tst_node *curr = p;
while (curr) { /* loop over each char in 's' */
int diff = *s - curr->key; /* calculate the difference */
if (diff == 0) { /* handle the equal case */
if (*s == 0) /* if *s = curr->key = nul-char, 's' found */
return (void *) curr->eqkid; /* return pointer to 's' */
s++;
curr = curr->eqkid;
} else if (diff < 0) /* handle the less than case */
curr = curr->lokid;
else
curr = curr->hikid; /* handle the greater than case */
}
return NULL;
}
/** fill ptr array 'a' with strings matching prefix at node 'p'.
* the 'a' array will hold pointers to stored strings with prefix
* matching the string passed to tst_matching, ending in 'c', the
* nchr'th char in in each matched string.
*/
void tst_suggest(const tst_node *p,
const char c,
const size_t nchr,
char **a,
int *n,
const int max)
{
if (!p || *n >= max)
return;
tst_suggest(p->lokid, c, nchr, a, n, max);
if (p->key)
tst_suggest(p->eqkid, c, nchr, a, n, max);
else if (*(((char *) p->eqkid) + nchr - 1) == c && *n < max)
a[(*n)++] = (char *) p->eqkid;
tst_suggest(p->hikid, c, nchr, a, n, max);
}
/** tst_search_prefix fills ptr array 'a' with words prefixed with 's'.
* once the node containing the first prefix matching 's' is found
* tst_suggest is called to travers the ternary_tree beginning
* at the node filling 'a' with pointers to all words that contain
* the prefix upto 'max' words updating 'n' with the number of word
* in 'a'. a pointer to the first node is returned on success
* NULL otherwise.
*/
void *tst_search_prefix(const tst_node *root,
const char *s,
char **a,
int *n,
const int max)
{
const tst_node *curr = root;
const char *start = s;
if (!*s)
return NULL;
/* get length of s */
for (; *start; start++)
; /* wait */
const size_t nchr = start - s;
start = s; /* reset start to s */
*n = 0; /* initialize n - 0 */
/* Loop while we haven't hit a NULL node or returned */
while (curr) {
int diff = *s - curr->key; /* calculate the difference */
if (diff == 0) { /* handle the equal case */
/* check if prefix number of chars reached */
if ((size_t)(s - start) == nchr - 1) {
/* call tst_suggest to fill a with pointer to matching words */
tst_suggest(curr, curr->key, nchr, a, n, max);
return (void *) curr;
}
if (*s == 0) /* no matching prefix found in tree */
return (void *) curr->eqkid;
s++;
curr = curr->eqkid;
} else if (diff < 0) /* handle the less than case */
curr = curr->lokid;
else
curr = curr->hikid; /* handle the greater than case */
}
return NULL;
}
/** tst_traverse_fn(), traverse tree calling 'fn' on each word.
* prototype fonr 'fn' is void fn(const void *, void *). data can
* be NULL if unused.
*/
void tst_traverse_fn(const tst_node *p,
void(fn)(const void *, void *),
void *data)
{
if (!p)
return;
tst_traverse_fn(p->lokid, fn, data);
if (p->key)
tst_traverse_fn(p->eqkid, fn, data);
else
fn(p, data);
tst_traverse_fn(p->hikid, fn, data);
}
/** free the ternary search tree rooted at p, data storage internal. */
void tst_free_all(tst_node *p)
{
if (!p)
return;
tst_free_all(p->lokid);
if (p->key)
tst_free_all(p->eqkid);
tst_free_all(p->hikid);
if (!p->key)
free(p->eqkid);
free(p);
}
/** free the ternary search tree rooted at p, data storage external. */
void tst_free(tst_node *p)
{
if (!p)
return;
tst_free(p->lokid);
if (p->key)
tst_free(p->eqkid);
tst_free(p->hikid);
free(p);
}
/** access functions tst_get_key(), tst_get_refcnt, & tst_get_string().
* provide access to struct members through opaque pointers availale
* to program.
*/
char tst_get_key(const tst_node *node)
{
return node->key;
}
unsigned tst_get_refcnt(const tst_node *node)
{
return node->refcnt;
}
char *tst_get_string(const tst_node *node)
{
if (node && !node->key)
return (char *) node->eqkid;
return NULL;
}