source: main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mg/lib/rx.c@ 23508

Last change on this file since 23508 was 23508, checked in by sjm84, 13 years ago

Committing 64 bit changes into the branch

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 175.1 KB
Line 
1
2/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
3
4This file is part of the librx library.
5
6Librx is free software; you can redistribute it and/or modify it under
7the terms of the GNU Library General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11Librx is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU Library General Public
17License along with this software; see the file COPYING.LIB. If not,
18write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
1902139, USA. */
20
21/* NOTE!!! AIX is so losing it requires this to be the first thing in the
22 * file.
23 * Do not put ANYTHING before it!
24 */
25#if !defined (__GNUC__) && defined (_AIX)
26 #pragma alloca
27#endif
28
29/* To make linux happy? */
30#ifndef _GNU_SOURCE
31#define _GNU_SOURCE
32#endif
33
34#if HAVE_CONFIG_H
35# ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
36# include <win32cfg.h>
37# include <malloc.h>
38# else
39# include <sysfuncs.h>
40# endif
41#endif
42
43
44
45char rx_version_string[] = "GNU Rx version 0.07.1";
46
47 /* ``Too hard!''
48 * -- anon.
49 */
50
51
52
53#include <stdio.h>
54#include <ctype.h>
55#ifndef isgraph
56#define isgraph(c) (isprint (c) && !isspace (c))
57#endif
58#ifndef isblank
59#define isblank(c) ((c) == ' ' || (c) == '\t')
60#endif
61
62#include <sys/types.h>
63
64#undef MAX
65#undef MIN
66#define MAX(a, b) ((a) > (b) ? (a) : (b))
67#define MIN(a, b) ((a) < (b) ? (a) : (b))
68
69typedef char boolean;
70#define false 0
71#define true 1
72
73#ifndef __GCC__
74#undef __inline__
75#define __inline__
76#endif
77
78/* Emacs already defines alloca, sometimes. */
79#ifndef alloca
80
81/* Make alloca work the best possible way. */
82#ifdef __GNUC__
83#define alloca __builtin_alloca
84#else /* not __GNUC__ */
85#if HAVE_ALLOCA_H
86#include <alloca.h>
87#else /* not __GNUC__ or HAVE_ALLOCA_H */
88#ifndef _AIX /* Already did AIX, up at the top. */
89char *alloca ();
90#endif /* not _AIX */
91#endif /* not HAVE_ALLOCA_H */
92#endif /* not __GNUC__ */
93
94#endif /* not alloca */
95
96/* Memory management and stuff for emacs. */
97
98#define CHARBITS 8
99#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
100
101
102/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
103 * use `alloca' instead of `malloc' for the backtracking stack.
104 *
105 * Emacs will die miserably if we don't do this.
106 */
107
108#ifdef REGEX_MALLOC
109#define REGEX_ALLOCATE malloc
110#else /* not REGEX_MALLOC */
111#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
112#define REGEX_ALLOCATE _alloca
113#else
114#define REGEX_ALLOCATE alloca
115#endif /* __WIN32__ */
116#endif /* not REGEX_MALLOC */
117
118
119#ifdef RX_WANT_RX_DEFS
120#define RX_DECL extern
121#define RX_DEF_QUAL
122#else
123#define RX_WANT_RX_DEFS
124#define RX_DECL static
125#define RX_DEF_QUAL static
126#endif
127#include "rx.h"
128#undef RX_DECL
129#define RX_DECL RX_DEF_QUAL
130
131
132#ifndef emacs
133
134#ifdef SYNTAX_TABLE
135extern char *re_syntax_table;
136#else /* not SYNTAX_TABLE */
137
138#ifdef __STDC__
139static void
140init_syntax_once (void)
141#else
142static void
143init_syntax_once ()
144#endif
145{
146 register int c;
147 static int done = 0;
148
149 if (done)
150 return;
151
152 bzero (re_syntax_table, sizeof re_syntax_table);
153
154 for (c = 'a'; c <= 'z'; c++)
155 re_syntax_table[c] = Sword;
156
157 for (c = 'A'; c <= 'Z'; c++)
158 re_syntax_table[c] = Sword;
159
160 for (c = '0'; c <= '9'; c++)
161 re_syntax_table[c] = Sword;
162
163 re_syntax_table['_'] = Sword;
164
165 done = 1;
166}
167#endif /* not SYNTAX_TABLE */
168#endif /* not emacs */
169
170
171/* Compile with `-DRX_DEBUG' and use the following flags.
172 *
173 * Debugging flags:
174 * rx_debug - print information as a regexp is compiled
175 * rx_debug_trace - print information as a regexp is executed
176 */
177
178#ifdef RX_DEBUG
179
180int rx_debug_compile = 0;
181int rx_debug_trace = 0;
182static struct re_pattern_buffer * dbug_rxb = 0;
183
184#ifdef __STDC__
185typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
186#else
187typedef void (*side_effect_printer) ();
188#endif
189
190#ifdef __STDC__
191static void print_cset (struct rx *rx, rx_Bitset cset, FILE * fp);
192#else
193static void print_cset ();
194#endif
195
196#ifdef __STDC__
197static void
198print_rexp (struct rx *rx,
199 struct rexp_node *node, int depth,
200 side_effect_printer seprint, FILE * fp)
201#else
202static void
203print_rexp (rx, node, depth, seprint, fp)
204 struct rx *rx;
205 struct rexp_node *node;
206 int depth;
207 side_effect_printer seprint;
208 FILE * fp;
209#endif
210{
211 if (!node)
212 return;
213 else
214 {
215 switch (node->type)
216 {
217 case r_cset:
218 {
219 fprintf (fp, "%*s", depth, "");
220 print_cset (rx, node->params.cset, fp);
221 fputc ('\n', fp);
222 break;
223 }
224
225 case r_opt:
226 case r_star:
227 fprintf (fp, "%*s%s\n", depth, "",
228 node->type == r_opt ? "opt" : "star");
229 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
230 break;
231
232 case r_2phase_star:
233 fprintf (fp, "%*s2phase star\n", depth, "");
234 print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
235 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
236 break;
237
238
239 case r_alternate:
240 case r_concat:
241 fprintf (fp, "%*s%s\n", depth, "",
242 node->type == r_alternate ? "alt" : "concat");
243 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
244 print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
245 break;
246 case r_side_effect:
247 fprintf (fp, "%*sSide effect: ", depth, "");
248 seprint (rx, node->params.side_effect, fp);
249 fputc ('\n', fp);
250 }
251 }
252}
253
254#ifdef __STDC__
255static void
256print_nfa (struct rx * rx,
257 struct rx_nfa_state * n,
258 side_effect_printer seprint, FILE * fp)
259#else
260static void
261print_nfa (rx, n, seprint, fp)
262 struct rx * rx;
263 struct rx_nfa_state * n;
264 side_effect_printer seprint;
265 FILE * fp;
266#endif
267{
268 while (n)
269 {
270 struct rx_nfa_edge *e = n->edges;
271 struct rx_possible_future *ec = n->futures;
272 fprintf (fp, "node %d %s\n", n->id,
273 n->is_final ? "final" : (n->is_start ? "start" : ""));
274 while (e)
275 {
276 fprintf (fp, " edge to %d, ", e->dest->id);
277 switch (e->type)
278 {
279 case ne_epsilon:
280 fprintf (fp, "epsilon\n");
281 break;
282 case ne_side_effect:
283 fprintf (fp, "side effect ");
284 seprint (rx, e->params.side_effect, fp);
285 fputc ('\n', fp);
286 break;
287 case ne_cset:
288 fprintf (fp, "cset ");
289 print_cset (rx, e->params.cset, fp);
290 fputc ('\n', fp);
291 break;
292 }
293 e = e->next;
294 }
295
296 while (ec)
297 {
298 int x;
299 struct rx_nfa_state_set * s;
300 struct rx_se_list * l;
301 fprintf (fp, " eclosure to {");
302 for (s = ec->destset; s; s = s->cdr)
303 fprintf (fp, "%d ", s->car->id);
304 fprintf (fp, "} (");
305 for (l = ec->effects; l; l = l->cdr)
306 {
307 seprint (rx, l->car, fp);
308 fputc (' ', fp);
309 }
310 fprintf (fp, ")\n");
311 ec = ec->next;
312 }
313 n = n->next;
314 }
315}
316
317static char * efnames [] =
318{
319 "bogon",
320 "re_se_try",
321 "re_se_pushback",
322 "re_se_push0",
323 "re_se_pushpos",
324 "re_se_chkpos",
325 "re_se_poppos",
326 "re_se_at_dot",
327 "re_se_syntax",
328 "re_se_not_syntax",
329 "re_se_begbuf",
330 "re_se_hat",
331 "re_se_wordbeg",
332 "re_se_wordbound",
333 "re_se_notwordbound",
334 "re_se_wordend",
335 "re_se_endbuf",
336 "re_se_dollar",
337 "re_se_fail",
338};
339
340static char * efnames2[] =
341{
342 "re_se_win",
343 "re_se_lparen",
344 "re_se_rparen",
345 "re_se_backref",
346 "re_se_iter",
347 "re_se_end_iter",
348 "re_se_tv"
349};
350
351static char * inx_names[] =
352{
353 "rx_backtrack_point",
354 "rx_do_side_effects",
355 "rx_cache_miss",
356 "rx_next_char",
357 "rx_backtrack",
358 "rx_error_inx",
359 "rx_num_instructions"
360};
361
362
363#ifdef __STDC__
364static void
365re_seprint (struct rx * rx, void * effect, FILE * fp)
366#else
367static void
368re_seprint (rx, effect, fp)
369 struct rx * rx;
370 void * effect;
371 FILE * fp;
372#endif
373{
374 if ((int)effect < 0)
375 fputs (efnames[-(int)effect], fp);
376 else if (dbug_rxb)
377 {
378 struct re_se_params * p = &dbug_rxb->se_params[(int)effect];
379 fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
380 }
381 else
382 fprintf (fp, "[complex op # %d]", (int)effect);
383}
384
385
386/* These are so the regex.c regression tests will compile. */
387void
388print_compiled_pattern (rxb)
389 struct re_pattern_buffer * rxb;
390{
391}
392
393void
394print_fastmap (fm)
395 char * fm;
396{
397}
398
399#endif /* RX_DEBUG */
400
401
402
403
404/* This page: Bitsets. Completely unintersting. */
405
406#ifdef __STDC__
407RX_DECL int
408rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
409#else
410RX_DECL int
411rx_bitset_is_equal (size, a, b)
412 int size;
413 rx_Bitset a;
414 rx_Bitset b;
415#endif
416{
417 int x;
418 RX_subset s = b[0];
419 b[0] = ~a[0];
420
421 for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x)
422 ;
423
424 b[0] = s;
425 return !x && s == a[0];
426}
427
428#ifdef __STDC__
429RX_DECL int
430rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
431#else
432RX_DECL int
433rx_bitset_is_subset (size, a, b)
434 int size;
435 rx_Bitset a;
436 rx_Bitset b;
437#endif
438{
439 int x = rx_bitset_numb_subsets(size) - 1;
440 while (x-- && (a[x] & b[x]) == a[x]);
441 return x == -1;
442}
443
444
445#ifdef __STDC__
446RX_DECL int
447rx_bitset_empty (int size, rx_Bitset set)
448#else
449RX_DECL int
450rx_bitset_empty (size, set)
451 int size;
452 rx_Bitset set;
453#endif
454{
455 int x;
456 RX_subset s = set[0];
457 set[0] = 1;
458 for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
459 ;
460 set[0] = s;
461 return !s;
462}
463
464#ifdef __STDC__
465RX_DECL void
466rx_bitset_null (int size, rx_Bitset b)
467#else
468RX_DECL void
469rx_bitset_null (size, b)
470 int size;
471 rx_Bitset b;
472#endif
473{
474 bzero (b, rx_sizeof_bitset(size));
475}
476
477
478#ifdef __STDC__
479RX_DECL void
480rx_bitset_universe (int size, rx_Bitset b)
481#else
482RX_DECL void
483rx_bitset_universe (size, b)
484 int size;
485 rx_Bitset b;
486#endif
487{
488 int x = rx_bitset_numb_subsets (size);
489 while (x--)
490 *b++ = ~(RX_subset)0;
491}
492
493
494#ifdef __STDC__
495RX_DECL void
496rx_bitset_complement (int size, rx_Bitset b)
497#else
498RX_DECL void
499rx_bitset_complement (size, b)
500 int size;
501 rx_Bitset b;
502#endif
503{
504 int x = rx_bitset_numb_subsets (size);
505 while (x--)
506 {
507 *b = ~*b;
508 ++b;
509 }
510}
511
512
513#ifdef __STDC__
514RX_DECL void
515rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
516#else
517RX_DECL void
518rx_bitset_assign (size, a, b)
519 int size;
520 rx_Bitset a;
521 rx_Bitset b;
522#endif
523{
524 int x;
525 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
526 a[x] = b[x];
527}
528
529
530#ifdef __STDC__
531RX_DECL void
532rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
533#else
534RX_DECL void
535rx_bitset_union (size, a, b)
536 int size;
537 rx_Bitset a;
538 rx_Bitset b;
539#endif
540{
541 int x;
542 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
543 a[x] |= b[x];
544}
545
546
547#ifdef __STDC__
548RX_DECL void
549rx_bitset_intersection (int size,
550 rx_Bitset a, rx_Bitset b)
551#else
552RX_DECL void
553rx_bitset_intersection (size, a, b)
554 int size;
555 rx_Bitset a;
556 rx_Bitset b;
557#endif
558{
559 int x;
560 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
561 a[x] &= b[x];
562}
563
564
565#ifdef __STDC__
566RX_DECL void
567rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
568#else
569RX_DECL void
570rx_bitset_difference (size, a, b)
571 int size;
572 rx_Bitset a;
573 rx_Bitset b;
574#endif
575{
576 int x;
577 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
578 a[x] &= ~ b[x];
579}
580
581
582#ifdef __STDC__
583RX_DECL void
584rx_bitset_revdifference (int size,
585 rx_Bitset a, rx_Bitset b)
586#else
587RX_DECL void
588rx_bitset_revdifference (size, a, b)
589 int size;
590 rx_Bitset a;
591 rx_Bitset b;
592#endif
593{
594 int x;
595 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
596 a[x] = ~a[x] & b[x];
597}
598
599#ifdef __STDC__
600RX_DECL void
601rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
602#else
603RX_DECL void
604rx_bitset_xor (size, a, b)
605 int size;
606 rx_Bitset a;
607 rx_Bitset b;
608#endif
609{
610 int x;
611 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
612 a[x] ^= b[x];
613}
614
615
616#ifdef __STDC__
617RX_DECL mg_u_long
618rx_bitset_hash (int size, rx_Bitset b)
619#else
620RX_DECL mg_u_long
621rx_bitset_hash (size, b)
622 int size;
623 rx_Bitset b;
624#endif
625{
626 int x;
627 uintptr_t hash = (uintptr_t)rx_bitset_hash;
628
629 for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
630 hash ^= rx_bitset_subset_val(b, x);
631
632 return hash;
633}
634
635
636RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
637{
638 0x1,
639 0x2,
640 0x4,
641 0x8,
642 0x10,
643 0x20,
644 0x40,
645 0x80,
646 0x100,
647 0x200,
648 0x400,
649 0x800,
650 0x1000,
651 0x2000,
652 0x4000,
653 0x8000,
654 0x10000,
655 0x20000,
656 0x40000,
657 0x80000,
658 0x100000,
659 0x200000,
660 0x400000,
661 0x800000,
662 0x1000000,
663 0x2000000,
664 0x4000000,
665 0x8000000,
666 0x10000000,
667 0x20000000,
668 0x40000000,
669 0x80000000
670};
671
672#ifdef RX_DEBUG
673
674#ifdef __STDC__
675static void
676print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
677#else
678static void
679print_cset (rx, cset, fp)
680 struct rx *rx;
681 rx_Bitset cset;
682 FILE * fp;
683#endif
684{
685 int x;
686 fputc ('[', fp);
687 for (x = 0; x < rx->local_cset_size; ++x)
688 if (RX_bitset_member (cset, x))
689 {
690 if (isprint(x))
691 fputc (x, fp);
692 else
693 fprintf (fp, "\\0%o ", x);
694 }
695 fputc (']', fp);
696}
697
698#endif /* RX_DEBUG */
699
700
701
702
703static mg_u_long rx_hash_masks[4] =
704{
705 0x12488421,
706 0x96699669,
707 0xbe7dd7eb,
708 0xffffffff
709};
710
711
712/* Hash tables */
713#ifdef __STDC__
714RX_DECL struct rx_hash_item *
715rx_hash_find (struct rx_hash * table,
716 mg_u_long hash,
717 void * value,
718 struct rx_hash_rules * rules)
719#else
720RX_DECL struct rx_hash_item *
721rx_hash_find (table, hash, value, rules)
722 struct rx_hash * table;
723 mg_u_long hash;
724 void * value;
725 struct rx_hash_rules * rules;
726#endif
727{
728 rx_hash_eq eq = rules->eq;
729 int maskc = 0;
730 mg_s_long mask = rx_hash_masks [0];
731 int bucket = (hash & mask) % 13;
732
733 while (table->children [bucket])
734 {
735 table = table->children [bucket];
736 ++maskc;
737 mask = rx_hash_masks[maskc];
738 bucket = (hash & mask) % 13;
739 }
740
741 {
742 struct rx_hash_item * it = table->buckets[bucket];
743 while (it)
744 if (eq (it->data, value))
745 return it;
746 else
747 it = it->next_same_hash;
748 }
749
750 return 0;
751}
752
753
754#ifdef __STDC__
755RX_DECL struct rx_hash_item *
756rx_hash_store (struct rx_hash * table,
757 mg_u_long hash,
758 void * value,
759 struct rx_hash_rules * rules)
760#else
761RX_DECL struct rx_hash_item *
762rx_hash_store (table, hash, value, rules)
763 struct rx_hash * table;
764 mg_u_long hash;
765 void * value;
766 struct rx_hash_rules * rules;
767#endif
768{
769 rx_hash_eq eq = rules->eq;
770 int maskc = 0;
771 mg_s_long mask = rx_hash_masks[0];
772 int bucket = (hash & mask) % 13;
773 int depth = 0;
774
775 while (table->children [bucket])
776 {
777 table = table->children [bucket];
778 ++maskc;
779 mask = rx_hash_masks[maskc];
780 bucket = (hash & mask) % 13;
781 ++depth;
782 }
783
784 {
785 struct rx_hash_item * it = table->buckets[bucket];
786 while (it)
787 if (eq (it->data, value))
788 return it;
789 else
790 it = it->next_same_hash;
791 }
792
793 {
794 if ( (depth < 3)
795 && (table->bucket_size [bucket] >= 4))
796 {
797 struct rx_hash * newtab = ((struct rx_hash *)
798 rules->hash_alloc (rules));
799 if (!newtab)
800 goto add_to_bucket;
801 bzero (newtab, sizeof (*newtab));
802 newtab->parent = table;
803 {
804 struct rx_hash_item * them = table->buckets[bucket];
805 mg_u_long newmask = rx_hash_masks[maskc + 1];
806 while (them)
807 {
808 struct rx_hash_item * save = them->next_same_hash;
809 int new_buck = (them->hash & newmask) % 13;
810 them->next_same_hash = newtab->buckets[new_buck];
811 newtab->buckets[new_buck] = them;
812 them->table = newtab;
813 them = save;
814 ++newtab->bucket_size[new_buck];
815 ++newtab->refs;
816 }
817 table->refs = (table->refs - table->bucket_size[bucket] + 1);
818 table->bucket_size[bucket] = 0;
819 table->buckets[bucket] = 0;
820 table->children[bucket] = newtab;
821 table = newtab;
822 bucket = (hash & newmask) % 13;
823 }
824 }
825 }
826 add_to_bucket:
827 {
828 struct rx_hash_item * it = ((struct rx_hash_item *)
829 rules->hash_item_alloc (rules, value));
830 if (!it)
831 return 0;
832 it->hash = hash;
833 it->table = table;
834 /* DATA and BINDING are to be set in hash_item_alloc */
835 it->next_same_hash = table->buckets [bucket];
836 table->buckets[bucket] = it;
837 ++table->bucket_size [bucket];
838 ++table->refs;
839 return it;
840 }
841}
842
843
844#ifdef __STDC__
845RX_DECL void
846rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
847#else
848RX_DECL void
849rx_hash_free (it, rules)
850 struct rx_hash_item * it;
851 struct rx_hash_rules * rules;
852#endif
853{
854 if (it)
855 {
856 struct rx_hash * table = it->table;
857 mg_u_long hash = it->hash;
858 int depth = (table->parent
859 ? (table->parent->parent
860 ? (table->parent->parent->parent
861 ? 3
862 : 2)
863 : 1)
864 : 0);
865 int bucket = (hash & rx_hash_masks [depth]) % 13;
866 struct rx_hash_item ** pos = &table->buckets [bucket];
867
868 while (*pos != it)
869 pos = &(*pos)->next_same_hash;
870 *pos = it->next_same_hash;
871 rules->free_hash_item (it, rules);
872 --table->bucket_size[bucket];
873 --table->refs;
874 while (!table->refs && depth)
875 {
876 struct rx_hash * save = table;
877 table = table->parent;
878 --depth;
879 bucket = (hash & rx_hash_masks [depth]) % 13;
880 --table->refs;
881 table->children[bucket] = 0;
882 rules->free_hash (save, rules);
883 }
884 }
885}
886
887#ifdef __STDC__
888RX_DECL void
889rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
890 struct rx_hash_rules * rules)
891#else
892RX_DECL void
893rx_free_hash_table (tab, freefn, rules)
894 struct rx_hash * tab;
895 rx_hash_freefn freefn;
896 struct rx_hash_rules * rules;
897#endif
898{
899 int x;
900
901 for (x = 0; x < 13; ++x)
902 if (tab->children[x])
903 {
904 rx_free_hash_table (tab->children[x], freefn, rules);
905 rules->free_hash (tab->children[x], rules);
906 }
907 else
908 {
909 struct rx_hash_item * them = tab->buckets[x];
910 while (them)
911 {
912 struct rx_hash_item * that = them;
913 them = that->next_same_hash;
914 freefn (that);
915 rules->free_hash_item (that, rules);
916 }
917 }
918}
919
920
921
922
923/* Utilities for manipulating bitset represntations of characters sets. */
924
925#ifdef __STDC__
926RX_DECL rx_Bitset
927rx_cset (struct rx *rx)
928#else
929RX_DECL rx_Bitset
930rx_cset (rx)
931 struct rx *rx;
932#endif
933{
934 rx_Bitset b = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
935 if (b)
936 rx_bitset_null (rx->local_cset_size, b);
937 return b;
938}
939
940
941#ifdef __STDC__
942RX_DECL rx_Bitset
943rx_copy_cset (struct rx *rx, rx_Bitset a)
944#else
945RX_DECL rx_Bitset
946rx_copy_cset (rx, a)
947 struct rx *rx;
948 rx_Bitset a;
949#endif
950{
951 rx_Bitset cs = rx_cset (rx);
952
953 if (cs)
954 rx_bitset_union (rx->local_cset_size, cs, a);
955
956 return cs;
957}
958
959
960#ifdef __STDC__
961RX_DECL void
962rx_free_cset (struct rx * rx, rx_Bitset c)
963#else
964RX_DECL void
965rx_free_cset (rx, c)
966 struct rx * rx;
967 rx_Bitset c;
968#endif
969{
970 if (c)
971 free ((char *)c);
972}
973
974
975
976/* Hash table memory allocation policy for the regexp compiler */
977
978#ifdef __STDC__
979static struct rx_hash *
980compiler_hash_alloc (struct rx_hash_rules * rules)
981#else
982static struct rx_hash *
983compiler_hash_alloc (rules)
984 struct rx_hash_rules * rules;
985#endif
986{
987 return (struct rx_hash *)malloc (sizeof (struct rx_hash));
988}
989
990
991#ifdef __STDC__
992static struct rx_hash_item *
993compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
994#else
995static struct rx_hash_item *
996compiler_hash_item_alloc (rules, value)
997 struct rx_hash_rules * rules;
998 void * value;
999#endif
1000{
1001 struct rx_hash_item * it;
1002 it = (struct rx_hash_item *)malloc (sizeof (*it));
1003 if (it)
1004 {
1005 it->data = value;
1006 it->binding = 0;
1007 }
1008 return it;
1009}
1010
1011
1012#ifdef __STDC__
1013static void
1014compiler_free_hash (struct rx_hash * tab,
1015 struct rx_hash_rules * rules)
1016#else
1017static void
1018compiler_free_hash (tab, rules)
1019 struct rx_hash * tab;
1020 struct rx_hash_rules * rules;
1021#endif
1022{
1023 free ((char *)tab);
1024}
1025
1026
1027#ifdef __STDC__
1028static void
1029compiler_free_hash_item (struct rx_hash_item * item,
1030 struct rx_hash_rules * rules)
1031#else
1032static void
1033compiler_free_hash_item (item, rules)
1034 struct rx_hash_item * item;
1035 struct rx_hash_rules * rules;
1036#endif
1037{
1038 free ((char *)item);
1039}
1040
1041
1042
1043/* This page: REXP_NODE (expression tree) structures. */
1044
1045#ifdef __STDC__
1046RX_DECL struct rexp_node *
1047rexp_node (struct rx *rx,
1048 enum rexp_node_type type)
1049#else
1050RX_DECL struct rexp_node *
1051rexp_node (rx, type)
1052 struct rx *rx;
1053 enum rexp_node_type type;
1054#endif
1055{
1056 struct rexp_node *n;
1057
1058 n = (struct rexp_node *)malloc (sizeof (*n));
1059 bzero (n, sizeof (*n));
1060 if (n)
1061 n->type = type;
1062 return n;
1063}
1064
1065
1066/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
1067 * can be freed using rx_free_cset.
1068 */
1069#ifdef __STDC__
1070RX_DECL struct rexp_node *
1071rx_mk_r_cset (struct rx * rx,
1072 rx_Bitset b)
1073#else
1074RX_DECL struct rexp_node *
1075rx_mk_r_cset (rx, b)
1076 struct rx * rx;
1077 rx_Bitset b;
1078#endif
1079{
1080 struct rexp_node * n = rexp_node (rx, r_cset);
1081 if (n)
1082 n->params.cset = b;
1083 return n;
1084}
1085
1086
1087#ifdef __STDC__
1088RX_DECL struct rexp_node *
1089rx_mk_r_concat (struct rx * rx,
1090 struct rexp_node * a,
1091 struct rexp_node * b)
1092#else
1093RX_DECL struct rexp_node *
1094rx_mk_r_concat (rx, a, b)
1095 struct rx * rx;
1096 struct rexp_node * a;
1097 struct rexp_node * b;
1098#endif
1099{
1100 struct rexp_node * n = rexp_node (rx, r_concat);
1101 if (n)
1102 {
1103 n->params.pair.left = a;
1104 n->params.pair.right = b;
1105 }
1106 return n;
1107}
1108
1109
1110#ifdef __STDC__
1111RX_DECL struct rexp_node *
1112rx_mk_r_alternate (struct rx * rx,
1113 struct rexp_node * a,
1114 struct rexp_node * b)
1115#else
1116RX_DECL struct rexp_node *
1117rx_mk_r_alternate (rx, a, b)
1118 struct rx * rx;
1119 struct rexp_node * a;
1120 struct rexp_node * b;
1121#endif
1122{
1123 struct rexp_node * n = rexp_node (rx, r_alternate);
1124 if (n)
1125 {
1126 n->params.pair.left = a;
1127 n->params.pair.right = b;
1128 }
1129 return n;
1130}
1131
1132
1133#ifdef __STDC__
1134RX_DECL struct rexp_node *
1135rx_mk_r_opt (struct rx * rx,
1136 struct rexp_node * a)
1137#else
1138RX_DECL struct rexp_node *
1139rx_mk_r_opt (rx, a)
1140 struct rx * rx;
1141 struct rexp_node * a;
1142#endif
1143{
1144 struct rexp_node * n = rexp_node (rx, r_opt);
1145 if (n)
1146 {
1147 n->params.pair.left = a;
1148 n->params.pair.right = 0;
1149 }
1150 return n;
1151}
1152
1153
1154#ifdef __STDC__
1155RX_DECL struct rexp_node *
1156rx_mk_r_star (struct rx * rx,
1157 struct rexp_node * a)
1158#else
1159RX_DECL struct rexp_node *
1160rx_mk_r_star (rx, a)
1161 struct rx * rx;
1162 struct rexp_node * a;
1163#endif
1164{
1165 struct rexp_node * n = rexp_node (rx, r_star);
1166 if (n)
1167 {
1168 n->params.pair.left = a;
1169 n->params.pair.right = 0;
1170 }
1171 return n;
1172}
1173
1174
1175#ifdef __STDC__
1176RX_DECL struct rexp_node *
1177rx_mk_r_2phase_star (struct rx * rx,
1178 struct rexp_node * a,
1179 struct rexp_node * b)
1180#else
1181RX_DECL struct rexp_node *
1182rx_mk_r_2phase_star (rx, a, b)
1183 struct rx * rx;
1184 struct rexp_node * a;
1185 struct rexp_node * b;
1186#endif
1187{
1188 struct rexp_node * n = rexp_node (rx, r_2phase_star);
1189 if (n)
1190 {
1191 n->params.pair.left = a;
1192 n->params.pair.right = b;
1193 }
1194 return n;
1195}
1196
1197
1198#ifdef __STDC__
1199RX_DECL struct rexp_node *
1200rx_mk_r_side_effect (struct rx * rx,
1201 rx_side_effect a)
1202#else
1203RX_DECL struct rexp_node *
1204rx_mk_r_side_effect (rx, a)
1205 struct rx * rx;
1206 rx_side_effect a;
1207#endif
1208{
1209 struct rexp_node * n = rexp_node (rx, r_side_effect);
1210 if (n)
1211 {
1212 n->params.side_effect = a;
1213 n->params.pair.right = 0;
1214 }
1215 return n;
1216}
1217
1218
1219#ifdef __STDC__
1220RX_DECL struct rexp_node *
1221rx_mk_r_data (struct rx * rx,
1222 void * a)
1223#else
1224RX_DECL struct rexp_node *
1225rx_mk_r_data (rx, a)
1226 struct rx * rx;
1227 void * a;
1228#endif
1229{
1230 struct rexp_node * n = rexp_node (rx, r_data);
1231 if (n)
1232 {
1233 n->params.pair.left = a;
1234 n->params.pair.right = 0;
1235 }
1236 return n;
1237}
1238
1239
1240#ifdef __STDC__
1241RX_DECL void
1242rx_free_rexp (struct rx * rx, struct rexp_node * node)
1243#else
1244RX_DECL void
1245rx_free_rexp (rx, node)
1246 struct rx * rx;
1247 struct rexp_node * node;
1248#endif
1249{
1250 if (node)
1251 {
1252 switch (node->type)
1253 {
1254 case r_cset:
1255 if (node->params.cset)
1256 rx_free_cset (rx, node->params.cset);
1257
1258 case r_side_effect:
1259 break;
1260
1261 case r_concat:
1262 case r_alternate:
1263 case r_2phase_star:
1264 case r_opt:
1265 case r_star:
1266 rx_free_rexp (rx, node->params.pair.left);
1267 rx_free_rexp (rx, node->params.pair.right);
1268 break;
1269
1270 case r_data:
1271 /* This shouldn't occur. */
1272 break;
1273 }
1274 free ((char *)node);
1275 }
1276}
1277
1278
1279#ifdef __STDC__
1280RX_DECL struct rexp_node *
1281rx_copy_rexp (struct rx *rx,
1282 struct rexp_node *node)
1283#else
1284RX_DECL struct rexp_node *
1285rx_copy_rexp (rx, node)
1286 struct rx *rx;
1287 struct rexp_node *node;
1288#endif
1289{
1290 if (!node)
1291 return 0;
1292 else
1293 {
1294 struct rexp_node *n = rexp_node (rx, node->type);
1295 if (!n)
1296 return 0;
1297 switch (node->type)
1298 {
1299 case r_cset:
1300 n->params.cset = rx_copy_cset (rx, node->params.cset);
1301 if (!n->params.cset)
1302 {
1303 rx_free_rexp (rx, n);
1304 return 0;
1305 }
1306 break;
1307
1308 case r_side_effect:
1309 n->params.side_effect = node->params.side_effect;
1310 break;
1311
1312 case r_concat:
1313 case r_alternate:
1314 case r_opt:
1315 case r_2phase_star:
1316 case r_star:
1317 n->params.pair.left =
1318 rx_copy_rexp (rx, node->params.pair.left);
1319 n->params.pair.right =
1320 rx_copy_rexp (rx, node->params.pair.right);
1321 if ( (node->params.pair.left && !n->params.pair.left)
1322 || (node->params.pair.right && !n->params.pair.right))
1323 {
1324 rx_free_rexp (rx, n);
1325 return 0;
1326 }
1327 break;
1328 case r_data:
1329 /* shouldn't happen */
1330 break;
1331 }
1332 return n;
1333 }
1334}
1335
1336
1337
1338
1339/* This page: functions to build and destroy graphs that describe nfa's */
1340
1341/* Constructs a new nfa node. */
1342#ifdef __STDC__
1343RX_DECL struct rx_nfa_state *
1344rx_nfa_state (struct rx *rx)
1345#else
1346RX_DECL struct rx_nfa_state *
1347rx_nfa_state (rx)
1348 struct rx *rx;
1349#endif
1350{
1351 struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
1352 if (!n)
1353 return 0;
1354 bzero (n, sizeof (*n));
1355 n->next = rx->nfa_states;
1356 rx->nfa_states = n;
1357 return n;
1358}
1359
1360
1361#ifdef __STDC__
1362RX_DECL void
1363rx_free_nfa_state (struct rx_nfa_state * n)
1364#else
1365RX_DECL void
1366rx_free_nfa_state (n)
1367 struct rx_nfa_state * n;
1368#endif
1369{
1370 free ((char *)n);
1371}
1372
1373
1374/* This looks up an nfa node, given a numeric id. Numeric id's are
1375 * assigned after the nfa has been built.
1376 */
1377#ifdef __STDC__
1378RX_DECL struct rx_nfa_state *
1379rx_id_to_nfa_state (struct rx * rx,
1380 int id)
1381#else
1382RX_DECL struct rx_nfa_state *
1383rx_id_to_nfa_state (rx, id)
1384 struct rx * rx;
1385 int id;
1386#endif
1387{
1388 struct rx_nfa_state * n;
1389 for (n = rx->nfa_states; n; n = n->next)
1390 if (n->id == id)
1391 return n;
1392 return 0;
1393}
1394
1395
1396/* This adds an edge between two nodes, but doesn't initialize the
1397 * edge label.
1398 */
1399
1400#ifdef __STDC__
1401RX_DECL struct rx_nfa_edge *
1402rx_nfa_edge (struct rx *rx,
1403 enum rx_nfa_etype type,
1404 struct rx_nfa_state *start,
1405 struct rx_nfa_state *dest)
1406#else
1407RX_DECL struct rx_nfa_edge *
1408rx_nfa_edge (rx, type, start, dest)
1409 struct rx *rx;
1410 enum rx_nfa_etype type;
1411 struct rx_nfa_state *start;
1412 struct rx_nfa_state *dest;
1413#endif
1414{
1415 struct rx_nfa_edge *e;
1416 e = (struct rx_nfa_edge *)malloc (sizeof (*e));
1417 if (!e)
1418 return 0;
1419 e->next = start->edges;
1420 start->edges = e;
1421 e->type = type;
1422 e->dest = dest;
1423 return e;
1424}
1425
1426
1427#ifdef __STDC__
1428RX_DECL void
1429rx_free_nfa_edge (struct rx_nfa_edge * e)
1430#else
1431RX_DECL void
1432rx_free_nfa_edge (e)
1433 struct rx_nfa_edge * e;
1434#endif
1435{
1436 free ((char *)e);
1437}
1438
1439
1440/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
1441 * of an NFA. These are added to an nfa automaticly by eclose_nfa.
1442 */
1443
1444#ifdef __STDC__
1445static struct rx_possible_future *
1446rx_possible_future (struct rx * rx,
1447 struct rx_se_list * effects)
1448#else
1449static struct rx_possible_future *
1450rx_possible_future (rx, effects)
1451 struct rx * rx;
1452 struct rx_se_list * effects;
1453#endif
1454{
1455 struct rx_possible_future *ec;
1456 ec = (struct rx_possible_future *) malloc (sizeof (*ec));
1457 if (!ec)
1458 return 0;
1459 ec->destset = 0;
1460 ec->next = 0;
1461 ec->effects = effects;
1462 return ec;
1463}
1464
1465
1466#ifdef __STDC__
1467static void
1468rx_free_possible_future (struct rx_possible_future * pf)
1469#else
1470static void
1471rx_free_possible_future (pf)
1472 struct rx_possible_future * pf;
1473#endif
1474{
1475 free ((char *)pf);
1476}
1477
1478
1479#ifdef __STDC__
1480RX_DECL void
1481rx_free_nfa (struct rx *rx)
1482#else
1483RX_DECL void
1484rx_free_nfa (rx)
1485 struct rx *rx;
1486#endif
1487{
1488 while (rx->nfa_states)
1489 {
1490 while (rx->nfa_states->edges)
1491 {
1492 switch (rx->nfa_states->edges->type)
1493 {
1494 case ne_cset:
1495 rx_free_cset (rx, rx->nfa_states->edges->params.cset);
1496 break;
1497 default:
1498 break;
1499 }
1500 {
1501 struct rx_nfa_edge * e;
1502 e = rx->nfa_states->edges;
1503 rx->nfa_states->edges = rx->nfa_states->edges->next;
1504 rx_free_nfa_edge (e);
1505 }
1506 } /* while (rx->nfa_states->edges) */
1507 {
1508 /* Iterate over the partial epsilon closures of rx->nfa_states */
1509 struct rx_possible_future * pf = rx->nfa_states->futures;
1510 while (pf)
1511 {
1512 struct rx_possible_future * pft = pf;
1513 pf = pf->next;
1514 rx_free_possible_future (pft);
1515 }
1516 }
1517 {
1518 struct rx_nfa_state *n;
1519 n = rx->nfa_states;
1520 rx->nfa_states = rx->nfa_states->next;
1521 rx_free_nfa_state (n);
1522 }
1523 }
1524}
1525
1526
1527
1528
1529/* This page: translating a pattern expression into an nfa and doing the
1530 * static part of the nfa->super-nfa translation.
1531 */
1532
1533/* This is the thompson regexp->nfa algorithm.
1534 * It is modified to allow for `side-effect epsilons.' Those are
1535 * edges that are taken whenever a similar epsilon edge would be,
1536 * but which imply that some side effect occurs when the edge
1537 * is taken.
1538 *
1539 * Side effects are used to model parts of the pattern langauge
1540 * that are not regular (in the formal sense).
1541 */
1542
1543#ifdef __STDC__
1544RX_DECL int
1545rx_build_nfa (struct rx *rx,
1546 struct rexp_node *rexp,
1547 struct rx_nfa_state **start,
1548 struct rx_nfa_state **end)
1549#else
1550RX_DECL int
1551rx_build_nfa (rx, rexp, start, end)
1552 struct rx *rx;
1553 struct rexp_node *rexp;
1554 struct rx_nfa_state **start;
1555 struct rx_nfa_state **end;
1556#endif
1557{
1558 struct rx_nfa_edge *edge;
1559
1560 /* Start & end nodes may have been allocated by the caller. */
1561 *start = *start ? *start : rx_nfa_state (rx);
1562
1563 if (!*start)
1564 return 0;
1565
1566 if (!rexp)
1567 {
1568 *end = *start;
1569 return 1;
1570 }
1571
1572 *end = *end ? *end : rx_nfa_state (rx);
1573
1574 if (!*end)
1575 {
1576 rx_free_nfa_state (*start);
1577 return 0;
1578 }
1579
1580 switch (rexp->type)
1581 {
1582 case r_data:
1583 return 0;
1584
1585 case r_cset:
1586 edge = rx_nfa_edge (rx, ne_cset, *start, *end);
1587 if (!edge)
1588 return 0;
1589 edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
1590 if (!edge->params.cset)
1591 {
1592 rx_free_nfa_edge (edge);
1593 return 0;
1594 }
1595 return 1;
1596
1597 case r_opt:
1598 return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
1599 && rx_nfa_edge (rx, ne_epsilon, *start, *end));
1600
1601 case r_star:
1602 {
1603 struct rx_nfa_state * star_start = 0;
1604 struct rx_nfa_state * star_end = 0;
1605 return (rx_build_nfa (rx, rexp->params.pair.left,
1606 &star_start, &star_end)
1607 && star_start
1608 && star_end
1609 && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
1610 && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
1611 && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
1612
1613 && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
1614 }
1615
1616 case r_2phase_star:
1617 {
1618 struct rx_nfa_state * star_start = 0;
1619 struct rx_nfa_state * star_end = 0;
1620 struct rx_nfa_state * loop_exp_start = 0;
1621 struct rx_nfa_state * loop_exp_end = 0;
1622
1623 return (rx_build_nfa (rx, rexp->params.pair.left,
1624 &star_start, &star_end)
1625 && rx_build_nfa (rx, rexp->params.pair.right,
1626 &loop_exp_start, &loop_exp_end)
1627 && star_start
1628 && star_end
1629 && loop_exp_end
1630 && loop_exp_start
1631 && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
1632 && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
1633 && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
1634
1635 && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
1636 && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
1637 }
1638
1639
1640 case r_concat:
1641 {
1642 struct rx_nfa_state *shared = 0;
1643 return
1644 (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
1645 && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
1646 }
1647
1648 case r_alternate:
1649 {
1650 struct rx_nfa_state *ls = 0;
1651 struct rx_nfa_state *le = 0;
1652 struct rx_nfa_state *rs = 0;
1653 struct rx_nfa_state *re = 0;
1654 return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
1655 && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
1656 && rx_nfa_edge (rx, ne_epsilon, *start, ls)
1657 && rx_nfa_edge (rx, ne_epsilon, *start, rs)
1658 && rx_nfa_edge (rx, ne_epsilon, le, *end)
1659 && rx_nfa_edge (rx, ne_epsilon, re, *end));
1660 }
1661
1662 case r_side_effect:
1663 edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
1664 if (!edge)
1665 return 0;
1666 edge->params.side_effect = rexp->params.side_effect;
1667 return 1;
1668 }
1669
1670 /* this should never happen */
1671 return 0;
1672}
1673
1674
1675/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
1676 * transitions. Only these nodes can occur in super-states.
1677 * All nodes are given an integer id.
1678 * The id is non-negative if the node has non-epsilon out-transitions, negative
1679 * otherwise (this is because we want the non-negative ids to be used as
1680 * array indexes in a few places).
1681 */
1682
1683#ifdef __STDC__
1684RX_DECL void
1685rx_name_nfa_states (struct rx *rx)
1686#else
1687RX_DECL void
1688rx_name_nfa_states (rx)
1689 struct rx *rx;
1690#endif
1691{
1692 struct rx_nfa_state *n = rx->nfa_states;
1693
1694 rx->nodec = 0;
1695 rx->epsnodec = -1;
1696
1697 while (n)
1698 {
1699 struct rx_nfa_edge *e = n->edges;
1700
1701 if (n->is_start)
1702 n->eclosure_needed = 1;
1703
1704 while (e)
1705 {
1706 switch (e->type)
1707 {
1708 case ne_epsilon:
1709 case ne_side_effect:
1710 break;
1711
1712 case ne_cset:
1713 n->id = rx->nodec++;
1714 {
1715 struct rx_nfa_edge *from_n = n->edges;
1716 while (from_n)
1717 {
1718 from_n->dest->eclosure_needed = 1;
1719 from_n = from_n->next;
1720 }
1721 }
1722 goto cont;
1723 }
1724 e = e->next;
1725 }
1726 n->id = rx->epsnodec--;
1727 cont:
1728 n = n->next;
1729 }
1730 rx->epsnodec = -rx->epsnodec;
1731}
1732
1733
1734
1735/* This page: data structures for the static part of the nfa->supernfa
1736 * translation.
1737 *
1738 * There are side effect lists -- lists of side effects occuring
1739 * along an uninterrupted, acyclic path of side-effect epsilon edges.
1740 * Such paths are collapsed to single edges in the course of computing
1741 * epsilon closures. Such single edges are labled with a list of all
1742 * the side effects entailed in crossing them. Like lists of side
1743 * effects are made == by the constructors below.
1744 *
1745 * There are also nfa state sets. These are used to hold a list of all
1746 * states reachable from a starting state for a given type of transition
1747 * and side effect list. These are also hash-consed.
1748 */
1749
1750/* The next several functions compare, construct, etc. lists of side
1751 * effects. See ECLOSE_NFA (below) for details.
1752 */
1753
1754/* Ordering of rx_se_list
1755 * (-1, 0, 1 return value convention).
1756 */
1757
1758#ifdef __STDC__
1759static int
1760se_list_cmp (void * va, void * vb)
1761#else
1762static int
1763se_list_cmp (va, vb)
1764 void * va;
1765 void * vb;
1766#endif
1767{
1768 struct rx_se_list * a = (struct rx_se_list *)va;
1769 struct rx_se_list * b = (struct rx_se_list *)vb;
1770
1771 return ((va == vb)
1772 ? 0
1773 : (!va
1774 ? -1
1775 : (!vb
1776 ? 1
1777 : ((intptr_t)a->car < (intptr_t)b->car
1778 ? 1
1779 : ((intptr_t)a->car > (intptr_t)b->car
1780 ? -1
1781 : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
1782}
1783
1784
1785#ifdef __STDC__
1786static int
1787se_list_equal (void * va, void * vb)
1788#else
1789static int
1790se_list_equal (va, vb)
1791 void * va;
1792 void * vb;
1793#endif
1794{
1795 return !(se_list_cmp (va, vb));
1796}
1797
1798static struct rx_hash_rules se_list_hash_rules =
1799{
1800 se_list_equal,
1801 compiler_hash_alloc,
1802 compiler_free_hash,
1803 compiler_hash_item_alloc,
1804 compiler_free_hash_item
1805};
1806
1807
1808#ifdef __STDC__
1809static struct rx_se_list *
1810side_effect_cons (struct rx * rx,
1811 void * se, struct rx_se_list * list)
1812#else
1813static struct rx_se_list *
1814side_effect_cons (rx, se, list)
1815 struct rx * rx;
1816 void * se;
1817 struct rx_se_list * list;
1818#endif
1819{
1820 struct rx_se_list * l;
1821 l = ((struct rx_se_list *) malloc (sizeof (*l)));
1822 if (!l)
1823 return 0;
1824 l->car = se;
1825 l->cdr = list;
1826 return l;
1827}
1828
1829
1830#ifdef __STDC__
1831static struct rx_se_list *
1832hash_cons_se_prog (struct rx * rx,
1833 struct rx_hash * memo,
1834 void * car, struct rx_se_list * cdr)
1835#else
1836static struct rx_se_list *
1837hash_cons_se_prog (rx, memo, car, cdr)
1838 struct rx * rx;
1839 struct rx_hash * memo;
1840 void * car;
1841 struct rx_se_list * cdr;
1842#endif
1843{
1844 intptr_t hash = (intptr_t)car ^ (intptr_t)cdr;
1845 struct rx_se_list template;
1846
1847 template.car = car;
1848 template.cdr = cdr;
1849 {
1850 struct rx_hash_item * it = rx_hash_store (memo, hash,
1851 (void *)&template,
1852 &se_list_hash_rules);
1853 if (!it)
1854 return 0;
1855 if (it->data == (void *)&template)
1856 {
1857 struct rx_se_list * consed;
1858 consed = (struct rx_se_list *) malloc (sizeof (*consed));
1859 *consed = template;
1860 it->data = (void *)consed;
1861 }
1862 return (struct rx_se_list *)it->data;
1863 }
1864}
1865
1866
1867#ifdef __STDC__
1868static struct rx_se_list *
1869hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
1870#else
1871static struct rx_se_list *
1872hash_se_prog (rx, memo, prog)
1873 struct rx * rx;
1874 struct rx_hash * memo;
1875 struct rx_se_list * prog;
1876#endif
1877{
1878 struct rx_se_list * answer = 0;
1879 while (prog)
1880 {
1881 answer = hash_cons_se_prog (rx, memo, prog->car, answer);
1882 if (!answer)
1883 return 0;
1884 prog = prog->cdr;
1885 }
1886 return answer;
1887}
1888
1889#ifdef __STDC__
1890static int
1891nfa_set_cmp (void * va, void * vb)
1892#else
1893static int
1894nfa_set_cmp (va, vb)
1895 void * va;
1896 void * vb;
1897#endif
1898{
1899 struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
1900 struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
1901
1902 return ((va == vb)
1903 ? 0
1904 : (!va
1905 ? -1
1906 : (!vb
1907 ? 1
1908 : (a->car->id < b->car->id
1909 ? 1
1910 : (a->car->id > b->car->id
1911 ? -1
1912 : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
1913}
1914
1915#ifdef __STDC__
1916static int
1917nfa_set_equal (void * va, void * vb)
1918#else
1919static int
1920nfa_set_equal (va, vb)
1921 void * va;
1922 void * vb;
1923#endif
1924{
1925 return !nfa_set_cmp (va, vb);
1926}
1927
1928static struct rx_hash_rules nfa_set_hash_rules =
1929{
1930 nfa_set_equal,
1931 compiler_hash_alloc,
1932 compiler_free_hash,
1933 compiler_hash_item_alloc,
1934 compiler_free_hash_item
1935};
1936
1937
1938#ifdef __STDC__
1939static struct rx_nfa_state_set *
1940nfa_set_cons (struct rx * rx,
1941 struct rx_hash * memo, struct rx_nfa_state * state,
1942 struct rx_nfa_state_set * set)
1943#else
1944static struct rx_nfa_state_set *
1945nfa_set_cons (rx, memo, state, set)
1946 struct rx * rx;
1947 struct rx_hash * memo;
1948 struct rx_nfa_state * state;
1949 struct rx_nfa_state_set * set;
1950#endif
1951{
1952 struct rx_nfa_state_set template;
1953 struct rx_hash_item * node;
1954 template.car = state;
1955 template.cdr = set;
1956 node = rx_hash_store (memo,
1957 (((intptr_t)state) >> 8) ^ (intptr_t)set,
1958 &template, &nfa_set_hash_rules);
1959 if (!node)
1960 return 0;
1961 if (node->data == &template)
1962 {
1963 struct rx_nfa_state_set * l;
1964 l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
1965 node->data = (void *) l;
1966 if (!l)
1967 return 0;
1968 *l = template;
1969 }
1970 return (struct rx_nfa_state_set *)node->data;
1971}
1972
1973
1974#ifdef __STDC__
1975static struct rx_nfa_state_set *
1976nfa_set_enjoin (struct rx * rx,
1977 struct rx_hash * memo, struct rx_nfa_state * state,
1978 struct rx_nfa_state_set * set)
1979#else
1980static struct rx_nfa_state_set *
1981nfa_set_enjoin (rx, memo, state, set)
1982 struct rx * rx;
1983 struct rx_hash * memo;
1984 struct rx_nfa_state * state;
1985 struct rx_nfa_state_set * set;
1986#endif
1987{
1988 if (!set || state->id < set->car->id)
1989 return nfa_set_cons (rx, memo, state, set);
1990 if (state->id == set->car->id)
1991 return set;
1992 else
1993 {
1994 struct rx_nfa_state_set * newcdr
1995 = nfa_set_enjoin (rx, memo, state, set->cdr);
1996 if (newcdr != set->cdr)
1997 set = nfa_set_cons (rx, memo, set->car, newcdr);
1998 return set;
1999 }
2000}
2001
2002
2003
2004
2005/* This page: computing epsilon closures. The closures aren't total.
2006 * Each node's closures are partitioned according to the side effects entailed
2007 * along the epsilon edges. Return true on success.
2008 */
2009
2010struct eclose_frame
2011{
2012 struct rx_se_list *prog_backwards;
2013};
2014
2015
2016#ifdef __STDC__
2017static int
2018eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
2019 struct rx_nfa_state *node, struct eclose_frame *frame)
2020#else
2021static int
2022eclose_node (rx, outnode, node, frame)
2023 struct rx *rx;
2024 struct rx_nfa_state *outnode;
2025 struct rx_nfa_state *node;
2026 struct eclose_frame *frame;
2027#endif
2028{
2029 struct rx_nfa_edge *e = node->edges;
2030
2031 /* For each node, we follow all epsilon paths to build the closure.
2032 * The closure omits nodes that have only epsilon edges.
2033 * The closure is split into partial closures -- all the states in
2034 * a partial closure are reached by crossing the same list of
2035 * of side effects (though not necessarily the same path).
2036 */
2037 if (node->mark)
2038 return 1;
2039 node->mark = 1;
2040
2041 if (node->id >= 0 || node->is_final)
2042 {
2043 struct rx_possible_future **ec;
2044 struct rx_se_list * prog_in_order
2045 = ((struct rx_se_list *)hash_se_prog (rx,
2046 &rx->se_list_memo,
2047 frame->prog_backwards));
2048 int cmp;
2049
2050 ec = &outnode->futures;
2051
2052 while (*ec)
2053 {
2054 cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
2055 if (cmp <= 0)
2056 break;
2057 ec = &(*ec)->next;
2058 }
2059 if (!*ec || (cmp < 0))
2060 {
2061 struct rx_possible_future * saved = *ec;
2062 *ec = rx_possible_future (rx, prog_in_order);
2063 (*ec)->next = saved;
2064 if (!*ec)
2065 return 0;
2066 }
2067 if (node->id >= 0)
2068 {
2069 (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
2070 node, (*ec)->destset);
2071 if (!(*ec)->destset)
2072 return 0;
2073 }
2074 }
2075
2076 while (e)
2077 {
2078 switch (e->type)
2079 {
2080 case ne_epsilon:
2081 if (!eclose_node (rx, outnode, e->dest, frame))
2082 return 0;
2083 break;
2084 case ne_side_effect:
2085 {
2086 frame->prog_backwards = side_effect_cons (rx,
2087 e->params.side_effect,
2088 frame->prog_backwards);
2089 if (!frame->prog_backwards)
2090 return 0;
2091 if (!eclose_node (rx, outnode, e->dest, frame))
2092 return 0;
2093 {
2094 struct rx_se_list * dying = frame->prog_backwards;
2095 frame->prog_backwards = frame->prog_backwards->cdr;
2096 free ((char *)dying);
2097 }
2098 break;
2099 }
2100 default:
2101 break;
2102 }
2103 e = e->next;
2104 }
2105 node->mark = 0;
2106 return 1;
2107}
2108
2109
2110#ifdef __STDC__
2111RX_DECL int
2112rx_eclose_nfa (struct rx *rx)
2113#else
2114RX_DECL int
2115rx_eclose_nfa (rx)
2116 struct rx *rx;
2117#endif
2118{
2119 struct rx_nfa_state *n = rx->nfa_states;
2120 struct eclose_frame frame;
2121 static int rx_id = 0;
2122
2123 frame.prog_backwards = 0;
2124 rx->rx_id = rx_id++;
2125 bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
2126 bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
2127 while (n)
2128 {
2129 n->futures = 0;
2130 if (n->eclosure_needed && !eclose_node (rx, n, n, &frame))
2131 return 0;
2132 /* clear_marks (rx); */
2133 n = n->next;
2134 }
2135 return 1;
2136}
2137
2138
2139/* This deletes epsilon edges from an NFA. After running eclose_node,
2140 * we have no more need for these edges. They are removed to simplify
2141 * further operations on the NFA.
2142 */
2143
2144#ifdef __STDC__
2145RX_DECL void
2146rx_delete_epsilon_transitions (struct rx *rx)
2147#else
2148RX_DECL void
2149rx_delete_epsilon_transitions (rx)
2150 struct rx *rx;
2151#endif
2152{
2153 struct rx_nfa_state *n = rx->nfa_states;
2154 struct rx_nfa_edge **e;
2155
2156 while (n)
2157 {
2158 e = &n->edges;
2159 while (*e)
2160 {
2161 struct rx_nfa_edge *t;
2162 switch ((*e)->type)
2163 {
2164 case ne_epsilon:
2165 case ne_side_effect:
2166 t = *e;
2167 *e = t->next;
2168 rx_free_nfa_edge (t);
2169 break;
2170
2171 default:
2172 e = &(*e)->next;
2173 break;
2174 }
2175 }
2176 n = n->next;
2177 }
2178}
2179
2180
2181
2182/* This page: storing the nfa in a contiguous region of memory for
2183 * subsequent conversion to a super-nfa.
2184 */
2185
2186/* This is for qsort on an array of nfa_states. The order
2187 * is based on state ids and goes
2188 * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
2189 * This way, positive ids double as array indices.
2190 */
2191
2192#ifdef __STDC__
2193static int
2194nfacmp (void * va, void * vb)
2195#else
2196static int
2197nfacmp (va, vb)
2198 void * va;
2199 void * vb;
2200#endif
2201{
2202 struct rx_nfa_state **a = (struct rx_nfa_state **)va;
2203 struct rx_nfa_state **b = (struct rx_nfa_state **)vb;
2204 return (*a == *b /* &&&& 3.18 */
2205 ? 0
2206 : (((*a)->id < 0) == ((*b)->id < 0)
2207 ? (((*a)->id < (*b)->id) ? -1 : 1)
2208 : (((*a)->id < 0)
2209 ? 1 : -1)));
2210}
2211
2212#ifdef __STDC__
2213static int
2214count_hash_nodes (struct rx_hash * st)
2215#else
2216static int
2217count_hash_nodes (st)
2218 struct rx_hash * st;
2219#endif
2220{
2221 int x;
2222 int count = 0;
2223 for (x = 0; x < 13; ++x)
2224 count += ((st->children[x])
2225 ? count_hash_nodes (st->children[x])
2226 : st->bucket_size[x]);
2227
2228 return count;
2229}
2230
2231
2232#ifdef __STDC__
2233static void
2234se_memo_freer (struct rx_hash_item * node)
2235#else
2236static void
2237se_memo_freer (node)
2238 struct rx_hash_item * node;
2239#endif
2240{
2241 free ((char *)node->data);
2242}
2243
2244
2245#ifdef __STDC__
2246static void
2247nfa_set_freer (struct rx_hash_item * node)
2248#else
2249static void
2250nfa_set_freer (node)
2251 struct rx_hash_item * node;
2252#endif
2253{
2254 free ((char *)node->data);
2255}
2256
2257
2258/* This copies an entire NFA into a single malloced block of memory.
2259 * Mostly this is for compatability with regex.c, though it is convenient
2260 * to have the nfa nodes in an array.
2261 */
2262
2263#ifdef __STDC__
2264RX_DECL int
2265rx_compactify_nfa (struct rx *rx,
2266 void **mem, mg_u_long *size)
2267#else
2268RX_DECL int
2269rx_compactify_nfa (rx, mem, size)
2270 struct rx *rx;
2271 void **mem;
2272 mg_u_long *size;
2273#endif
2274{
2275 int total_nodec;
2276 struct rx_nfa_state *n;
2277 int edgec = 0;
2278 int eclosec = 0;
2279 int se_list_consc = count_hash_nodes (&rx->se_list_memo);
2280 int nfa_setc = count_hash_nodes (&rx->set_list_memo);
2281 mg_u_long total_size;
2282
2283 /* This takes place in two stages. First, the total size of the
2284 * nfa is computed, then structures are copied.
2285 */
2286 n = rx->nfa_states;
2287 total_nodec = 0;
2288 while (n)
2289 {
2290 struct rx_nfa_edge *e = n->edges;
2291 struct rx_possible_future *ec = n->futures;
2292 ++total_nodec;
2293 while (e)
2294 {
2295 ++edgec;
2296 e = e->next;
2297 }
2298 while (ec)
2299 {
2300 ++eclosec;
2301 ec = ec->next;
2302 }
2303 n = n->next;
2304 }
2305
2306 total_size = (total_nodec * sizeof (struct rx_nfa_state)
2307 + edgec * rx_sizeof_bitset (rx->local_cset_size)
2308 + edgec * sizeof (struct rx_nfa_edge)
2309 + nfa_setc * sizeof (struct rx_nfa_state_set)
2310 + eclosec * sizeof (struct rx_possible_future)
2311 + se_list_consc * sizeof (struct rx_se_list)
2312 + rx->reserved);
2313
2314 if (total_size > *size)
2315 {
2316 *mem = remalloc (*mem, total_size);
2317 if (*mem)
2318 *size = total_size;
2319 else
2320 return 0;
2321 }
2322 /* Now we've allocated the memory; this copies the NFA. */
2323 {
2324 static struct rx_nfa_state **scratch = 0;
2325 static int scratch_alloc = 0;
2326 struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
2327 struct rx_nfa_state *new_state = state_base;
2328 struct rx_nfa_edge *new_edge =
2329 (struct rx_nfa_edge *)
2330 ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
2331 struct rx_se_list * new_se_list =
2332 (struct rx_se_list *)
2333 ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
2334 struct rx_possible_future *new_close =
2335 ((struct rx_possible_future *)
2336 ((char *) new_se_list
2337 + se_list_consc * sizeof (struct rx_se_list)));
2338 struct rx_nfa_state_set * new_nfa_set =
2339 ((struct rx_nfa_state_set *)
2340 ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
2341 char *new_bitset =
2342 ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
2343 int x;
2344 struct rx_nfa_state *n;
2345
2346 if (scratch_alloc < total_nodec)
2347 {
2348 scratch = ((struct rx_nfa_state **)
2349 remalloc (scratch, total_nodec * sizeof (*scratch)));
2350 if (scratch)
2351 scratch_alloc = total_nodec;
2352 else
2353 {
2354 scratch_alloc = 0;
2355 return 0;
2356 }
2357 }
2358
2359 for (x = 0, n = rx->nfa_states; n; n = n->next)
2360 scratch[x++] = n;
2361
2362 qsort (scratch, total_nodec,
2363 sizeof (struct rx_nfa_state *), (int (*)())nfacmp);
2364
2365 for (x = 0; x < total_nodec; ++x)
2366 {
2367 struct rx_possible_future *eclose = scratch[x]->futures;
2368 struct rx_nfa_edge *edge = scratch[x]->edges;
2369 struct rx_nfa_state *cn = new_state++;
2370 cn->futures = 0;
2371 cn->edges = 0;
2372 cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
2373 cn->id = scratch[x]->id;
2374 cn->is_final = scratch[x]->is_final;
2375 cn->is_start = scratch[x]->is_start;
2376 cn->mark = 0;
2377 while (edge)
2378 {
2379 int indx = (edge->dest->id < 0
2380 ? (total_nodec + edge->dest->id)
2381 : edge->dest->id);
2382 struct rx_nfa_edge *e = new_edge++;
2383 rx_Bitset cset = (rx_Bitset) new_bitset;
2384 new_bitset += rx_sizeof_bitset (rx->local_cset_size);
2385 rx_bitset_null (rx->local_cset_size, cset);
2386 rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
2387 e->next = cn->edges;
2388 cn->edges = e;
2389 e->type = edge->type;
2390 e->dest = state_base + indx;
2391 e->params.cset = cset;
2392 edge = edge->next;
2393 }
2394 while (eclose)
2395 {
2396 struct rx_possible_future *ec = new_close++;
2397 struct rx_hash_item * sp;
2398 struct rx_se_list ** sepos;
2399 struct rx_se_list * sesrc;
2400 struct rx_nfa_state_set * destlst;
2401 struct rx_nfa_state_set ** destpos;
2402 ec->next = cn->futures;
2403 cn->futures = ec;
2404 for (sepos = &ec->effects, sesrc = eclose->effects;
2405 sesrc;
2406 sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
2407 {
2408 sp = rx_hash_find (&rx->se_list_memo,
2409 (intptr_t)sesrc->car ^ (intptr_t)sesrc->cdr,
2410 sesrc, &se_list_hash_rules);
2411 if (sp->binding)
2412 {
2413 sesrc = (struct rx_se_list *)sp->binding;
2414 break;
2415 }
2416 *new_se_list = *sesrc;
2417 sp->binding = (void *)new_se_list;
2418 *sepos = new_se_list;
2419 ++new_se_list;
2420 }
2421 *sepos = sesrc;
2422 for (destpos = &ec->destset, destlst = eclose->destset;
2423 destlst;
2424 destpos = &(*destpos)->cdr, destlst = destlst->cdr)
2425 {
2426 sp = rx_hash_find (&rx->set_list_memo,
2427 ((((intptr_t)destlst->car) >> 8)
2428 ^ (intptr_t)destlst->cdr),
2429 destlst, &nfa_set_hash_rules);
2430 if (sp->binding)
2431 {
2432 destlst = (struct rx_nfa_state_set *)sp->binding;
2433 break;
2434 }
2435 *new_nfa_set = *destlst;
2436 new_nfa_set->car = state_base + destlst->car->id;
2437 sp->binding = (void *)new_nfa_set;
2438 *destpos = new_nfa_set;
2439 ++new_nfa_set;
2440 }
2441 *destpos = destlst;
2442 eclose = eclose->next;
2443 }
2444 }
2445 }
2446 rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
2447 bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
2448 rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
2449 bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
2450
2451 rx_free_nfa (rx);
2452 rx->nfa_states = (struct rx_nfa_state *)*mem;
2453 return 1;
2454}
2455
2456
2457
2458/* The functions in the next several pages define the lazy-NFA-conversion used
2459 * by matchers. The input to this construction is an NFA such as
2460 * is built by compactify_nfa (rx.c). The output is the superNFA.
2461 */
2462
2463/* Match engines can use arbitrary values for opcodes. So, the parse tree
2464 * is built using instructions names (enum rx_opcode), but the superstate
2465 * nfa is populated with mystery opcodes (void *).
2466 *
2467 * For convenience, here is an id table. The opcodes are == to their inxs
2468 *
2469 * The lables in re_search_2 would make good values for instructions.
2470 */
2471
2472void * rx_id_instruction_table[rx_num_instructions] =
2473{
2474 (void *) rx_backtrack_point,
2475 (void *) rx_do_side_effects,
2476 (void *) rx_cache_miss,
2477 (void *) rx_next_char,
2478 (void *) rx_backtrack,
2479 (void *) rx_error_inx
2480};
2481
2482
2483
2484
2485/* Memory mgt. for superstate graphs. */
2486
2487#ifdef __STDC__
2488static char *
2489rx_cache_malloc (struct rx_cache * cache, int bytes)
2490#else
2491static char *
2492rx_cache_malloc (cache, bytes)
2493 struct rx_cache * cache;
2494 int bytes;
2495#endif
2496{
2497 while (cache->bytes_left < bytes)
2498 {
2499 if (cache->memory_pos)
2500 cache->memory_pos = cache->memory_pos->next;
2501 if (!cache->memory_pos)
2502 {
2503 cache->morecore (cache);
2504 if (!cache->memory_pos)
2505 return 0;
2506 }
2507 cache->bytes_left = cache->memory_pos->bytes;
2508 cache->memory_addr = ((char *)cache->memory_pos
2509 + sizeof (struct rx_blocklist));
2510 }
2511 cache->bytes_left -= bytes;
2512 {
2513 char * addr = cache->memory_addr;
2514 cache->memory_addr += bytes;
2515 return addr;
2516 }
2517}
2518
2519#ifdef __STDC__
2520static void
2521rx_cache_free (struct rx_cache * cache,
2522 struct rx_freelist ** freelist, char * mem)
2523#else
2524static void
2525rx_cache_free (cache, freelist, mem)
2526 struct rx_cache * cache;
2527 struct rx_freelist ** freelist;
2528 char * mem;
2529#endif
2530{
2531 struct rx_freelist * it = (struct rx_freelist *)mem;
2532 it->next = *freelist;
2533 *freelist = it;
2534}
2535
2536
2537/* The partially instantiated superstate graph has a transition
2538 * table at every node. There is one entry for every character.
2539 * This fills in the transition for a set.
2540 */
2541#ifdef __STDC__
2542static void
2543install_transition (struct rx_superstate *super,
2544 struct rx_inx *answer, rx_Bitset trcset)
2545#else
2546static void
2547install_transition (super, answer, trcset)
2548 struct rx_superstate *super;
2549 struct rx_inx *answer;
2550 rx_Bitset trcset;
2551#endif
2552{
2553 struct rx_inx * transitions = super->transitions;
2554 int chr;
2555 for (chr = 0; chr < 256; )
2556 if (!*trcset)
2557 {
2558 ++trcset;
2559 chr += 32;
2560 }
2561 else
2562 {
2563 RX_subset sub = *trcset;
2564 RX_subset mask = 1;
2565 int bound = chr + 32;
2566 while (chr < bound)
2567 {
2568 if (sub & mask)
2569 transitions [chr] = *answer;
2570 ++chr;
2571 mask <<= 1;
2572 }
2573 ++trcset;
2574 }
2575}
2576
2577
2578#ifdef __STDC__
2579static int
2580qlen (struct rx_superstate * q)
2581#else
2582static int
2583qlen (q)
2584 struct rx_superstate * q;
2585#endif
2586{
2587 int count = 1;
2588 struct rx_superstate * it;
2589 if (!q)
2590 return 0;
2591 for (it = q->next_recyclable; it != q; it = it->next_recyclable)
2592 ++count;
2593 return count;
2594}
2595
2596#ifdef __STDC__
2597static void
2598check_cache (struct rx_cache * cache)
2599#else
2600static void
2601check_cache (cache)
2602 struct rx_cache * cache;
2603#endif
2604{
2605 struct rx_cache * you_fucked_up = 0;
2606 int total = cache->superstates;
2607 int semi = cache->semifree_superstates;
2608 if (semi != qlen (cache->semifree_superstate))
2609 check_cache (you_fucked_up);
2610 if ((total - semi) != qlen (cache->lru_superstate))
2611 check_cache (you_fucked_up);
2612}
2613
2614/* When a superstate is old and neglected, it can enter a
2615 * semi-free state. A semi-free state is slated to die.
2616 * Incoming transitions to a semi-free state are re-written
2617 * to cause an (interpreted) fault when they are taken.
2618 * The fault handler revives the semi-free state, patches
2619 * incoming transitions back to normal, and continues.
2620 *
2621 * The idea is basicly to free in two stages, aborting
2622 * between the two if the state turns out to be useful again.
2623 * When a free is aborted, the rescued superstate is placed
2624 * in the most-favored slot to maximize the time until it
2625 * is next semi-freed.
2626 */
2627
2628#ifdef __STDC__
2629static void
2630semifree_superstate (struct rx_cache * cache)
2631#else
2632static void
2633semifree_superstate (cache)
2634 struct rx_cache * cache;
2635#endif
2636{
2637 int disqualified = cache->semifree_superstates;
2638 if (disqualified == cache->superstates)
2639 return;
2640 while (cache->lru_superstate->locks)
2641 {
2642 cache->lru_superstate = cache->lru_superstate->next_recyclable;
2643 ++disqualified;
2644 if (disqualified == cache->superstates)
2645 return;
2646 }
2647 {
2648 struct rx_superstate * it = cache->lru_superstate;
2649 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2650 it->prev_recyclable->next_recyclable = it->next_recyclable;
2651 cache->lru_superstate = (it == it->next_recyclable
2652 ? 0
2653 : it->next_recyclable);
2654 if (!cache->semifree_superstate)
2655 {
2656 cache->semifree_superstate = it;
2657 it->next_recyclable = it;
2658 it->prev_recyclable = it;
2659 }
2660 else
2661 {
2662 it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
2663 it->next_recyclable = cache->semifree_superstate;
2664 it->prev_recyclable->next_recyclable = it;
2665 it->next_recyclable->prev_recyclable = it;
2666 }
2667 {
2668 struct rx_distinct_future *df;
2669 it->is_semifree = 1;
2670 ++cache->semifree_superstates;
2671 df = it->transition_refs;
2672 if (df)
2673 {
2674 df->prev_same_dest->next_same_dest = 0;
2675 for (df = it->transition_refs; df; df = df->next_same_dest)
2676 {
2677 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
2678 df->future_frame.data = 0;
2679 df->future_frame.data_2 = (void *) df;
2680 /* If there are any NEXT-CHAR instruction frames that
2681 * refer to this state, we convert them to CACHE-MISS frames.
2682 */
2683 if (!df->effects
2684 && (df->edge->options->next_same_super_edge[0]
2685 == df->edge->options))
2686 install_transition (df->present, &df->future_frame,
2687 df->edge->cset);
2688 }
2689 df = it->transition_refs;
2690 df->prev_same_dest->next_same_dest = df;
2691 }
2692 }
2693 }
2694}
2695
2696
2697#ifdef __STDC__
2698static void
2699refresh_semifree_superstate (struct rx_cache * cache,
2700 struct rx_superstate * super)
2701#else
2702static void
2703refresh_semifree_superstate (cache, super)
2704 struct rx_cache * cache;
2705 struct rx_superstate * super;
2706#endif
2707{
2708 struct rx_distinct_future *df;
2709
2710 if (super->transition_refs)
2711 {
2712 super->transition_refs->prev_same_dest->next_same_dest = 0;
2713 for (df = super->transition_refs; df; df = df->next_same_dest)
2714 {
2715 df->future_frame.inx = cache->instruction_table[rx_next_char];
2716 df->future_frame.data = (void *) super->transitions;
2717 /* CACHE-MISS instruction frames that refer to this state,
2718 * must be converted to NEXT-CHAR frames.
2719 */
2720 if (!df->effects
2721 && (df->edge->options->next_same_super_edge[0]
2722 == df->edge->options))
2723 install_transition (df->present, &df->future_frame,
2724 df->edge->cset);
2725 }
2726 super->transition_refs->prev_same_dest->next_same_dest
2727 = super->transition_refs;
2728 }
2729 if (cache->semifree_superstate == super)
2730 cache->semifree_superstate = (super->prev_recyclable == super
2731 ? 0
2732 : super->prev_recyclable);
2733 super->next_recyclable->prev_recyclable = super->prev_recyclable;
2734 super->prev_recyclable->next_recyclable = super->next_recyclable;
2735
2736 if (!cache->lru_superstate)
2737 (cache->lru_superstate
2738 = super->next_recyclable
2739 = super->prev_recyclable
2740 = super);
2741 else
2742 {
2743 super->next_recyclable = cache->lru_superstate;
2744 super->prev_recyclable = cache->lru_superstate->prev_recyclable;
2745 super->next_recyclable->prev_recyclable = super;
2746 super->prev_recyclable->next_recyclable = super;
2747 }
2748 super->is_semifree = 0;
2749 --cache->semifree_superstates;
2750}
2751
2752#ifdef __STDC__
2753static void
2754rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
2755#else
2756static void
2757rx_refresh_this_superstate (cache, superstate)
2758 struct rx_cache * cache;
2759 struct rx_superstate * superstate;
2760#endif
2761{
2762 if (superstate->is_semifree)
2763 refresh_semifree_superstate (cache, superstate);
2764 else if (cache->lru_superstate == superstate)
2765 cache->lru_superstate = superstate->next_recyclable;
2766 else if (superstate != cache->lru_superstate->prev_recyclable)
2767 {
2768 superstate->next_recyclable->prev_recyclable
2769 = superstate->prev_recyclable;
2770 superstate->prev_recyclable->next_recyclable
2771 = superstate->next_recyclable;
2772 superstate->next_recyclable = cache->lru_superstate;
2773 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
2774 superstate->next_recyclable->prev_recyclable = superstate;
2775 superstate->prev_recyclable->next_recyclable = superstate;
2776 }
2777}
2778
2779#ifdef __STDC__
2780static void
2781release_superset_low (struct rx_cache * cache,
2782 struct rx_superset *set)
2783#else
2784static void
2785release_superset_low (cache, set)
2786 struct rx_cache * cache;
2787 struct rx_superset *set;
2788#endif
2789{
2790 if (!--set->refs)
2791 {
2792 if (set->cdr)
2793 release_superset_low (cache, set->cdr);
2794
2795 set->starts_for = 0;
2796
2797 rx_hash_free
2798 (rx_hash_find
2799 (&cache->superset_table,
2800 (intptr_t)set->car ^ set->id ^ (intptr_t)set->cdr,
2801 (void *)set,
2802 &cache->superset_hash_rules),
2803 &cache->superset_hash_rules);
2804 rx_cache_free (cache, &cache->free_supersets, (char *)set);
2805 }
2806}
2807
2808#ifdef __STDC__
2809RX_DECL void
2810rx_release_superset (struct rx *rx,
2811 struct rx_superset *set)
2812#else
2813RX_DECL void
2814rx_release_superset (rx, set)
2815 struct rx *rx;
2816 struct rx_superset *set;
2817#endif
2818{
2819 release_superset_low (rx->cache, set);
2820}
2821
2822/* This tries to add a new superstate to the superstate freelist.
2823 * It might, as a result, free some edge pieces or hash tables.
2824 * If nothing can be freed because too many locks are being held, fail.
2825 */
2826
2827#ifdef __STDC__
2828static int
2829rx_really_free_superstate (struct rx_cache * cache)
2830#else
2831static int
2832rx_really_free_superstate (cache)
2833 struct rx_cache * cache;
2834#endif
2835{
2836 int locked_superstates = 0;
2837 struct rx_superstate * it;
2838
2839 if (!cache->superstates)
2840 return 0;
2841
2842 {
2843 /* This is a total guess. The idea is that we should expect as
2844 * many misses as we've recently experienced. I.e., cache->misses
2845 * should be the same as cache->semifree_superstates.
2846 */
2847 while ((cache->hits + cache->misses) > cache->superstates_allowed)
2848 {
2849 cache->hits >>= 1;
2850 cache->misses >>= 1;
2851 }
2852 if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
2853 < (cache->superstates * cache->misses))
2854 {
2855 semifree_superstate (cache);
2856 semifree_superstate (cache);
2857 }
2858 }
2859
2860 while (cache->semifree_superstate && cache->semifree_superstate->locks)
2861 {
2862 refresh_semifree_superstate (cache, cache->semifree_superstate);
2863 ++locked_superstates;
2864 if (locked_superstates == cache->superstates)
2865 return 0;
2866 }
2867
2868 if (cache->semifree_superstate)
2869 {
2870 it = cache->semifree_superstate;
2871 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2872 it->prev_recyclable->next_recyclable = it->next_recyclable;
2873 cache->semifree_superstate = ((it == it->next_recyclable)
2874 ? 0
2875 : it->next_recyclable);
2876 --cache->semifree_superstates;
2877 }
2878 else
2879 {
2880 while (cache->lru_superstate->locks)
2881 {
2882 cache->lru_superstate = cache->lru_superstate->next_recyclable;
2883 ++locked_superstates;
2884 if (locked_superstates == cache->superstates)
2885 return 0;
2886 }
2887 it = cache->lru_superstate;
2888 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2889 it->prev_recyclable->next_recyclable = it->next_recyclable;
2890 cache->lru_superstate = ((it == it->next_recyclable)
2891 ? 0
2892 : it->next_recyclable);
2893 }
2894
2895 if (it->transition_refs)
2896 {
2897 struct rx_distinct_future *df;
2898 for (df = it->transition_refs,
2899 df->prev_same_dest->next_same_dest = 0;
2900 df;
2901 df = df->next_same_dest)
2902 {
2903 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
2904 df->future_frame.data = 0;
2905 df->future_frame.data_2 = (void *) df;
2906 df->future = 0;
2907 }
2908 it->transition_refs->prev_same_dest->next_same_dest =
2909 it->transition_refs;
2910 }
2911 {
2912 struct rx_super_edge *tc = it->edges;
2913 while (tc)
2914 {
2915 struct rx_distinct_future * df;
2916 struct rx_super_edge *tct = tc->next;
2917 df = tc->options;
2918 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
2919 while (df)
2920 {
2921 struct rx_distinct_future *dft = df;
2922 df = df->next_same_super_edge[0];
2923
2924
2925 if (dft->future && dft->future->transition_refs == dft)
2926 {
2927 dft->future->transition_refs = dft->next_same_dest;
2928 if (dft->future->transition_refs == dft)
2929 dft->future->transition_refs = 0;
2930 }
2931 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
2932 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
2933 rx_cache_free (cache, &cache->free_discernable_futures,
2934 (char *)dft);
2935 }
2936 rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
2937 tc = tct;
2938 }
2939 }
2940
2941 if (it->contents->superstate == it)
2942 it->contents->superstate = 0;
2943 release_superset_low (cache, it->contents);
2944 rx_cache_free (cache, &cache->free_superstates, (char *)it);
2945 --cache->superstates;
2946 return 1;
2947}
2948
2949#ifdef __STDC__
2950static char *
2951rx_cache_get (struct rx_cache * cache,
2952 struct rx_freelist ** freelist)
2953#else
2954static char *
2955rx_cache_get (cache, freelist)
2956 struct rx_cache * cache;
2957 struct rx_freelist ** freelist;
2958#endif
2959{
2960 while (!*freelist && rx_really_free_superstate (cache))
2961 ;
2962 if (!*freelist)
2963 return 0;
2964 {
2965 struct rx_freelist * it = *freelist;
2966 *freelist = it->next;
2967 return (char *)it;
2968 }
2969}
2970
2971#ifdef __STDC__
2972static char *
2973rx_cache_malloc_or_get (struct rx_cache * cache,
2974 struct rx_freelist ** freelist, int bytes)
2975#else
2976static char *
2977rx_cache_malloc_or_get (cache, freelist, bytes)
2978 struct rx_cache * cache;
2979 struct rx_freelist ** freelist;
2980 int bytes;
2981#endif
2982{
2983 if (!*freelist)
2984 {
2985 char * answer = rx_cache_malloc (cache, bytes);
2986 if (answer)
2987 return answer;
2988 }
2989
2990 return rx_cache_get (cache, freelist);
2991}
2992
2993#ifdef __STDC__
2994static char *
2995rx_cache_get_superstate (struct rx_cache * cache)
2996#else
2997static char *
2998rx_cache_get_superstate (cache)
2999 struct rx_cache * cache;
3000#endif
3001{
3002 char * answer;
3003 int bytes = ( sizeof (struct rx_superstate)
3004 + cache->local_cset_size * sizeof (struct rx_inx));
3005 if (!cache->free_superstates
3006 && (cache->superstates < cache->superstates_allowed))
3007 {
3008 answer = rx_cache_malloc (cache, bytes);
3009 if (answer)
3010 {
3011 ++cache->superstates;
3012 return answer;
3013 }
3014 }
3015 answer = rx_cache_get (cache, &cache->free_superstates);
3016 if (!answer)
3017 {
3018 answer = rx_cache_malloc (cache, bytes);
3019 if (answer)
3020 ++cache->superstates_allowed;
3021 }
3022 ++cache->superstates;
3023 return answer;
3024}
3025
3026
3027
3028
3029#ifdef __STDC__
3030static int
3031supersetcmp (void * va, void * vb)
3032#else
3033static int
3034supersetcmp (va, vb)
3035 void * va;
3036 void * vb;
3037#endif
3038{
3039 struct rx_superset * a = (struct rx_superset *)va;
3040 struct rx_superset * b = (struct rx_superset *)vb;
3041 return ( (a == b)
3042 || (a && b && (a->car == b->car) && (a->cdr == b->cdr)));
3043}
3044
3045#ifdef __STDC__
3046static struct rx_hash_item *
3047superset_allocator (struct rx_hash_rules * rules, void * val)
3048#else
3049static struct rx_hash_item *
3050superset_allocator (rules, val)
3051 struct rx_hash_rules * rules;
3052 void * val;
3053#endif
3054{
3055 struct rx_cache * cache
3056 = ((struct rx_cache *)
3057 ((char *)rules
3058 - (intptr_t)(&((struct rx_cache *)0)->superset_hash_rules)));
3059 struct rx_superset * template = (struct rx_superset *)val;
3060 struct rx_superset * newset
3061 = ((struct rx_superset *)
3062 rx_cache_malloc_or_get (cache,
3063 &cache->free_supersets,
3064 sizeof (*template)));
3065 if (!newset)
3066 return 0;
3067 newset->refs = 0;
3068 newset->car = template->car;
3069 newset->id = template->car->id;
3070 newset->cdr = template->cdr;
3071 newset->superstate = 0;
3072 rx_protect_superset (rx, template->cdr);
3073 newset->hash_item.data = (void *)newset;
3074 newset->hash_item.binding = 0;
3075 return &newset->hash_item;
3076}
3077
3078#ifdef __STDC__
3079static struct rx_hash *
3080super_hash_allocator (struct rx_hash_rules * rules)
3081#else
3082static struct rx_hash *
3083super_hash_allocator (rules)
3084 struct rx_hash_rules * rules;
3085#endif
3086{
3087 struct rx_cache * cache
3088 = ((struct rx_cache *)
3089 ((char *)rules
3090 - (intptr_t)(&((struct rx_cache *)0)->superset_hash_rules)));
3091 return ((struct rx_hash *)
3092 rx_cache_malloc_or_get (cache,
3093 &cache->free_hash, sizeof (struct rx_hash)));
3094}
3095
3096
3097#ifdef __STDC__
3098static void
3099super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
3100#else
3101static void
3102super_hash_liberator (hash, rules)
3103 struct rx_hash * hash;
3104 struct rx_hash_rules * rules;
3105#endif
3106{
3107 struct rx_cache * cache
3108 = ((struct rx_cache *)
3109 (char *)rules - (intptr_t)(&((struct rx_cache *)0)->superset_hash_rules));
3110 rx_cache_free (cache, &cache->free_hash, (char *)hash);
3111}
3112
3113#ifdef __STDC__
3114static void
3115superset_hash_item_liberator (struct rx_hash_item * it,
3116 struct rx_hash_rules * rules)
3117#else
3118static void
3119superset_hash_item_liberator (it, rules) /* Well, it does ya know. */
3120 struct rx_hash_item * it;
3121 struct rx_hash_rules * rules;
3122#endif
3123{
3124}
3125
3126int rx_cache_bound = 128;
3127static int rx_default_cache_got = 0;
3128
3129#ifdef __STDC__
3130static int
3131bytes_for_cache_size (int supers, int cset_size)
3132#else
3133static int
3134bytes_for_cache_size (supers, cset_size)
3135 int supers;
3136 int cset_size;
3137#endif
3138{
3139 /* What the hell is this? !!!*/
3140 return (int)
3141 ((float)supers *
3142 ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
3143 + sizeof (struct rx_super_edge)))
3144 + (1.80 * (float) sizeof (struct rx_possible_future))
3145 + (float) ( sizeof (struct rx_superstate)
3146 + cset_size * sizeof (struct rx_inx))));
3147}
3148
3149#ifdef __STDC__
3150static void
3151rx_morecore (struct rx_cache * cache)
3152#else
3153static void
3154rx_morecore (cache)
3155 struct rx_cache * cache;
3156#endif
3157{
3158 if (rx_default_cache_got >= rx_cache_bound)
3159 return;
3160
3161 rx_default_cache_got += 16;
3162 cache->superstates_allowed = rx_cache_bound;
3163 {
3164 struct rx_blocklist ** pos = &cache->memory;
3165 int size = bytes_for_cache_size (16, cache->local_cset_size);
3166 while (*pos)
3167 pos = &(*pos)->next;
3168 *pos = ((struct rx_blocklist *)
3169 malloc (size + sizeof (struct rx_blocklist)));
3170 if (!*pos)
3171 return;
3172
3173 (*pos)->next = 0;
3174 (*pos)->bytes = size;
3175 cache->memory_pos = *pos;
3176 cache->memory_addr = (char *)*pos + sizeof (**pos);
3177 cache->bytes_left = size;
3178 }
3179}
3180
3181static struct rx_cache default_cache =
3182{
3183 {
3184 supersetcmp,
3185 super_hash_allocator,
3186 super_hash_liberator,
3187 superset_allocator,
3188 superset_hash_item_liberator,
3189 },
3190 0,
3191 0,
3192 0,
3193 0,
3194 rx_morecore,
3195
3196 0,
3197 0,
3198 0,
3199 0,
3200 0,
3201
3202 0,
3203 0,
3204
3205 0,
3206
3207 0,
3208 0,
3209 0,
3210 0,
3211 128,
3212
3213 256,
3214 rx_id_instruction_table,
3215
3216 {
3217 0,
3218 0,
3219 {0},
3220 {0},
3221 {0}
3222 }
3223};
3224
3225/* This adds an element to a superstate set. These sets are lists, such
3226 * that lists with == elements are ==. The empty set is returned by
3227 * superset_cons (rx, 0, 0) and is NOT equivelent to
3228 * (struct rx_superset)0.
3229 */
3230
3231#ifdef __STDC__
3232RX_DECL struct rx_superset *
3233rx_superset_cons (struct rx * rx,
3234 struct rx_nfa_state *car, struct rx_superset *cdr)
3235#else
3236RX_DECL struct rx_superset *
3237rx_superset_cons (rx, car, cdr)
3238 struct rx * rx;
3239 struct rx_nfa_state *car;
3240 struct rx_superset *cdr;
3241#endif
3242{
3243 struct rx_cache * cache = rx->cache;
3244 if (!car && !cdr)
3245 {
3246 if (!cache->empty_superset)
3247 {
3248 cache->empty_superset
3249 = ((struct rx_superset *)
3250 rx_cache_malloc_or_get (cache, &cache->free_supersets,
3251 sizeof (struct rx_superset)));
3252 if (!cache->empty_superset)
3253 return 0;
3254 bzero (cache->empty_superset, sizeof (struct rx_superset));
3255 cache->empty_superset->refs = 1000;
3256 }
3257 return cache->empty_superset;
3258 }
3259 {
3260 struct rx_superset template;
3261 struct rx_hash_item * hit;
3262 template.car = car;
3263 template.cdr = cdr;
3264 template.id = car->id;
3265 hit = rx_hash_store (&cache->superset_table,
3266 (intptr_t)car ^ car->id ^ (intptr_t)cdr,
3267 (void *)&template,
3268 &cache->superset_hash_rules);
3269 return (hit
3270 ? (struct rx_superset *)hit->data
3271 : 0);
3272 }
3273}
3274
3275/* This computes a union of two NFA state sets. The sets do not have the
3276 * same representation though. One is a RX_SUPERSET structure (part
3277 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
3278 */
3279
3280#ifdef __STDC__
3281RX_DECL struct rx_superset *
3282rx_superstate_eclosure_union
3283 (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
3284#else
3285RX_DECL struct rx_superset *
3286rx_superstate_eclosure_union (rx, set, ecl)
3287 struct rx * rx;
3288 struct rx_superset *set;
3289 struct rx_nfa_state_set *ecl;
3290#endif
3291{
3292 if (!ecl)
3293 return set;
3294
3295 if (!set->car)
3296 return rx_superset_cons (rx, ecl->car,
3297 rx_superstate_eclosure_union (rx, set, ecl->cdr));
3298 if (set->car == ecl->car)
3299 return rx_superstate_eclosure_union (rx, set, ecl->cdr);
3300
3301 {
3302 struct rx_superset * tail;
3303 struct rx_nfa_state * first;
3304
3305 if (set->car > ecl->car)
3306 {
3307 tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
3308 first = set->car;
3309 }
3310 else
3311 {
3312 tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
3313 first = ecl->car;
3314 }
3315 if (!tail)
3316 return 0;
3317 else
3318 {
3319 struct rx_superset * answer;
3320 answer = rx_superset_cons (rx, first, tail);
3321 if (!answer)
3322 {
3323 rx_protect_superset (rx, tail);
3324 rx_release_superset (rx, tail);
3325 return 0;
3326 }
3327 else
3328 return answer;
3329 }
3330 }
3331}
3332
3333
3334
3335
3336
3337/*
3338 * This makes sure that a list of rx_distinct_futures contains
3339 * a future for each possible set of side effects in the eclosure
3340 * of a given state. This is some of the work of filling in a
3341 * superstate transition.
3342 */
3343
3344#ifdef __STDC__
3345static struct rx_distinct_future *
3346include_futures (struct rx *rx,
3347 struct rx_distinct_future *df, struct rx_nfa_state
3348 *state, struct rx_superstate *superstate)
3349#else
3350static struct rx_distinct_future *
3351include_futures (rx, df, state, superstate)
3352 struct rx *rx;
3353 struct rx_distinct_future *df;
3354 struct rx_nfa_state *state;
3355 struct rx_superstate *superstate;
3356#endif
3357{
3358 struct rx_possible_future *future;
3359 struct rx_cache * cache = rx->cache;
3360 for (future = state->futures; future; future = future->next)
3361 {
3362 struct rx_distinct_future *dfp;
3363 struct rx_distinct_future *insert_before = 0;
3364 if (df)
3365 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3366 for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
3367 if (dfp->effects == future->effects)
3368 break;
3369 else
3370 {
3371 int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
3372 if (order > 0)
3373 {
3374 insert_before = dfp;
3375 dfp = 0;
3376 break;
3377 }
3378 }
3379 if (df)
3380 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
3381 if (!dfp)
3382 {
3383 dfp
3384 = ((struct rx_distinct_future *)
3385 rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
3386 sizeof (struct rx_distinct_future)));
3387 if (!dfp)
3388 return 0;
3389 if (!df)
3390 {
3391 df = insert_before = dfp;
3392 df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
3393 }
3394 else if (!insert_before)
3395 insert_before = df;
3396 else if (insert_before == df)
3397 df = dfp;
3398
3399 dfp->next_same_super_edge[0] = insert_before;
3400 dfp->next_same_super_edge[1]
3401 = insert_before->next_same_super_edge[1];
3402 dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
3403 dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
3404 dfp->next_same_dest = dfp->prev_same_dest = dfp;
3405 dfp->future = 0;
3406 dfp->present = superstate;
3407 dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
3408 dfp->future_frame.data = 0;
3409 dfp->future_frame.data_2 = (void *) dfp;
3410 dfp->side_effects_frame.inx
3411 = rx->instruction_table[rx_do_side_effects];
3412 dfp->side_effects_frame.data = 0;
3413 dfp->side_effects_frame.data_2 = (void *) dfp;
3414 dfp->effects = future->effects;
3415 }
3416 }
3417 return df;
3418}
3419
3420
3421
3422
3423/* This constructs a new superstate from its state set. The only
3424 * complexity here is memory management.
3425 */
3426#ifdef __STDC__
3427RX_DECL struct rx_superstate *
3428rx_superstate (struct rx *rx,
3429 struct rx_superset *set)
3430#else
3431RX_DECL struct rx_superstate *
3432rx_superstate (rx, set)
3433 struct rx *rx;
3434 struct rx_superset *set;
3435#endif
3436{
3437 struct rx_cache * cache = rx->cache;
3438 struct rx_superstate * superstate = 0;
3439
3440 /* Does the superstate already exist in the cache? */
3441 if (set->superstate)
3442 {
3443 if (set->superstate->rx_id != rx->rx_id)
3444 {
3445 /* Aha. It is in the cache, but belongs to a superstate
3446 * that refers to an NFA that no longer exists.
3447 * (We know it no longer exists because it was evidently
3448 * stored in the same region of memory as the current nfa
3449 * yet it has a different id.)
3450 */
3451 superstate = set->superstate;
3452 if (!superstate->is_semifree)
3453 {
3454 if (cache->lru_superstate == superstate)
3455 {
3456 cache->lru_superstate = superstate->next_recyclable;
3457 if (cache->lru_superstate == superstate)
3458 cache->lru_superstate = 0;
3459 }
3460 {
3461 superstate->next_recyclable->prev_recyclable
3462 = superstate->prev_recyclable;
3463 superstate->prev_recyclable->next_recyclable
3464 = superstate->next_recyclable;
3465 if (!cache->semifree_superstate)
3466 {
3467 (cache->semifree_superstate
3468 = superstate->next_recyclable
3469 = superstate->prev_recyclable
3470 = superstate);
3471 }
3472 else
3473 {
3474 superstate->next_recyclable = cache->semifree_superstate;
3475 superstate->prev_recyclable
3476 = cache->semifree_superstate->prev_recyclable;
3477 superstate->next_recyclable->prev_recyclable
3478 = superstate;
3479 superstate->prev_recyclable->next_recyclable
3480 = superstate;
3481 cache->semifree_superstate = superstate;
3482 }
3483 ++cache->semifree_superstates;
3484 }
3485 }
3486 set->superstate = 0;
3487 goto handle_cache_miss;
3488 }
3489 ++cache->hits;
3490 superstate = set->superstate;
3491
3492 rx_refresh_this_superstate (cache, superstate);
3493 return superstate;
3494 }
3495
3496 handle_cache_miss:
3497
3498 /* This point reached only for cache misses. */
3499 ++cache->misses;
3500#if RX_DEBUG
3501 if (rx_debug_trace > 1)
3502 {
3503 struct rx_superset * setp = set;
3504 fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
3505 while (setp)
3506 {
3507 fprintf (stderr, "%d ", setp->id);
3508 setp = setp->cdr;
3509 }
3510 fprintf (stderr, "(%d)\n", set);
3511 }
3512#endif
3513 superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
3514 if (!superstate)
3515 return 0;
3516
3517 if (!cache->lru_superstate)
3518 (cache->lru_superstate
3519 = superstate->next_recyclable
3520 = superstate->prev_recyclable
3521 = superstate);
3522 else
3523 {
3524 superstate->next_recyclable = cache->lru_superstate;
3525 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
3526 ( superstate->prev_recyclable->next_recyclable
3527 = superstate->next_recyclable->prev_recyclable
3528 = superstate);
3529 }
3530 superstate->rx_id = rx->rx_id;
3531 superstate->transition_refs = 0;
3532 superstate->locks = 0;
3533 superstate->is_semifree = 0;
3534 set->superstate = superstate;
3535 superstate->contents = set;
3536 rx_protect_superset (rx, set);
3537 superstate->edges = 0;
3538 {
3539 int x;
3540 /* None of the transitions from this superstate are known yet. */
3541 for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
3542 {
3543 struct rx_inx * ifr = &superstate->transitions[x];
3544 ifr->inx = rx->instruction_table [rx_cache_miss];
3545 ifr->data = ifr->data_2 = 0;
3546 }
3547 }
3548 return superstate;
3549}
3550
3551
3552
3553/* This computes the destination set of one edge of the superstate NFA.
3554 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
3555 * Returns 0 on an allocation failure.
3556 */
3557
3558#ifdef __STDC__
3559static int
3560solve_destination (struct rx *rx, struct rx_distinct_future *df)
3561#else
3562static int
3563solve_destination (rx, df)
3564 struct rx *rx;
3565 struct rx_distinct_future *df;
3566#endif
3567{
3568 struct rx_super_edge *tc = df->edge;
3569 struct rx_superset *nfa_state;
3570 struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
3571 struct rx_superset *solution = nil_set;
3572 struct rx_superstate *dest;
3573
3574 rx_protect_superset (rx, solution);
3575 /* Iterate over all NFA states in the state set of this superstate. */
3576 for (nfa_state = df->present->contents;
3577 nfa_state->car;
3578 nfa_state = nfa_state->cdr)
3579 {
3580 struct rx_nfa_edge *e;
3581 /* Iterate over all edges of each NFA state. */
3582 for (e = nfa_state->car->edges; e; e = e->next)
3583 /* If we find an edge that is labeled with
3584 * the characters we are solving for.....
3585 */
3586 if (rx_bitset_is_subset (rx->local_cset_size,
3587 tc->cset, e->params.cset))
3588 {
3589 struct rx_nfa_state *n = e->dest;
3590 struct rx_possible_future *pf;
3591 /* ....search the partial epsilon closures of the destination
3592 * of that edge for a path that involves the same set of
3593 * side effects we are solving for.
3594 * If we find such a RX_POSSIBLE_FUTURE, we add members to the
3595 * stateset we are computing.
3596 */
3597 for (pf = n->futures; pf; pf = pf->next)
3598 if (pf->effects == df->effects)
3599 {
3600 struct rx_superset * old_sol;
3601 old_sol = solution;
3602 solution = rx_superstate_eclosure_union (rx, solution,
3603 pf->destset);
3604 if (!solution)
3605 return 0;
3606 rx_protect_superset (rx, solution);
3607 rx_release_superset (rx, old_sol);
3608 }
3609 }
3610 }
3611 /* It is possible that the RX_DISTINCT_FUTURE we are working on has
3612 * the empty set of NFA states as its definition. In that case, this
3613 * is a failure point.
3614 */
3615 if (solution == nil_set)
3616 {
3617 df->future_frame.inx = (void *) rx_backtrack;
3618 df->future_frame.data = 0;
3619 df->future_frame.data_2 = 0;
3620 return 1;
3621 }
3622 dest = rx_superstate (rx, solution);
3623 rx_release_superset (rx, solution);
3624 if (!dest)
3625 return 0;
3626
3627 {
3628 struct rx_distinct_future *dft;
3629 dft = df;
3630 df->prev_same_dest->next_same_dest = 0;
3631 while (dft)
3632 {
3633 dft->future = dest;
3634 dft->future_frame.inx = rx->instruction_table[rx_next_char];
3635 dft->future_frame.data = (void *) dest->transitions;
3636 dft = dft->next_same_dest;
3637 }
3638 df->prev_same_dest->next_same_dest = df;
3639 }
3640 if (!dest->transition_refs)
3641 dest->transition_refs = df;
3642 else
3643 {
3644 struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
3645 dest->transition_refs->next_same_dest = df->next_same_dest;
3646 df->next_same_dest->prev_same_dest = dest->transition_refs;
3647 df->next_same_dest = dft;
3648 dft->prev_same_dest = df;
3649 }
3650 return 1;
3651}
3652
3653
3654/* This takes a superstate and a character, and computes some edges
3655 * from the superstate NFA. In particular, this computes all edges
3656 * that lead from SUPERSTATE given CHR. This function also
3657 * computes the set of characters that share this edge set.
3658 * This returns 0 on allocation error.
3659 * The character set and list of edges are returned through
3660 * the paramters CSETOUT and DFOUT.
3661} */
3662
3663#ifdef __STDC__
3664static int
3665compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
3666 rx_Bitset csetout, struct rx_superstate *superstate,
3667 unsigned char chr)
3668#else
3669static int
3670compute_super_edge (rx, dfout, csetout, superstate, chr)
3671 struct rx *rx;
3672 struct rx_distinct_future **dfout;
3673 rx_Bitset csetout;
3674 struct rx_superstate *superstate;
3675 unsigned char chr;
3676#endif
3677{
3678 struct rx_superset *stateset = superstate->contents;
3679
3680 /* To compute the set of characters that share edges with CHR,
3681 * we start with the full character set, and subtract.
3682 */
3683 rx_bitset_universe (rx->local_cset_size, csetout);
3684 *dfout = 0;
3685
3686 /* Iterate over the NFA states in the superstate state-set. */
3687 while (stateset->car)
3688 {
3689 struct rx_nfa_edge *e;
3690 for (e = stateset->car->edges; e; e = e->next)
3691 if (RX_bitset_member (e->params.cset, chr))
3692 {
3693 /* If we find an NFA edge that applies, we make sure there
3694 * are corresponding edges in the superstate NFA.
3695 */
3696 {
3697 struct rx_distinct_future * saved;
3698 saved = *dfout;
3699 *dfout = include_futures (rx, *dfout, e->dest, superstate);
3700 if (!*dfout)
3701 {
3702 struct rx_distinct_future * df;
3703 df = saved;
3704 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3705 while (df)
3706 {
3707 struct rx_distinct_future *dft;
3708 dft = df;
3709 df = df->next_same_super_edge[0];
3710
3711 if (dft->future && dft->future->transition_refs == dft)
3712 {
3713 dft->future->transition_refs = dft->next_same_dest;
3714 if (dft->future->transition_refs == dft)
3715 dft->future->transition_refs = 0;
3716 }
3717 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
3718 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
3719 rx_cache_free (rx->cache,
3720 &rx->cache->free_discernable_futures,
3721 (char *)dft);
3722 }
3723 return 0;
3724 }
3725 }
3726 /* We also trim the character set a bit. */
3727 rx_bitset_intersection (rx->local_cset_size,
3728 csetout, e->params.cset);
3729 }
3730 else
3731 /* An edge that doesn't apply at least tells us some characters
3732 * that don't share the same edge set as CHR.
3733 */
3734 rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
3735 stateset = stateset->cdr;
3736 }
3737 return 1;
3738}
3739
3740
3741
3742/* This is a constructor for RX_SUPER_EDGE structures. These are
3743 * wrappers for lists of superstate NFA edges that share character sets labels.
3744 * If a transition class contains more than one rx_distinct_future (superstate
3745 * edge), then it represents a non-determinism in the superstate NFA.
3746 */
3747
3748#ifdef __STDC__
3749static struct rx_super_edge *
3750rx_super_edge (struct rx *rx,
3751 struct rx_superstate *super, rx_Bitset cset,
3752 struct rx_distinct_future *df)
3753#else
3754static struct rx_super_edge *
3755rx_super_edge (rx, super, cset, df)
3756 struct rx *rx;
3757 struct rx_superstate *super;
3758 rx_Bitset cset;
3759 struct rx_distinct_future *df;
3760#endif
3761{
3762 struct rx_super_edge *tc =
3763 (struct rx_super_edge *)rx_cache_malloc_or_get
3764 (rx->cache, &rx->cache->free_transition_classes,
3765 sizeof (struct rx_super_edge) + rx_sizeof_bitset (rx->local_cset_size));
3766
3767 if (!tc)
3768 return 0;
3769 tc->next = super->edges;
3770 super->edges = tc;
3771 tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
3772 tc->rx_backtrack_frame.data = 0;
3773 tc->rx_backtrack_frame.data_2 = (void *) tc;
3774 tc->options = df;
3775 tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
3776 rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
3777 if (df)
3778 {
3779 struct rx_distinct_future * dfp = df;
3780 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3781 while (dfp)
3782 {
3783 dfp->edge = tc;
3784 dfp = dfp->next_same_super_edge[0];
3785 }
3786 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
3787 }
3788 return tc;
3789}
3790
3791
3792/* There are three kinds of cache miss. The first occurs when a
3793 * transition is taken that has never been computed during the
3794 * lifetime of the source superstate. That cache miss is handled by
3795 * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
3796 * occurs when the destination superstate of a transition doesn't
3797 * exist. SOLVE_DESTINATION is used to construct the destination superstate.
3798 * Finally, the third kind of cache miss occurs when the destination
3799 * superstate of a transition is in a `semi-free state'. That case is
3800 * handled by UNFREE_SUPERSTATE.
3801 *
3802 * The function of HANDLE_CACHE_MISS is to figure out which of these
3803 * cases applies.
3804 */
3805
3806#ifdef __STDC__
3807static void
3808install_partial_transition (struct rx_superstate *super,
3809 struct rx_inx *answer,
3810 RX_subset set, int offset)
3811#else
3812static void
3813install_partial_transition (super, answer, set, offset)
3814 struct rx_superstate *super;
3815 struct rx_inx *answer;
3816 RX_subset set;
3817 int offset;
3818#endif
3819{
3820 int start = offset;
3821 int end = start + 32;
3822 RX_subset pos = 1;
3823 struct rx_inx * transitions = super->transitions;
3824
3825 while (start < end)
3826 {
3827 if (set & pos)
3828 transitions[start] = *answer;
3829 pos <<= 1;
3830 ++start;
3831 }
3832}
3833
3834
3835#ifdef __STDC__
3836RX_DECL struct rx_inx *
3837rx_handle_cache_miss
3838 (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
3839#else
3840RX_DECL struct rx_inx *
3841rx_handle_cache_miss (rx, super, chr, data)
3842 struct rx *rx;
3843 struct rx_superstate *super;
3844 unsigned char chr;
3845 void *data;
3846#endif
3847{
3848 int offset = chr / RX_subset_bits;
3849 struct rx_distinct_future *df = data;
3850
3851 if (!df) /* must be the shared_cache_miss_frame */
3852 {
3853 /* Perhaps this is just a transition waiting to be filled. */
3854 struct rx_super_edge *tc;
3855 RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
3856
3857 for (tc = super->edges; tc; tc = tc->next)
3858 if (tc->cset[offset] & mask)
3859 {
3860 struct rx_inx * answer;
3861 df = tc->options;
3862 answer = ((tc->options->next_same_super_edge[0] != tc->options)
3863 ? &tc->rx_backtrack_frame
3864 : (df->effects
3865 ? &df->side_effects_frame
3866 : &df->future_frame));
3867 install_partial_transition (super, answer,
3868 tc->cset [offset], offset * 32);
3869 return answer;
3870 }
3871 /* Otherwise, it's a flushed or newly encountered edge. */
3872 {
3873 char cset_space[1024]; /* this limit is far from unreasonable */
3874 rx_Bitset trcset;
3875 struct rx_inx *answer;
3876
3877 if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
3878 return 0; /* If the arbitrary limit is hit, always fail */
3879 /* cleanly. */
3880 trcset = (rx_Bitset)cset_space;
3881 rx_lock_superstate (rx, super);
3882 if (!compute_super_edge (rx, &df, trcset, super, chr))
3883 {
3884 rx_unlock_superstate (rx, super);
3885 return 0;
3886 }
3887 if (!df) /* We just computed the fail transition. */
3888 {
3889 static struct rx_inx
3890 shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
3891 answer = &shared_fail_frame;
3892 }
3893 else
3894 {
3895 tc = rx_super_edge (rx, super, trcset, df);
3896 if (!tc)
3897 {
3898 rx_unlock_superstate (rx, super);
3899 return 0;
3900 }
3901 answer = ((tc->options->next_same_super_edge[0] != tc->options)
3902 ? &tc->rx_backtrack_frame
3903 : (df->effects
3904 ? &df->side_effects_frame
3905 : &df->future_frame));
3906 }
3907 install_partial_transition (super, answer,
3908 trcset[offset], offset * 32);
3909 rx_unlock_superstate (rx, super);
3910 return answer;
3911 }
3912 }
3913 else if (df->future) /* A cache miss on an edge with a future? Must be
3914 * a semi-free destination. */
3915 {
3916 if (df->future->is_semifree)
3917 refresh_semifree_superstate (rx->cache, df->future);
3918 return &df->future_frame;
3919 }
3920 else
3921 /* no future superstate on an existing edge */
3922 {
3923 rx_lock_superstate (rx, super);
3924 if (!solve_destination (rx, df))
3925 {
3926 rx_unlock_superstate (rx, super);
3927 return 0;
3928 }
3929 if (!df->effects
3930 && (df->edge->options->next_same_super_edge[0] == df->edge->options))
3931 install_partial_transition (super, &df->future_frame,
3932 df->edge->cset[offset], offset * 32);
3933 rx_unlock_superstate (rx, super);
3934 return &df->future_frame;
3935 }
3936}
3937
3938
3939
3940
3941
3942/* The rest of the code provides a regex.c compatable interface. */
3943
3944
3945__const__ char *re_error_msg[] =
3946{
3947 0, /* REG_NOUT */
3948 "No match", /* REG_NOMATCH */
3949 "Invalid regular expression", /* REG_BADPAT */
3950 "Invalid collation character", /* REG_ECOLLATE */
3951 "Invalid character class name", /* REG_ECTYPE */
3952 "Trailing backslash", /* REG_EESCAPE */
3953 "Invalid back reference", /* REG_ESUBREG */
3954 "Unmatched [ or [^", /* REG_EBRACK */
3955 "Unmatched ( or \\(", /* REG_EPAREN */
3956 "Unmatched \\{", /* REG_EBRACE */
3957 "Invalid content of \\{\\}", /* REG_BADBR */
3958 "Invalid range end", /* REG_ERANGE */
3959 "Memory exhausted", /* REG_ESPACE */
3960 "Invalid preceding regular expression", /* REG_BADRPT */
3961 "Premature end of regular expression", /* REG_EEND */
3962 "Regular expression too big", /* REG_ESIZE */
3963 "Unmatched ) or \\)", /* REG_ERPAREN */
3964};
3965
3966
3967
3968
3969/*
3970 * Macros used while compiling patterns.
3971 *
3972 * By convention, PEND points just past the end of the uncompiled pattern,
3973 * P points to the read position in the pattern. `translate' is the name
3974 * of the translation table (`TRANSLATE' is the name of a macro that looks
3975 * things up in `translate').
3976 */
3977
3978
3979/*
3980 * Fetch the next character in the uncompiled pattern---translating it
3981 * if necessary. *Also cast from a signed character in the constant
3982 * string passed to us by the user to an unsigned char that we can use
3983 * as an array index (in, e.g., `translate').
3984 */
3985#define PATFETCH(c) \
3986 do {if (p == pend) return REG_EEND; \
3987 c = (unsigned char) *p++; \
3988 c = translate[c]; \
3989 } while (0)
3990
3991/*
3992 * Fetch the next character in the uncompiled pattern, with no
3993 * translation.
3994 */
3995#define PATFETCH_RAW(c) \
3996 do {if (p == pend) return REG_EEND; \
3997 c = (unsigned char) *p++; \
3998 } while (0)
3999
4000/* Go backwards one character in the pattern. */
4001#define PATUNFETCH p--
4002
4003
4004#define TRANSLATE(d) translate[(unsigned char) (d)]
4005
4006typedef unsigned regnum_t;
4007
4008/* Since offsets can go either forwards or backwards, this type needs to
4009 * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
4010 */
4011typedef int pattern_offset_t;
4012
4013typedef struct
4014{
4015 struct rexp_node ** top_expression; /* was begalt */
4016 struct rexp_node ** last_expression; /* was laststart */
4017 pattern_offset_t inner_group_offset;
4018 regnum_t regnum;
4019} compile_stack_elt_t;
4020
4021typedef struct
4022{
4023 compile_stack_elt_t *stack;
4024 unsigned size;
4025 unsigned avail; /* Offset of next open position. */
4026} compile_stack_type;
4027
4028
4029#define INIT_COMPILE_STACK_SIZE 32
4030
4031#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
4032#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
4033
4034/* The next available element. */
4035#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
4036
4037
4038/* Set the bit for character C in a list. */
4039#define SET_LIST_BIT(c) \
4040 (b[((unsigned char) (c)) / CHARBITS] \
4041 |= 1 << (((unsigned char) c) % CHARBITS))
4042
4043/* Get the next unsigned number in the uncompiled pattern. */
4044#define GET_UNSIGNED_NUMBER(num) \
4045 { if (p != pend) \
4046 { \
4047 PATFETCH (c); \
4048 while (isdigit (c)) \
4049 { \
4050 if (num < 0) \
4051 num = 0; \
4052 num = num * 10 + c - '0'; \
4053 if (p == pend) \
4054 break; \
4055 PATFETCH (c); \
4056 } \
4057 } \
4058 }
4059
4060#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
4061
4062#define IS_CHAR_CLASS(string) \
4063 (!strcmp (string, "alpha") || !strcmp (string, "upper") \
4064 || !strcmp (string, "lower") || !strcmp (string, "digit") \
4065 || !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
4066 || !strcmp (string, "space") || !strcmp (string, "print") \
4067 || !strcmp (string, "punct") || !strcmp (string, "graph") \
4068 || !strcmp (string, "cntrl") || !strcmp (string, "blank"))
4069
4070
4071
4072/* These predicates are used in regex_compile. */
4073
4074/* P points to just after a ^ in PATTERN. Return true if that ^ comes
4075 * after an alternative or a begin-subexpression. We assume there is at
4076 * least one character before the ^.
4077 */
4078
4079#ifdef __STDC__
4080static boolean
4081at_begline_loc_p (__const__ char *pattern, __const__ char * p, reg_syntax_t syntax)
4082#else
4083static boolean
4084at_begline_loc_p (pattern, p, syntax)
4085 __const__ char *pattern;
4086 __const__ char * p;
4087 reg_syntax_t syntax;
4088#endif
4089{
4090 __const__ char *prev = p - 2;
4091 boolean prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
4092
4093 return
4094
4095 (/* After a subexpression? */
4096 ((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash))
4097 ||
4098 /* After an alternative? */
4099 ((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash))
4100 );
4101}
4102
4103/* The dual of at_begline_loc_p. This one is for $. We assume there is
4104 * at least one character after the $, i.e., `P < PEND'.
4105 */
4106
4107#ifdef __STDC__
4108static boolean
4109at_endline_loc_p (__const__ char *p, __const__ char *pend, int syntax)
4110#else
4111static boolean
4112at_endline_loc_p (p, pend, syntax)
4113 __const__ char *p;
4114 __const__ char *pend;
4115 int syntax;
4116#endif
4117{
4118 __const__ char *next = p;
4119 boolean next_backslash = (*next == '\\');
4120 __const__ char *next_next = (p + 1 < pend) ? (p + 1) : 0;
4121
4122 return
4123 (
4124 /* Before a subexpression? */
4125 ((syntax & RE_NO_BK_PARENS)
4126 ? (*next == ')')
4127 : (next_backslash && next_next && (*next_next == ')')))
4128 ||
4129 /* Before an alternative? */
4130 ((syntax & RE_NO_BK_VBAR)
4131 ? (*next == '|')
4132 : (next_backslash && next_next && (*next_next == '|')))
4133 );
4134}
4135
4136
4137
4138unsigned char rx_id_translation[256] =
4139{
4140 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
4141 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
4142 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
4143 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
4144 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
4145 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
4146 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
4147 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4148 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
4149 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
4150
4151 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
4152 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
4153 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
4154 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
4155 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
4156 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
4157 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
4158 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
4159 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
4160 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
4161
4162 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
4163 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
4164 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
4165 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
4166 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
4167 250, 251, 252, 253, 254, 255
4168};
4169
4170/* The compiler keeps an inverted translation table.
4171 * This looks up/inititalize elements.
4172 * VALID is an array of booleans that validate CACHE.
4173 */
4174
4175#ifdef __STDC__
4176static rx_Bitset
4177inverse_translation (struct re_pattern_buffer * rxb,
4178 char * valid, rx_Bitset cache,
4179 unsigned char * translate, int c)
4180#else
4181static rx_Bitset
4182inverse_translation (rxb, valid, cache, translate, c)
4183 struct re_pattern_buffer * rxb;
4184 char * valid;
4185 rx_Bitset cache;
4186 unsigned char * translate;
4187 int c;
4188#endif
4189{
4190 rx_Bitset cs
4191 = cache + c * rx_bitset_numb_subsets (rxb->rx.local_cset_size);
4192
4193 if (!valid[c])
4194 {
4195 int x;
4196 int c_tr = TRANSLATE(c);
4197 rx_bitset_null (rxb->rx.local_cset_size, cs);
4198 for (x = 0; x < 256; ++x) /* &&&& 13.37 */
4199 if (TRANSLATE(x) == c_tr)
4200 RX_bitset_enjoin (cs, x);
4201 valid[c] = 1;
4202 }
4203 return cs;
4204}
4205
4206
4207
4208
4209
4210/* More subroutine declarations and macros for regex_compile. */
4211
4212/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4213 false if it's not. */
4214
4215#ifdef __STDC__
4216static boolean
4217group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
4218#else
4219static boolean
4220group_in_compile_stack (compile_stack, regnum)
4221 compile_stack_type compile_stack;
4222 regnum_t regnum;
4223#endif
4224{
4225 int this_element;
4226
4227 for (this_element = compile_stack.avail - 1;
4228 this_element >= 0;
4229 this_element--)
4230 if (compile_stack.stack[this_element].regnum == regnum)
4231 return true;
4232
4233 return false;
4234}
4235
4236
4237/*
4238 * Read the ending character of a range (in a bracket expression) from the
4239 * uncompiled pattern *P_PTR (which ends at PEND). We assume the
4240 * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
4241 * Then we set the translation of all bits between the starting and
4242 * ending characters (inclusive) in the compiled pattern B.
4243 *
4244 * Return an error code.
4245 *
4246 * We use these short variable names so we can use the same macros as
4247 * `regex_compile' itself.
4248 */
4249
4250#ifdef __STDC__
4251static reg_errcode_t
4252compile_range (struct re_pattern_buffer * rxb, rx_Bitset cs,
4253 __const__ char ** p_ptr, __const__ char * pend,
4254 unsigned char * translate, reg_syntax_t syntax,
4255 rx_Bitset inv_tr, char * valid_inv_tr)
4256#else
4257static reg_errcode_t
4258compile_range (rxb, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
4259 struct re_pattern_buffer * rxb;
4260 rx_Bitset cs;
4261 __const__ char ** p_ptr;
4262 __const__ char * pend;
4263 unsigned char * translate;
4264 reg_syntax_t syntax;
4265 rx_Bitset inv_tr;
4266 char * valid_inv_tr;
4267#endif
4268{
4269 unsigned this_char;
4270
4271 __const__ char *p = *p_ptr;
4272
4273 unsigned char range_end;
4274 unsigned char range_start = TRANSLATE(p[-2]);
4275
4276 if (p == pend)
4277 return REG_ERANGE;
4278
4279 PATFETCH (range_end);
4280
4281 (*p_ptr)++;
4282
4283 if (range_start > range_end)
4284 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4285
4286 for (this_char = range_start; this_char <= range_end; this_char++)
4287 {
4288 rx_Bitset it =
4289 inverse_translation (rxb, valid_inv_tr, inv_tr, translate, this_char);
4290 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
4291 }
4292
4293 return REG_NOERROR;
4294}
4295
4296
4297
4298/* This searches a regexp for backreference side effects.
4299 * It fills in the array OUT with 1 at the index of every register pair
4300 * referenced by a backreference.
4301 *
4302 * This is used to help optimize patterns for searching. The information is
4303 * useful because, if the caller doesn't want register values, backreferenced
4304 * registers are the only registers for which we need rx_backtrack.
4305 */
4306
4307#ifdef __STDC__
4308static void
4309find_backrefs (char * out, struct rexp_node * rexp,
4310 struct re_se_params * params)
4311#else
4312static void
4313find_backrefs (out, rexp, params)
4314 char * out;
4315 struct rexp_node * rexp;
4316 struct re_se_params * params;
4317#endif
4318{
4319 if (rexp)
4320 switch (rexp->type)
4321 {
4322 case r_cset:
4323 case r_data:
4324 return;
4325 case r_alternate:
4326 case r_concat:
4327 case r_opt:
4328 case r_star:
4329 case r_2phase_star:
4330 find_backrefs (out, rexp->params.pair.left, params);
4331 find_backrefs (out, rexp->params.pair.right, params);
4332 return;
4333 case r_side_effect:
4334 if ( ((intptr_t)rexp->params.side_effect >= 0)
4335 && (params [(intptr_t)rexp->params.side_effect].se == re_se_backref))
4336 out[ params [(intptr_t)rexp->params.side_effect].op1] = 1;
4337 return;
4338 }
4339}
4340
4341
4342
4343
4344/* Returns 0 unless the pattern can match the empty string. */
4345
4346#ifdef __STDC__
4347static int
4348compute_fastset (struct re_pattern_buffer * rxb, struct rexp_node * rexp)
4349#else
4350static int
4351compute_fastset (rxb, rexp)
4352 struct re_pattern_buffer * rxb;
4353 struct rexp_node * rexp;
4354#endif
4355{
4356 if (!rexp)
4357 return 1;
4358 switch (rexp->type)
4359 {
4360 case r_data:
4361 return 1;
4362 case r_cset:
4363 {
4364 rx_bitset_union (rxb->rx.local_cset_size,
4365 rxb->fastset, rexp->params.cset);
4366 }
4367 return 0;
4368 case r_concat:
4369 return (compute_fastset (rxb, rexp->params.pair.left)
4370 && compute_fastset (rxb, rexp->params.pair.right));
4371 case r_2phase_star:
4372 compute_fastset (rxb, rexp->params.pair.left);
4373 /* compute_fastset (rxb, rexp->params.pair.right); nope... */
4374 return 1;
4375 case r_alternate:
4376 return !!(compute_fastset (rxb, rexp->params.pair.left)
4377 + compute_fastset (rxb, rexp->params.pair.right));
4378 case r_opt:
4379 case r_star:
4380 compute_fastset (rxb, rexp->params.pair.left);
4381 return 1;
4382 case r_side_effect:
4383 return 1;
4384 }
4385
4386 /* this should never happen */
4387 return 0;
4388}
4389
4390
4391
4392/* returns
4393 * 1 -- yes, definately anchored by the given side effect.
4394 * 2 -- maybe anchored, maybe the empty string.
4395 * 0 -- definately not anchored
4396 * There is simply no other possibility.
4397 */
4398
4399#ifdef __STDC__
4400static int
4401is_anchored (struct rexp_node * rexp, rx_side_effect se)
4402#else
4403static int
4404is_anchored (rexp, se)
4405 struct rexp_node * rexp;
4406 rx_side_effect se;
4407#endif
4408{
4409 if (!rexp)
4410 return 2;
4411 switch (rexp->type)
4412 {
4413 case r_cset:
4414 case r_data:
4415 return 0;
4416 case r_concat:
4417 case r_2phase_star:
4418 {
4419 int l = is_anchored (rexp->params.pair.left, se);
4420 return (l == 2 ? is_anchored (rexp->params.pair.right, se) : l);
4421 }
4422 case r_alternate:
4423 {
4424 int l = is_anchored (rexp->params.pair.left, se);
4425 int r = l ? is_anchored (rexp->params.pair.right, se) : 0;
4426
4427 if (l == r)
4428 return l;
4429 else if ((l == 0) || (r == 0))
4430 return 0;
4431 else
4432 return 2;
4433 }
4434 case r_opt:
4435 case r_star:
4436 return is_anchored (rexp->params.pair.left, se) ? 2 : 0;
4437
4438 case r_side_effect:
4439 return ((rexp->params.side_effect == se)
4440 ? 1 : 2);
4441 }
4442
4443 /* this should never happen */
4444 return 0;
4445}
4446
4447
4448
4449/* This removes register assignments that aren't required by backreferencing.
4450 * This can speed up explore_future, especially if it eliminates
4451 * non-determinism in the superstate NFA.
4452 *
4453 * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
4454 * The non-zero elements of the array indicate which register assignments
4455 * can NOT be removed from the expression.
4456 */
4457
4458#ifdef __STDC__
4459static struct rexp_node *
4460remove_unecessary_side_effects (struct rx * rx, char * needed,
4461 struct rexp_node * rexp,
4462 struct re_se_params * params)
4463#else
4464static struct rexp_node *
4465remove_unecessary_side_effects (rx, needed, rexp, params)
4466 struct rx * rx;
4467 char * needed;
4468 struct rexp_node * rexp;
4469 struct re_se_params * params;
4470#endif
4471{
4472 struct rexp_node * l;
4473 struct rexp_node * r;
4474 if (!rexp)
4475 return 0;
4476 else
4477 switch (rexp->type)
4478 {
4479 case r_cset:
4480 case r_data:
4481 return rexp;
4482 case r_alternate:
4483 case r_concat:
4484 case r_2phase_star:
4485 l = remove_unecessary_side_effects (rx, needed,
4486 rexp->params.pair.left, params);
4487 r = remove_unecessary_side_effects (rx, needed,
4488 rexp->params.pair.right, params);
4489 if ((l && r) || (rexp->type != r_concat))
4490 {
4491 rexp->params.pair.left = l;
4492 rexp->params.pair.right = r;
4493 return rexp;
4494 }
4495 else
4496 {
4497 rexp->params.pair.left = rexp->params.pair.right = 0;
4498 rx_free_rexp (rx, rexp);
4499 return l ? l : r;
4500 }
4501 case r_opt:
4502 case r_star:
4503 l = remove_unecessary_side_effects (rx, needed,
4504 rexp->params.pair.left, params);
4505 if (l)
4506 {
4507 rexp->params.pair.left = l;
4508 return rexp;
4509 }
4510 else
4511 {
4512 rexp->params.pair.left = 0;
4513 rx_free_rexp (rx, rexp);
4514 return 0;
4515 }
4516 case r_side_effect:
4517 {
4518 int se = (intptr_t)rexp->params.side_effect;
4519 if ( (se >= 0)
4520 && ( ((enum re_side_effects)params[se].se == re_se_lparen)
4521 || ((enum re_side_effects)params[se].se == re_se_rparen))
4522 && (params [se].op1 > 0)
4523 && (!needed [params [se].op1]))
4524 {
4525 rx_free_rexp (rx, rexp);
4526 return 0;
4527 }
4528 else
4529 return rexp;
4530 }
4531 }
4532
4533 /* this should never happen */
4534 return 0;
4535}
4536
4537
4538
4539
4540#ifdef __STDC__
4541static int
4542pointless_if_repeated (struct rexp_node * node, struct re_se_params * params)
4543#else
4544static int
4545pointless_if_repeated (node, params)
4546 struct rexp_node * node;
4547 struct re_se_params * params;
4548#endif
4549{
4550 if (!node)
4551 return 1;
4552 switch (node->type)
4553 {
4554 case r_cset:
4555 return 0;
4556 case r_alternate:
4557 case r_concat:
4558 case r_2phase_star:
4559 return (pointless_if_repeated (node->params.pair.left, params)
4560 && pointless_if_repeated (node->params.pair.right, params));
4561 case r_opt:
4562 case r_star:
4563 return pointless_if_repeated (node->params.pair.left, params);
4564 case r_side_effect:
4565 switch ((enum re_side_effects) (((intptr_t)node->params.side_effect < 0)
4566 ? (enum re_side_effects) (intptr_t) node->params.side_effect
4567 : (enum re_side_effects)params[(intptr_t)node->params.side_effect].se))
4568 {
4569 case re_se_try:
4570 case re_se_at_dot:
4571 case re_se_begbuf:
4572 case re_se_hat:
4573 case re_se_wordbeg:
4574 case re_se_wordbound:
4575 case re_se_notwordbound:
4576 case re_se_wordend:
4577 case re_se_endbuf:
4578 case re_se_dollar:
4579 case re_se_fail:
4580 case re_se_win:
4581 return 1;
4582 case re_se_lparen:
4583 case re_se_rparen:
4584 case re_se_iter:
4585 case re_se_end_iter:
4586 case re_se_syntax:
4587 case re_se_not_syntax:
4588 case re_se_backref:
4589 return 0;
4590 case re_se_pushback:
4591 case re_se_push0:
4592 case re_se_pushpos:
4593 case re_se_chkpos:
4594 case re_se_poppos:
4595 case re_se_tv:
4596 case re_floogle_flap:
4597 return -1;
4598 }
4599 case r_data:
4600 default:
4601 return 0;
4602 }
4603}
4604
4605
4606
4607
4608#ifdef __STDC__
4609static int
4610registers_on_stack (struct re_pattern_buffer * rxb,
4611 struct rexp_node * rexp, int in_danger,
4612 struct re_se_params * params)
4613#else
4614static int
4615registers_on_stack (rxb, rexp, in_danger, params)
4616 struct re_pattern_buffer * rxb;
4617 struct rexp_node * rexp;
4618 int in_danger;
4619 struct re_se_params * params;
4620#endif
4621{
4622 if (!rexp)
4623 return 0;
4624 else
4625 switch (rexp->type)
4626 {
4627 case r_cset:
4628 case r_data:
4629 return 0;
4630 case r_alternate:
4631 case r_concat:
4632 return ( registers_on_stack (rxb, rexp->params.pair.left,
4633 in_danger, params)
4634 || (registers_on_stack
4635 (rxb, rexp->params.pair.right,
4636 in_danger, params)));
4637 case r_opt:
4638 return registers_on_stack (rxb, rexp->params.pair.left, 0, params);
4639 case r_star:
4640 return registers_on_stack (rxb, rexp->params.pair.left, 1, params);
4641 case r_2phase_star:
4642 return
4643 ( registers_on_stack (rxb, rexp->params.pair.left, 1, params)
4644 || registers_on_stack (rxb, rexp->params.pair.right, 1, params));
4645 case r_side_effect:
4646 {
4647 int se = (intptr_t)rexp->params.side_effect;
4648 if ( in_danger
4649 && (se >= 0)
4650 && (params [se].op1 > 0)
4651 && ( ((enum re_side_effects)params[se].se == re_se_lparen)
4652 || ((enum re_side_effects)params[se].se == re_se_rparen)))
4653 return 1;
4654 else
4655 return 0;
4656 }
4657 }
4658
4659 /* this should never happen */
4660 return 0;
4661}
4662
4663
4664
4665
4666static char idempotent_complex_se[] =
4667{
4668#define RX_WANT_SE_DEFS 1
4669#undef RX_DEF_SE
4670#undef RX_DEF_CPLX_SE
4671#define RX_DEF_SE(IDEM, NAME, VALUE)
4672#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
4673#include "rx.h"
4674#undef RX_DEF_SE
4675#undef RX_DEF_CPLX_SE
4676#undef RX_WANT_SE_DEFS
4677 23
4678};
4679
4680static char idempotent_se[] =
4681{
4682 13,
4683#define RX_WANT_SE_DEFS 1
4684#undef RX_DEF_SE
4685#undef RX_DEF_CPLX_SE
4686#define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
4687#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
4688#include "rx.h"
4689#undef RX_DEF_SE
4690#undef RX_DEF_CPLX_SE
4691#undef RX_WANT_SE_DEFS
4692 42
4693};
4694
4695
4696
4697
4698
4699#ifdef __STDC__
4700static int
4701has_any_se (struct rx * rx,
4702 struct rexp_node * rexp)
4703#else
4704static int
4705has_any_se (rx, rexp)
4706 struct rx * rx;
4707 struct rexp_node * rexp;
4708#endif
4709{
4710 if (!rexp)
4711 return 0;
4712
4713 switch (rexp->type)
4714 {
4715 case r_cset:
4716 case r_data:
4717 return 0;
4718
4719 case r_side_effect:
4720 return 1;
4721
4722 case r_2phase_star:
4723 case r_concat:
4724 case r_alternate:
4725 return
4726 ( has_any_se (rx, rexp->params.pair.left)
4727 || has_any_se (rx, rexp->params.pair.right));
4728
4729 case r_opt:
4730 case r_star:
4731 return has_any_se (rx, rexp->params.pair.left);
4732 }
4733
4734 /* this should never happen */
4735 return 0;
4736}
4737
4738
4739
4740
4741/* This must be called AFTER `convert_hard_loops' for a given REXP. */
4742#ifdef __STDC__
4743static int
4744has_non_idempotent_epsilon_path (struct rx * rx,
4745 struct rexp_node * rexp,
4746 struct re_se_params * params)
4747#else
4748static int
4749has_non_idempotent_epsilon_path (rx, rexp, params)
4750 struct rx * rx;
4751 struct rexp_node * rexp;
4752 struct re_se_params * params;
4753#endif
4754{
4755 if (!rexp)
4756 return 0;
4757
4758 switch (rexp->type)
4759 {
4760 case r_cset:
4761 case r_data:
4762 case r_star:
4763 return 0;
4764
4765 case r_side_effect:
4766 return
4767 !((intptr_t)rexp->params.side_effect > 0
4768 ? idempotent_complex_se [ params [(intptr_t)rexp->params.side_effect].se ]
4769 : idempotent_se [-(intptr_t)rexp->params.side_effect]);
4770
4771 case r_alternate:
4772 return
4773 ( has_non_idempotent_epsilon_path (rx,
4774 rexp->params.pair.left, params)
4775 || has_non_idempotent_epsilon_path (rx,
4776 rexp->params.pair.right, params));
4777
4778 case r_2phase_star:
4779 case r_concat:
4780 return
4781 ( has_non_idempotent_epsilon_path (rx,
4782 rexp->params.pair.left, params)
4783 && has_non_idempotent_epsilon_path (rx,
4784 rexp->params.pair.right, params));
4785
4786 case r_opt:
4787 return has_non_idempotent_epsilon_path (rx,
4788 rexp->params.pair.left, params);
4789 }
4790
4791 /* this should never happen */
4792 return 0;
4793}
4794
4795
4796
4797
4798/* This computes rougly what it's name suggests. It can (and does) go wrong
4799 * in the direction of returning spurious 0 without causing disasters.
4800 */
4801#ifdef __STDC__
4802static int
4803begins_with_complex_se (struct rx * rx, struct rexp_node * rexp)
4804#else
4805static int
4806begins_with_complex_se (rx, rexp)
4807 struct rx * rx;
4808 struct rexp_node * rexp;
4809#endif
4810{
4811 if (!rexp)
4812 return 0;
4813
4814 switch (rexp->type)
4815 {
4816 case r_cset:
4817 case r_data:
4818 return 0;
4819
4820 case r_side_effect:
4821 return ((intptr_t)rexp->params.side_effect >= 0);
4822
4823 case r_alternate:
4824 return
4825 ( begins_with_complex_se (rx, rexp->params.pair.left)
4826 && begins_with_complex_se (rx, rexp->params.pair.right));
4827
4828
4829 case r_concat:
4830 return has_any_se (rx, rexp->params.pair.left);
4831 case r_opt:
4832 case r_star:
4833 case r_2phase_star:
4834 return 0;
4835 }
4836
4837 /* this should never happen */
4838 return 0;
4839}
4840
4841
4842
4843/* This destructively removes some of the re_se_tv side effects from
4844 * a rexp tree. In particular, during parsing re_se_tv was inserted on the
4845 * right half of every | to guarantee that posix path preference could be
4846 * honored. This function removes some which it can be determined aren't
4847 * needed.
4848 */
4849
4850#ifdef __STDC__
4851static void
4852speed_up_alt (struct rx * rx,
4853 struct rexp_node * rexp,
4854 int unposix)
4855#else
4856static void
4857speed_up_alt (rx, rexp, unposix)
4858 struct rx * rx;
4859 struct rexp_node * rexp;
4860 int unposix;
4861#endif
4862{
4863 if (!rexp)
4864 return;
4865
4866 switch (rexp->type)
4867 {
4868 case r_cset:
4869 case r_data:
4870 case r_side_effect:
4871 return;
4872
4873 case r_opt:
4874 case r_star:
4875 speed_up_alt (rx, rexp->params.pair.left, unposix);
4876 return;
4877
4878 case r_2phase_star:
4879 case r_concat:
4880 speed_up_alt (rx, rexp->params.pair.left, unposix);
4881 speed_up_alt (rx, rexp->params.pair.right, unposix);
4882 return;
4883
4884 case r_alternate:
4885 /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
4886
4887 speed_up_alt (rx, rexp->params.pair.left, unposix);
4888 speed_up_alt (rx, rexp->params.pair.right->params.pair.right, unposix);
4889
4890 if ( unposix
4891 || (begins_with_complex_se
4892 (rx, rexp->params.pair.right->params.pair.right))
4893 || !( has_any_se (rx, rexp->params.pair.right->params.pair.right)
4894 || has_any_se (rx, rexp->params.pair.left)))
4895 {
4896 struct rexp_node * conc = rexp->params.pair.right;
4897 rexp->params.pair.right = conc->params.pair.right;
4898 conc->params.pair.right = 0;
4899 rx_free_rexp (rx, conc);
4900 }
4901 }
4902}
4903
4904
4905
4906
4907
4908
4909/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
4910 Returns one of error codes defined in `regex.h', or zero for success.
4911
4912 Assumes the `allocated' (and perhaps `buffer') and `translate'
4913 fields are set in BUFP on entry.
4914
4915 If it succeeds, results are put in BUFP (if it returns an error, the
4916 contents of BUFP are undefined):
4917 `buffer' is the compiled pattern;
4918 `syntax' is set to SYNTAX;
4919 `used' is set to the length of the compiled pattern;
4920 `fastmap_accurate' is set to zero;
4921 `re_nsub' is set to the number of groups in PATTERN;
4922 `not_bol' and `not_eol' are set to zero.
4923
4924 The `fastmap' and `newline_anchor' fields are neither
4925 examined nor set. */
4926
4927
4928
4929#ifdef __STDC__
4930RX_DECL reg_errcode_t
4931rx_compile (__const__ char *pattern, int size,
4932 reg_syntax_t syntax,
4933 struct re_pattern_buffer * rxb)
4934#else
4935RX_DECL reg_errcode_t
4936rx_compile (pattern, size, syntax, rxb)
4937 __const__ char *pattern;
4938 int size;
4939 reg_syntax_t syntax;
4940 struct re_pattern_buffer * rxb;
4941#endif
4942{
4943 RX_subset
4944 inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
4945 char
4946 validate_inv_tr [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
4947
4948 /* We fetch characters from PATTERN here. Even though PATTERN is
4949 `char *' (i.e., signed), we declare these variables as unsigned, so
4950 they can be reliably used as array indices. */
4951 register unsigned char c, c1;
4952
4953 /* A random tempory spot in PATTERN. */
4954 __const__ char *p1;
4955
4956 /* Keeps track of unclosed groups. */
4957 compile_stack_type compile_stack;
4958
4959 /* Points to the current (ending) position in the pattern. */
4960 __const__ char *p = pattern;
4961 __const__ char *pend = pattern + size;
4962
4963 /* How to translate the characters in the pattern. */
4964 unsigned char *translate = (rxb->translate
4965 ? rxb->translate
4966 : rx_id_translation);
4967
4968 /* When parsing is done, this will hold the expression tree. */
4969 struct rexp_node * rexp = 0;
4970
4971 /* In the midst of compilation, this holds onto the regexp
4972 * first parst while rexp goes on to aquire additional constructs.
4973 */
4974 struct rexp_node * orig_rexp = 0;
4975 struct rexp_node * fewer_side_effects = 0;
4976
4977 /* This and top_expression are saved on the compile stack. */
4978 struct rexp_node ** top_expression = &rexp;
4979 struct rexp_node ** last_expression = top_expression;
4980
4981 /* Parameter to `goto append_node' */
4982 struct rexp_node * append;
4983
4984 /* Counts open-groups as they are encountered. This is the index of the
4985 * innermost group being compiled.
4986 */
4987 regnum_t regnum = 0;
4988
4989 /* Place in the uncompiled pattern (i.e., the {) to
4990 * which to go back if the interval is invalid.
4991 */
4992 __const__ char *beg_interval;
4993
4994 struct re_se_params * params = 0;
4995 int paramc = 0; /* How many complex side effects so far? */
4996
4997 rx_side_effect side; /* param to `goto add_side_effect' */
4998
4999 bzero (validate_inv_tr, sizeof (validate_inv_tr));
5000
5001 rxb->rx.instruction_table = rx_id_instruction_table;
5002
5003
5004 /* Initialize the compile stack. */
5005 compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
5006 if (compile_stack.stack == 0)
5007 return REG_ESPACE;
5008
5009 compile_stack.size = INIT_COMPILE_STACK_SIZE;
5010 compile_stack.avail = 0;
5011
5012 /* Initialize the pattern buffer. */
5013 rxb->rx.cache = &default_cache;
5014 rxb->syntax = syntax;
5015 rxb->fastmap_accurate = 0;
5016 rxb->not_bol = rxb->not_eol = 0;
5017 rxb->least_subs = 0;
5018
5019 /* Always count groups, whether or not rxb->no_sub is set.
5020 * The whole pattern is implicitly group 0, so counting begins
5021 * with 1.
5022 */
5023 rxb->re_nsub = 0;
5024
5025#if !defined (emacs) && !defined (SYNTAX_TABLE)
5026 /* Initialize the syntax table. */
5027 init_syntax_once ();
5028#endif
5029
5030 /* Loop through the uncompiled pattern until we're at the end. */
5031 while (p != pend)
5032 {
5033 PATFETCH (c);
5034
5035 switch (c)
5036 {
5037 case '^':
5038 {
5039 if ( /* If at start of pattern, it's an operator. */
5040 p == pattern + 1
5041 /* If context independent, it's an operator. */
5042 || syntax & RE_CONTEXT_INDEP_ANCHORS
5043 /* Otherwise, depends on what's come before. */
5044 || at_begline_loc_p (pattern, p, syntax))
5045 {
5046 struct rexp_node * n
5047 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_hat);
5048 if (!n)
5049 return REG_ESPACE;
5050 append = n;
5051 goto append_node;
5052 }
5053 else
5054 goto normal_char;
5055 }
5056 break;
5057
5058
5059 case '$':
5060 {
5061 if ( /* If at end of pattern, it's an operator. */
5062 p == pend
5063 /* If context independent, it's an operator. */
5064 || syntax & RE_CONTEXT_INDEP_ANCHORS
5065 /* Otherwise, depends on what's next. */
5066 || at_endline_loc_p (p, pend, syntax))
5067 {
5068 struct rexp_node * n
5069 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_dollar);
5070 if (!n)
5071 return REG_ESPACE;
5072 append = n;
5073 goto append_node;
5074 }
5075 else
5076 goto normal_char;
5077 }
5078 break;
5079
5080
5081 case '+':
5082 case '?':
5083 if ((syntax & RE_BK_PLUS_QM)
5084 || (syntax & RE_LIMITED_OPS))
5085 goto normal_char;
5086
5087 handle_plus:
5088 case '*':
5089 /* If there is no previous pattern... */
5090 if (pointless_if_repeated (*last_expression, params))
5091 {
5092 if (syntax & RE_CONTEXT_INVALID_OPS)
5093 return REG_BADRPT;
5094 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
5095 goto normal_char;
5096 }
5097
5098 {
5099 /* 1 means zero (many) matches is allowed. */
5100 char zero_times_ok = 0, many_times_ok = 0;
5101
5102 /* If there is a sequence of repetition chars, collapse it
5103 down to just one (the right one). We can't combine
5104 interval operators with these because of, e.g., `a{2}*',
5105 which should only match an even number of `a's. */
5106
5107 for (;;)
5108 {
5109 zero_times_ok |= c != '+';
5110 many_times_ok |= c != '?';
5111
5112 if (p == pend)
5113 break;
5114
5115 PATFETCH (c);
5116
5117 if (c == '*'
5118 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
5119 ;
5120
5121 else if (syntax & RE_BK_PLUS_QM && c == '\\')
5122 {
5123 if (p == pend) return REG_EESCAPE;
5124
5125 PATFETCH (c1);
5126 if (!(c1 == '+' || c1 == '?'))
5127 {
5128 PATUNFETCH;
5129 PATUNFETCH;
5130 break;
5131 }
5132
5133 c = c1;
5134 }
5135 else
5136 {
5137 PATUNFETCH;
5138 break;
5139 }
5140
5141 /* If we get here, we found another repeat character. */
5142 }
5143
5144 /* Star, etc. applied to an empty pattern is equivalent
5145 to an empty pattern. */
5146 if (!last_expression)
5147 break;
5148
5149 /* Now we know whether or not zero matches is allowed
5150 * and also whether or not two or more matches is allowed.
5151 */
5152
5153 {
5154 struct rexp_node * inner_exp = *last_expression;
5155 int need_sync = 0;
5156
5157 if (many_times_ok
5158 && has_non_idempotent_epsilon_path (&rxb->rx,
5159 inner_exp, params))
5160 {
5161 struct rexp_node * pusher
5162 = rx_mk_r_side_effect (&rxb->rx,
5163 (rx_side_effect)re_se_pushpos);
5164 struct rexp_node * checker
5165 = rx_mk_r_side_effect (&rxb->rx,
5166 (rx_side_effect)re_se_chkpos);
5167 struct rexp_node * pushback
5168 = rx_mk_r_side_effect (&rxb->rx,
5169 (rx_side_effect)re_se_pushback);
5170 rx_Bitset cs = rx_cset (&rxb->rx);
5171 struct rexp_node * lit_t = rx_mk_r_cset (&rxb->rx, cs);
5172 struct rexp_node * fake_state
5173 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
5174 struct rexp_node * phase2
5175 = rx_mk_r_concat (&rxb->rx, checker, fake_state);
5176 struct rexp_node * popper
5177 = rx_mk_r_side_effect (&rxb->rx,
5178 (rx_side_effect)re_se_poppos);
5179 struct rexp_node * star
5180 = rx_mk_r_2phase_star (&rxb->rx, inner_exp, phase2);
5181 struct rexp_node * a
5182 = rx_mk_r_concat (&rxb->rx, pusher, star);
5183 struct rexp_node * whole_thing
5184 = rx_mk_r_concat (&rxb->rx, a, popper);
5185 if (!(pusher && star && pushback && lit_t && fake_state
5186 && lit_t && phase2 && checker && popper
5187 && a && whole_thing))
5188 return REG_ESPACE;
5189 RX_bitset_enjoin (cs, 't');
5190 *last_expression = whole_thing;
5191 }
5192 else
5193 {
5194 struct rexp_node * star =
5195 (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
5196 (&rxb->rx, *last_expression);
5197 if (!star)
5198 return REG_ESPACE;
5199 *last_expression = star;
5200 need_sync = has_any_se (&rxb->rx, *last_expression);
5201 }
5202 if (!zero_times_ok)
5203 {
5204 struct rexp_node * concat
5205 = rx_mk_r_concat (&rxb->rx, inner_exp,
5206 rx_copy_rexp (&rxb->rx,
5207 *last_expression));
5208 if (!concat)
5209 return REG_ESPACE;
5210 *last_expression = concat;
5211 }
5212 if (need_sync)
5213 {
5214 intptr_t sync_se = paramc;
5215 params = (params
5216 ? ((struct re_se_params *)
5217 realloc (params,
5218 sizeof (*params) * (1 + paramc)))
5219 : ((struct re_se_params *)
5220 malloc (sizeof (*params))));
5221 if (!params)
5222 return REG_ESPACE;
5223 ++paramc;
5224 params [sync_se].se = re_se_tv;
5225 side = (rx_side_effect)sync_se;
5226 goto add_side_effect;
5227 }
5228 }
5229 /* The old regex.c used to optimize `.*\n'.
5230 * Maybe rx should too?
5231 */
5232 }
5233 break;
5234
5235
5236 case '.':
5237 {
5238 rx_Bitset cs = rx_cset (&rxb->rx);
5239 struct rexp_node * n = rx_mk_r_cset (&rxb->rx, cs);
5240 if (!(cs && n))
5241 return REG_ESPACE;
5242
5243 rx_bitset_universe (rxb->rx.local_cset_size, cs);
5244 if (!(rxb->syntax & RE_DOT_NEWLINE))
5245 RX_bitset_remove (cs, '\n');
5246 if (!(rxb->syntax & RE_DOT_NOT_NULL))
5247 RX_bitset_remove (cs, 0);
5248
5249 append = n;
5250 goto append_node;
5251 break;
5252 }
5253
5254
5255 case '[':
5256 if (p == pend) return REG_EBRACK;
5257 {
5258 boolean had_char_class = false;
5259 rx_Bitset cs = rx_cset (&rxb->rx);
5260 struct rexp_node * node = rx_mk_r_cset (&rxb->rx, cs);
5261 int is_inverted = *p == '^';
5262
5263 if (!(node && cs))
5264 return REG_ESPACE;
5265
5266 /* This branch of the switch is normally exited with
5267 *`goto append_node'
5268 */
5269 append = node;
5270
5271 if (is_inverted)
5272 p++;
5273
5274 /* Remember the first position in the bracket expression. */
5275 p1 = p;
5276
5277 /* Read in characters and ranges, setting map bits. */
5278 for (;;)
5279 {
5280 if (p == pend) return REG_EBRACK;
5281
5282 PATFETCH (c);
5283
5284 /* \ might escape characters inside [...] and [^...]. */
5285 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
5286 {
5287 if (p == pend) return REG_EESCAPE;
5288
5289 PATFETCH (c1);
5290 {
5291 rx_Bitset it = inverse_translation (rxb,
5292 validate_inv_tr,
5293 inverse_translate,
5294 translate,
5295 c1);
5296 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
5297 }
5298 continue;
5299 }
5300
5301 /* Could be the end of the bracket expression. If it's
5302 not (i.e., when the bracket expression is `[]' so
5303 far), the ']' character bit gets set way below. */
5304 if (c == ']' && p != p1 + 1)
5305 goto finalize_class_and_append;
5306
5307 /* Look ahead to see if it's a range when the last thing
5308 was a character class. */
5309 if (had_char_class && c == '-' && *p != ']')
5310 return REG_ERANGE;
5311
5312 /* Look ahead to see if it's a range when the last thing
5313 was a character: if this is a hyphen not at the
5314 beginning or the end of a list, then it's the range
5315 operator. */
5316 if (c == '-'
5317 && !(p - 2 >= pattern && p[-2] == '[')
5318 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
5319 && *p != ']')
5320 {
5321 reg_errcode_t ret
5322 = compile_range (rxb, cs, &p, pend, translate, syntax,
5323 inverse_translate, validate_inv_tr);
5324 if (ret != REG_NOERROR) return ret;
5325 }
5326
5327 else if (p[0] == '-' && p[1] != ']')
5328 { /* This handles ranges made up of characters only. */
5329 reg_errcode_t ret;
5330
5331 /* Move past the `-'. */
5332 PATFETCH (c1);
5333
5334 ret = compile_range (rxb, cs, &p, pend, translate, syntax,
5335 inverse_translate, validate_inv_tr);
5336 if (ret != REG_NOERROR) return ret;
5337 }
5338
5339 /* See if we're at the beginning of a possible character
5340 class. */
5341
5342 else if ((syntax & RE_CHAR_CLASSES)
5343 && (c == '[') && (*p == ':'))
5344 {
5345 char str[CHAR_CLASS_MAX_LENGTH + 1];
5346
5347 PATFETCH (c);
5348 c1 = 0;
5349
5350 /* If pattern is `[[:'. */
5351 if (p == pend) return REG_EBRACK;
5352
5353 for (;;)
5354 {
5355 PATFETCH (c);
5356 if (c == ':' || c == ']' || p == pend
5357 || c1 == CHAR_CLASS_MAX_LENGTH)
5358 break;
5359 str[c1++] = c;
5360 }
5361 str[c1] = '\0';
5362
5363 /* If isn't a word bracketed by `[:' and:`]':
5364 undo the ending character, the letters, and leave
5365 the leading `:' and `[' (but set bits for them). */
5366 if (c == ':' && *p == ']')
5367 {
5368 int ch;
5369 boolean is_alnum = !strcmp (str, "alnum");
5370 boolean is_alpha = !strcmp (str, "alpha");
5371 boolean is_blank = !strcmp (str, "blank");
5372 boolean is_cntrl = !strcmp (str, "cntrl");
5373 boolean is_digit = !strcmp (str, "digit");
5374 boolean is_graph = !strcmp (str, "graph");
5375 boolean is_lower = !strcmp (str, "lower");
5376 boolean is_print = !strcmp (str, "print");
5377 boolean is_punct = !strcmp (str, "punct");
5378 boolean is_space = !strcmp (str, "space");
5379 boolean is_upper = !strcmp (str, "upper");
5380 boolean is_xdigit = !strcmp (str, "xdigit");
5381
5382 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
5383
5384 /* Throw away the ] at the end of the character
5385 class. */
5386 PATFETCH (c);
5387
5388 if (p == pend) return REG_EBRACK;
5389
5390 for (ch = 0; ch < 1 << CHARBITS; ch++)
5391 {
5392 if ( (is_alnum && isalnum (ch))
5393 || (is_alpha && isalpha (ch))
5394 || (is_blank && isblank (ch))
5395 || (is_cntrl && iscntrl (ch))
5396 || (is_digit && isdigit (ch))
5397 || (is_graph && isgraph (ch))
5398 || (is_lower && islower (ch))
5399 || (is_print && isprint (ch))
5400 || (is_punct && ispunct (ch))
5401 || (is_space && isspace (ch))
5402 || (is_upper && isupper (ch))
5403 || (is_xdigit && isxdigit (ch)))
5404 {
5405 rx_Bitset it =
5406 inverse_translation (rxb,
5407 validate_inv_tr,
5408 inverse_translate,
5409 translate,
5410 ch);
5411 rx_bitset_union (rxb->rx.local_cset_size,
5412 cs, it);
5413 }
5414 }
5415 had_char_class = true;
5416 }
5417 else
5418 {
5419 c1++;
5420 while (c1--)
5421 PATUNFETCH;
5422 {
5423 rx_Bitset it =
5424 inverse_translation (rxb,
5425 validate_inv_tr,
5426 inverse_translate,
5427 translate,
5428 '[');
5429 rx_bitset_union (rxb->rx.local_cset_size,
5430 cs, it);
5431 }
5432 {
5433 rx_Bitset it =
5434 inverse_translation (rxb,
5435 validate_inv_tr,
5436 inverse_translate,
5437 translate,
5438 ':');
5439 rx_bitset_union (rxb->rx.local_cset_size,
5440 cs, it);
5441 }
5442 had_char_class = false;
5443 }
5444 }
5445 else
5446 {
5447 had_char_class = false;
5448 {
5449 rx_Bitset it = inverse_translation (rxb,
5450 validate_inv_tr,
5451 inverse_translate,
5452 translate,
5453 c);
5454 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
5455 }
5456 }
5457 }
5458
5459 finalize_class_and_append:
5460 if (is_inverted)
5461 {
5462 rx_bitset_complement (rxb->rx.local_cset_size, cs);
5463 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
5464 RX_bitset_remove (cs, '\n');
5465 }
5466 goto append_node;
5467 }
5468 break;
5469
5470
5471 case '(':
5472 if (syntax & RE_NO_BK_PARENS)
5473 goto handle_open;
5474 else
5475 goto normal_char;
5476
5477
5478 case ')':
5479 if (syntax & RE_NO_BK_PARENS)
5480 goto handle_close;
5481 else
5482 goto normal_char;
5483
5484
5485 case '\n':
5486 if (syntax & RE_NEWLINE_ALT)
5487 goto handle_alt;
5488 else
5489 goto normal_char;
5490
5491
5492 case '|':
5493 if (syntax & RE_NO_BK_VBAR)
5494 goto handle_alt;
5495 else
5496 goto normal_char;
5497
5498
5499 case '{':
5500 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5501 goto handle_interval;
5502 else
5503 goto normal_char;
5504
5505
5506 case '\\':
5507 if (p == pend) return REG_EESCAPE;
5508
5509 /* Do not translate the character after the \, so that we can
5510 distinguish, e.g., \B from \b, even if we normally would
5511 translate, e.g., B to b. */
5512 PATFETCH_RAW (c);
5513
5514 switch (c)
5515 {
5516 case '(':
5517 if (syntax & RE_NO_BK_PARENS)
5518 goto normal_backslash;
5519
5520 handle_open:
5521 rxb->re_nsub++;
5522 regnum++;
5523 if (COMPILE_STACK_FULL)
5524 {
5525 ((compile_stack.stack) =
5526 (compile_stack_elt_t *) realloc (compile_stack.stack, ( compile_stack.size << 1) * sizeof (
5527 compile_stack_elt_t)));
5528 if (compile_stack.stack == 0) return REG_ESPACE;
5529
5530 compile_stack.size <<= 1;
5531 }
5532
5533 if (*last_expression)
5534 {
5535 struct rexp_node * concat
5536 = rx_mk_r_concat (&rxb->rx, *last_expression, 0);
5537 if (!concat)
5538 return REG_ESPACE;
5539 *last_expression = concat;
5540 last_expression = &concat->params.pair.right;
5541 }
5542
5543 /*
5544 * These are the values to restore when we hit end of this
5545 * group.
5546 */
5547 COMPILE_STACK_TOP.top_expression = top_expression;
5548 COMPILE_STACK_TOP.last_expression = last_expression;
5549 COMPILE_STACK_TOP.regnum = regnum;
5550
5551 compile_stack.avail++;
5552
5553 top_expression = last_expression;
5554 break;
5555
5556
5557 case ')':
5558 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
5559
5560 handle_close:
5561 /* See similar code for backslashed left paren above. */
5562 if (COMPILE_STACK_EMPTY){
5563 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD){
5564 goto normal_char;
5565 }
5566 else{
5567 return REG_ERPAREN;
5568 }
5569 }
5570
5571 /* Since we just checked for an empty stack above, this
5572 ``can't happen''. */
5573
5574 {
5575 /* We don't just want to restore into `regnum', because
5576 later groups should continue to be numbered higher,
5577 as in `(ab)c(de)' -- the second group is #2. */
5578 regnum_t this_group_regnum;
5579 struct rexp_node ** inner = top_expression;
5580
5581 compile_stack.avail--;
5582 top_expression = COMPILE_STACK_TOP.top_expression;
5583 last_expression = COMPILE_STACK_TOP.last_expression;
5584 this_group_regnum = COMPILE_STACK_TOP.regnum;
5585 {
5586 intptr_t left_se = paramc;
5587 intptr_t right_se = paramc + 1;
5588
5589 params = (params
5590 ? ((struct re_se_params *)
5591 realloc (params,
5592 (paramc + 2) * sizeof (params[0])))
5593 : ((struct re_se_params *)
5594 malloc (2 * sizeof (params[0]))));
5595 if (!params)
5596 return REG_ESPACE;
5597 paramc += 2;
5598
5599 params[left_se].se = re_se_lparen;
5600 params[left_se].op1 = this_group_regnum;
5601 params[right_se].se = re_se_rparen;
5602 params[right_se].op1 = this_group_regnum;
5603 {
5604 struct rexp_node * left
5605 = rx_mk_r_side_effect (&rxb->rx,
5606 (rx_side_effect)left_se);
5607 struct rexp_node * right
5608 = rx_mk_r_side_effect (&rxb->rx,
5609 (rx_side_effect)right_se);
5610 struct rexp_node * c1
5611 = (*inner
5612 ? rx_mk_r_concat (&rxb->rx, left, *inner) : left);
5613 struct rexp_node * c2
5614 = rx_mk_r_concat (&rxb->rx, c1, right);
5615 if (!(left && right && c1 && c2))
5616 return REG_ESPACE;
5617 *inner = c2;
5618 }
5619 }
5620 break;
5621 }
5622
5623 case '|': /* `\|'. */
5624 if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
5625 goto normal_backslash;
5626 handle_alt:
5627 if (syntax & RE_LIMITED_OPS)
5628 goto normal_char;
5629
5630 {
5631 struct rexp_node * alt
5632 = rx_mk_r_alternate (&rxb->rx, *top_expression, 0);
5633 if (!alt)
5634 return REG_ESPACE;
5635 *top_expression = alt;
5636 last_expression = &alt->params.pair.right;
5637 {
5638 intptr_t sync_se = paramc;
5639
5640 params = (params
5641 ? ((struct re_se_params *)
5642 realloc (params,
5643 (paramc + 1) * sizeof (params[0])))
5644 : ((struct re_se_params *)
5645 malloc (sizeof (params[0]))));
5646 if (!params)
5647 return REG_ESPACE;
5648 ++paramc;
5649
5650 params[sync_se].se = re_se_tv;
5651 {
5652 struct rexp_node * sync
5653 = rx_mk_r_side_effect (&rxb->rx,
5654 (rx_side_effect)sync_se);
5655 struct rexp_node * conc
5656 = rx_mk_r_concat (&rxb->rx, sync, 0);
5657
5658 if (!sync || !conc)
5659 return REG_ESPACE;
5660
5661 *last_expression = conc;
5662 last_expression = &conc->params.pair.right;
5663 }
5664 }
5665 }
5666 break;
5667
5668
5669 case '{':
5670 /* If \{ is a literal. */
5671 if (!(syntax & RE_INTERVALS)
5672 /* If we're at `\{' and it's not the open-interval
5673 operator. */
5674 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5675 || (p - 2 == pattern && p == pend))
5676 goto normal_backslash;
5677
5678 handle_interval:
5679 {
5680 /* If got here, then the syntax allows intervals. */
5681
5682 /* At least (most) this many matches must be made. */
5683 int lower_bound = -1, upper_bound = -1;
5684
5685 beg_interval = p - 1;
5686
5687 if (p == pend)
5688 {
5689 if (syntax & RE_NO_BK_BRACES)
5690 goto unfetch_interval;
5691 else
5692 return REG_EBRACE;
5693 }
5694
5695 GET_UNSIGNED_NUMBER (lower_bound);
5696
5697 if (c == ',')
5698 {
5699 GET_UNSIGNED_NUMBER (upper_bound);
5700 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
5701 }
5702 else
5703 /* Interval such as `{1}' => match exactly once. */
5704 upper_bound = lower_bound;
5705
5706 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
5707 || lower_bound > upper_bound)
5708 {
5709 if (syntax & RE_NO_BK_BRACES)
5710 goto unfetch_interval;
5711 else
5712 return REG_BADBR;
5713 }
5714
5715 if (!(syntax & RE_NO_BK_BRACES))
5716 {
5717 if (c != '\\') return REG_EBRACE;
5718 PATFETCH (c);
5719 }
5720
5721 if (c != '}')
5722 {
5723 if (syntax & RE_NO_BK_BRACES)
5724 goto unfetch_interval;
5725 else
5726 return REG_BADBR;
5727 }
5728
5729 /* We just parsed a valid interval. */
5730
5731 /* If it's invalid to have no preceding re. */
5732 if (pointless_if_repeated (*last_expression, params))
5733 {
5734 if (syntax & RE_CONTEXT_INVALID_OPS)
5735 return REG_BADRPT;
5736 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
5737 goto unfetch_interval;
5738 /* was: else laststart = b; */
5739 }
5740
5741 /* If the upper bound is zero, don't want to iterate
5742 * at all.
5743 */
5744 if (upper_bound == 0)
5745 {
5746 if (*last_expression)
5747 {
5748 rx_free_rexp (&rxb->rx, *last_expression);
5749 *last_expression = 0;
5750 }
5751 }
5752 else
5753 /* Otherwise, we have a nontrivial interval. */
5754 {
5755 intptr_t iter_se = paramc;
5756 intptr_t end_se = paramc + 1;
5757 params = (params
5758 ? ((struct re_se_params *)
5759 realloc (params,
5760 sizeof (*params) * (2 + paramc)))
5761 : ((struct re_se_params *)
5762 malloc (2 * sizeof (*params))));
5763 if (!params)
5764 return REG_ESPACE;
5765 paramc += 2;
5766 params [iter_se].se = re_se_iter;
5767 params [iter_se].op1 = lower_bound;
5768 params[iter_se].op2 = upper_bound;
5769
5770 params[end_se].se = re_se_end_iter;
5771 params[end_se].op1 = lower_bound;
5772 params[end_se].op2 = upper_bound;
5773 {
5774 struct rexp_node * push0
5775 = rx_mk_r_side_effect (&rxb->rx,
5776 (rx_side_effect)re_se_push0);
5777 struct rexp_node * start_one_iter
5778 = rx_mk_r_side_effect (&rxb->rx,
5779 (rx_side_effect)iter_se);
5780 struct rexp_node * phase1
5781 = rx_mk_r_concat (&rxb->rx, start_one_iter,
5782 *last_expression);
5783 struct rexp_node * pushback
5784 = rx_mk_r_side_effect (&rxb->rx,
5785 (rx_side_effect)re_se_pushback);
5786 rx_Bitset cs = rx_cset (&rxb->rx);
5787 struct rexp_node * lit_t
5788 = rx_mk_r_cset (&rxb->rx, cs);
5789 struct rexp_node * phase2
5790 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
5791 struct rexp_node * loop
5792 = rx_mk_r_2phase_star (&rxb->rx, phase1, phase2);
5793 struct rexp_node * push_n_loop
5794 = rx_mk_r_concat (&rxb->rx, push0, loop);
5795 struct rexp_node * final_test
5796 = rx_mk_r_side_effect (&rxb->rx,
5797 (rx_side_effect)end_se);
5798 struct rexp_node * full_exp
5799 = rx_mk_r_concat (&rxb->rx, push_n_loop, final_test);
5800
5801 if (!(push0 && start_one_iter && phase1
5802 && pushback && lit_t && phase2
5803 && loop && push_n_loop && final_test && full_exp))
5804 return REG_ESPACE;
5805
5806 RX_bitset_enjoin(cs, 't');
5807
5808 *last_expression = full_exp;
5809 }
5810 }
5811 beg_interval = 0;
5812 }
5813 break;
5814
5815 unfetch_interval:
5816 /* If an invalid interval, match the characters as literals. */
5817 p = beg_interval;
5818 beg_interval = 0;
5819
5820 /* normal_char and normal_backslash need `c'. */
5821 PATFETCH (c);
5822
5823 if (!(syntax & RE_NO_BK_BRACES))
5824 {
5825 if (p > pattern && p[-1] == '\\')
5826 goto normal_backslash;
5827 }
5828 goto normal_char;
5829
5830#ifdef emacs
5831 /* There is no way to specify the before_dot and after_dot
5832 operators. rms says this is ok. --karl */
5833 case '=':
5834 side = (rx_side_effect)rx_se_at_dot;
5835 goto add_side_effect;
5836 break;
5837
5838 case 's':
5839 case 'S':
5840 {
5841 rx_Bitset cs = rx_cset (&rxb->rx);
5842 struct rexp_node * set = rx_mk_r_cset (&rxb->rx, cs);
5843 if (!(cs && set))
5844 return REG_ESPACE;
5845 if (c == 'S')
5846 rx_bitset_universe (rxb->rx.local_cset_size, cs);
5847
5848 PATFETCH (c);
5849 {
5850 int x;
5851 enum syntaxcode code = syntax_spec_code [c];
5852 for (x = 0; x < 256; ++x)
5853 {
5854
5855 if (SYNTAX (x) == code)
5856 {
5857 rx_Bitset it =
5858 inverse_translation (rxb, validate_inv_tr,
5859 inverse_translate,
5860 translate, x);
5861 rx_bitset_xor (rxb->rx.local_cset_size, cs, it);
5862 }
5863 }
5864 }
5865 append = set;
5866 goto append_node;
5867 }
5868 break;
5869#endif /* emacs */
5870
5871
5872 case 'w':
5873 case 'W':
5874 {
5875 rx_Bitset cs = rx_cset (&rxb->rx);
5876 struct rexp_node * n = (cs ? rx_mk_r_cset (&rxb->rx, cs) : 0);
5877 if (!(cs && n))
5878 return REG_ESPACE;
5879 if (c == 'W')
5880 rx_bitset_universe (rxb->rx.local_cset_size ,cs);
5881 {
5882 int x;
5883 for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
5884 if (SYNTAX(x) & Sword)
5885 RX_bitset_toggle (cs, x);
5886 }
5887 append = n;
5888 goto append_node;
5889 }
5890 break;
5891
5892/* With a little extra work, some of these side effects could be optimized
5893 * away (basicly by looking at what we already know about the surrounding
5894 * chars).
5895 */
5896 case '<':
5897 side = (rx_side_effect)re_se_wordbeg;
5898 goto add_side_effect;
5899 break;
5900
5901 case '>':
5902 side = (rx_side_effect)re_se_wordend;
5903 goto add_side_effect;
5904 break;
5905
5906 case 'b':
5907 side = (rx_side_effect)re_se_wordbound;
5908 goto add_side_effect;
5909 break;
5910
5911 case 'B':
5912 side = (rx_side_effect)re_se_notwordbound;
5913 goto add_side_effect;
5914 break;
5915
5916 case '`':
5917 side = (rx_side_effect)re_se_begbuf;
5918 goto add_side_effect;
5919 break;
5920
5921 case '\'':
5922 side = (rx_side_effect)re_se_endbuf;
5923 goto add_side_effect;
5924 break;
5925
5926 add_side_effect:
5927 {
5928 struct rexp_node * se
5929 = rx_mk_r_side_effect (&rxb->rx, side);
5930 if (!se)
5931 return REG_ESPACE;
5932 append = se;
5933 goto append_node;
5934 }
5935 break;
5936
5937 case '1': case '2': case '3': case '4': case '5':
5938 case '6': case '7': case '8': case '9':
5939 if (syntax & RE_NO_BK_REFS)
5940 goto normal_char;
5941
5942 c1 = c - '0';
5943
5944 if (c1 > regnum)
5945 return REG_ESUBREG;
5946
5947 /* Can't back reference to a subexpression if inside of it. */
5948 if (group_in_compile_stack (compile_stack, c1))
5949 return REG_ESUBREG;
5950
5951 {
5952 intptr_t backref_se = paramc;
5953 params = (params
5954 ? ((struct re_se_params *)
5955 realloc (params,
5956 sizeof (*params) * (1 + paramc)))
5957 : ((struct re_se_params *)
5958 malloc (sizeof (*params))));
5959 if (!params)
5960 return REG_ESPACE;
5961 ++paramc;
5962 params[backref_se].se = re_se_backref;
5963 params[backref_se].op1 = c1;
5964 side = (rx_side_effect)backref_se;
5965 goto add_side_effect;
5966 }
5967 break;
5968
5969 case '+':
5970 case '?':
5971 if (syntax & RE_BK_PLUS_QM)
5972 goto handle_plus;
5973 else
5974 goto normal_backslash;
5975
5976 default:
5977 normal_backslash:
5978 /* You might think it would be useful for \ to mean
5979 not to translate; but if we don't translate it
5980 it will never match anything. */
5981 c = TRANSLATE (c);
5982 goto normal_char;
5983 }
5984 break;
5985
5986
5987 default:
5988 /* Expects the character in `c'. */
5989 normal_char:
5990 {
5991 rx_Bitset cs = rx_cset(&rxb->rx);
5992 struct rexp_node * match = rx_mk_r_cset (&rxb->rx, cs);
5993 rx_Bitset it;
5994 if (!(cs && match))
5995 return REG_ESPACE;
5996 it = inverse_translation (rxb, validate_inv_tr,
5997 inverse_translate, translate, c);
5998 rx_bitset_union (CHAR_SET_SIZE, cs, it);
5999 append = match;
6000
6001 append_node:
6002 /* This genericly appends the rexp APPEND to *LAST_EXPRESSION
6003 * and then parses the next character normally.
6004 */
6005 if (*last_expression)
6006 {
6007 struct rexp_node * concat
6008 = rx_mk_r_concat (&rxb->rx, *last_expression, append);
6009 if (!concat)
6010 return REG_ESPACE;
6011 *last_expression = concat;
6012 last_expression = &concat->params.pair.right;
6013 }
6014 else
6015 *last_expression = append;
6016 }
6017 } /* switch (c) */
6018 } /* while p != pend */
6019
6020
6021 {
6022 intptr_t win_se = paramc;
6023 params = (params
6024 ? ((struct re_se_params *)
6025 realloc (params,
6026 sizeof (*params) * (1 + paramc)))
6027 : ((struct re_se_params *)
6028 malloc (sizeof (*params))));
6029 if (!params)
6030 return REG_ESPACE;
6031 ++paramc;
6032 params[win_se].se = re_se_win;
6033 {
6034 struct rexp_node * se
6035 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)win_se);
6036 struct rexp_node * concat
6037 = rx_mk_r_concat (&rxb->rx, rexp, se);
6038 if (!(se && concat))
6039 return REG_ESPACE;
6040 rexp = concat;
6041 }
6042 }
6043
6044
6045 /* Through the pattern now. */
6046
6047 if (!COMPILE_STACK_EMPTY)
6048 return REG_EPAREN;
6049
6050 free (compile_stack.stack);
6051
6052 orig_rexp = rexp;
6053#ifdef RX_DEBUG
6054 if (rx_debug_compile)
6055 {
6056 dbug_rxb = rxb;
6057 fputs ("\n\nCompiling ", stdout);
6058 fwrite (pattern, 1, size, stdout);
6059 fputs (":\n", stdout);
6060 rxb->se_params = params;
6061 print_rexp (&rxb->rx, orig_rexp, 2, re_seprint, stdout);
6062 }
6063#endif
6064 {
6065 rx_Bitset cs = rx_cset(&rxb->rx);
6066 rx_Bitset cs2 = rx_cset(&rxb->rx);
6067#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6068 char * se_map = (char *) _alloca (paramc);
6069#else
6070 char * se_map = (char *) alloca (paramc);
6071#endif
6072 struct rexp_node * new_rexp = 0;
6073
6074
6075 bzero (se_map, paramc);
6076 find_backrefs (se_map, rexp, params);
6077 fewer_side_effects =
6078 remove_unecessary_side_effects (&rxb->rx, se_map,
6079 rx_copy_rexp (&rxb->rx, rexp), params);
6080
6081 speed_up_alt (&rxb->rx, rexp, 0);
6082 speed_up_alt (&rxb->rx, fewer_side_effects, 1);
6083
6084 {
6085 char * syntax_parens = rxb->syntax_parens;
6086 if (syntax_parens == (char *)0x1)
6087 rexp = remove_unecessary_side_effects
6088 (&rxb->rx, se_map, rexp, params);
6089 else if (syntax_parens)
6090 {
6091 int x;
6092 for (x = 0; x < paramc; ++x)
6093 if (( (params[x].se == re_se_lparen)
6094 || (params[x].se == re_se_rparen))
6095 && (!syntax_parens [params[x].op1]))
6096 se_map [x] = 1;
6097 rexp = remove_unecessary_side_effects
6098 (&rxb->rx, se_map, rexp, params);
6099 }
6100 }
6101
6102 /* At least one more optimization would be nice to have here but i ran out
6103 * of time. The idea would be to delay side effects.
6104 * For examle, `(abc)' is the same thing as `abc()' except that the
6105 * left paren is offset by 3 (which we know at compile time).
6106 * (In this comment, write that second pattern `abc(:3:)'
6107 * where `(:3:' is a syntactic unit.)
6108 *
6109 * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
6110 * (The paren nesting may be hard to follow -- that's an alternation
6111 * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
6112 * followed by the closing paren from the original expression.)
6113 *
6114 * Neither the expression tree representation nor the the nfa make
6115 * this very easy to write. :(
6116 */
6117
6118 /* What we compile is different than what the parser returns.
6119 * Suppose the parser returns expression R.
6120 * Let R' be R with unnecessary register assignments removed
6121 * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
6122 *
6123 * What we will compile is the expression:
6124 *
6125 * m{try}R{win}\|s{try}R'{win}
6126 *
6127 * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
6128 *
6129 * When trying a match, we insert an `m' at the beginning of the
6130 * string if the user wants registers to be filled, `s' if not.
6131 */
6132 new_rexp =
6133 rx_mk_r_alternate
6134 (&rxb->rx,
6135 rx_mk_r_concat (&rxb->rx, rx_mk_r_cset (&rxb->rx, cs2), rexp),
6136 rx_mk_r_concat (&rxb->rx,
6137 rx_mk_r_cset (&rxb->rx, cs), fewer_side_effects));
6138
6139 if (!(new_rexp && cs && cs2))
6140 return REG_ESPACE;
6141 RX_bitset_enjoin (cs2, '\0'); /* prefixed to the rexp used for matching. */
6142 RX_bitset_enjoin (cs, '\1'); /* prefixed to the rexp used for searching. */
6143 rexp = new_rexp;
6144 }
6145
6146#ifdef RX_DEBUG
6147 if (rx_debug_compile)
6148 {
6149 fputs ("\n...which is compiled as:\n", stdout);
6150 print_rexp (&rxb->rx, rexp, 2, re_seprint, stdout);
6151 }
6152#endif
6153 {
6154 struct rx_nfa_state *start = 0;
6155 struct rx_nfa_state *end = 0;
6156
6157 if (!rx_build_nfa (&rxb->rx, rexp, &start, &end))
6158 return REG_ESPACE; /* */
6159 else
6160 {
6161 void * mem = (void *)rxb->buffer;
6162 mg_u_long size = rxb->allocated;
6163 int start_id;
6164 char * perm_mem;
6165 int iterator_size = paramc * sizeof (params[0]);
6166
6167 end->is_final = 1;
6168 start->is_start = 1;
6169 rx_name_nfa_states (&rxb->rx);
6170 start_id = start->id;
6171#ifdef RX_DEBUG
6172 if (rx_debug_compile)
6173 {
6174 fputs ("...giving the NFA: \n", stdout);
6175 dbug_rxb = rxb;
6176 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6177 }
6178#endif
6179 if (!rx_eclose_nfa (&rxb->rx))
6180 return REG_ESPACE;
6181 else
6182 {
6183 rx_delete_epsilon_transitions (&rxb->rx);
6184
6185 /* For compatability reasons, we need to shove the
6186 * compiled nfa into one chunk of malloced memory.
6187 */
6188 rxb->rx.reserved = ( sizeof (params[0]) * paramc
6189 + rx_sizeof_bitset (rxb->rx.local_cset_size));
6190#ifdef RX_DEBUG
6191 if (rx_debug_compile)
6192 {
6193 dbug_rxb = rxb;
6194 fputs ("...which cooks down (uncompactified) to: \n", stdout);
6195 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6196 }
6197#endif
6198 if (!rx_compactify_nfa (&rxb->rx, &mem, &size))
6199 return REG_ESPACE;
6200 rxb->buffer = mem;
6201 rxb->allocated = size;
6202 rxb->rx.buffer = mem;
6203 rxb->rx.allocated = size;
6204 perm_mem = ((char *)rxb->rx.buffer
6205 + rxb->rx.allocated - rxb->rx.reserved);
6206 rxb->se_params = ((struct re_se_params *)perm_mem);
6207 bcopy (params, rxb->se_params, iterator_size);
6208 perm_mem += iterator_size;
6209 rxb->fastset = (rx_Bitset) perm_mem;
6210 rxb->start = rx_id_to_nfa_state (&rxb->rx, start_id);
6211 }
6212 rx_bitset_null (rxb->rx.local_cset_size, rxb->fastset);
6213 rxb->can_match_empty = compute_fastset (rxb, orig_rexp);
6214 rxb->match_regs_on_stack =
6215 registers_on_stack (rxb, orig_rexp, 0, params);
6216 rxb->search_regs_on_stack =
6217 registers_on_stack (rxb, fewer_side_effects, 0, params);
6218 if (rxb->can_match_empty)
6219 rx_bitset_universe (rxb->rx.local_cset_size, rxb->fastset);
6220 rxb->is_anchored = is_anchored (orig_rexp, (rx_side_effect) re_se_hat);
6221 rxb->begbuf_only = is_anchored (orig_rexp,
6222 (rx_side_effect) re_se_begbuf);
6223 }
6224 rx_free_rexp (&rxb->rx, rexp);
6225 if (params)
6226 free (params);
6227#ifdef RX_DEBUG
6228 if (rx_debug_compile)
6229 {
6230 dbug_rxb = rxb;
6231 fputs ("...which cooks down to: \n", stdout);
6232 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6233 }
6234#endif
6235 }
6236 return REG_NOERROR;
6237}
6238
6239
6240
6241
6242/* This table gives an error message for each of the error codes listed
6243 in regex.h. Obviously the order here has to be same as there. */
6244
6245__const__ char * rx_error_msg[] =
6246{ 0, /* REG_NOERROR */
6247 "No match", /* REG_NOMATCH */
6248 "Invalid regular expression", /* REG_BADPAT */
6249 "Invalid collation character", /* REG_ECOLLATE */
6250 "Invalid character class name", /* REG_ECTYPE */
6251 "Trailing backslash", /* REG_EESCAPE */
6252 "Invalid back reference", /* REG_ESUBREG */
6253 "Unmatched [ or [^", /* REG_EBRACK */
6254 "Unmatched ( or \\(", /* REG_EPAREN */
6255 "Unmatched \\{", /* REG_EBRACE */
6256 "Invalid content of \\{\\}", /* REG_BADBR */
6257 "Invalid range end", /* REG_ERANGE */
6258 "Memory exhausted", /* REG_ESPACE */
6259 "Invalid preceding regular expression", /* REG_BADRPT */
6260 "Premature end of regular expression", /* REG_EEND */
6261 "Regular expression too big", /* REG_ESIZE */
6262 "Unmatched ) or \\)", /* REG_ERPAREN */
6263};
6264
6265
6266
6267
6268
6269char rx_slowmap [256] =
6270{
6271 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6272 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6273 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6274 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6275 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6276 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6277 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6278 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6279 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6280 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6281 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6282 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6283 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6284 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6285 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6286 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6287};
6288
6289#ifdef __STDC__
6290RX_DECL void
6291rx_blow_up_fastmap (struct re_pattern_buffer * rxb)
6292#else
6293RX_DECL void
6294rx_blow_up_fastmap (rxb)
6295 struct re_pattern_buffer * rxb;
6296#endif
6297{
6298 int x;
6299 for (x = 0; x < 256; ++x) /* &&&& 3.6 % */
6300 rxb->fastmap [x] = !!RX_bitset_member (rxb->fastset, x);
6301 rxb->fastmap_accurate = 1;
6302}
6303
6304
6305
6306
6307
6308#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
6309#define RE_SEARCH_2_FN inner_re_search_2
6310#define RE_S2_QUAL static
6311#else
6312#define RE_SEARCH_2_FN re_search_2
6313#define RE_S2_QUAL
6314#endif
6315
6316struct re_search_2_closure
6317{
6318 __const__ char * string1;
6319 int size1;
6320 __const__ char * string2;
6321 int size2;
6322};
6323
6324
6325static __inline__ enum rx_get_burst_return
6326re_search_2_get_burst (pos, vclosure, stop)
6327 struct rx_string_position * pos;
6328 void * vclosure;
6329 int stop;
6330{
6331 struct re_search_2_closure * closure;
6332 closure = (struct re_search_2_closure *)vclosure;
6333 if (!closure->string2)
6334 {
6335 int inset;
6336
6337 inset = pos->pos - pos->string;
6338 if ((inset < -1) || (inset > closure->size1))
6339 return rx_get_burst_no_more;
6340 else
6341 {
6342 pos->pos = (__const__ unsigned char *) closure->string1 + inset;
6343 pos->string = (__const__ unsigned char *) closure->string1;
6344 pos->size = closure->size1;
6345 pos->end = ((__const__ unsigned char *)
6346 MIN(closure->string1 + closure->size1,
6347 closure->string1 + stop));
6348 pos->offset = 0;
6349 return ((pos->pos < pos->end)
6350 ? rx_get_burst_ok
6351 : rx_get_burst_no_more);
6352 }
6353 }
6354 else if (!closure->string1)
6355 {
6356 int inset;
6357
6358 inset = pos->pos - pos->string;
6359 pos->pos = (__const__ unsigned char *) closure->string2 + inset;
6360 pos->string = (__const__ unsigned char *) closure->string2;
6361 pos->size = closure->size2;
6362 pos->end = ((__const__ unsigned char *)
6363 MIN(closure->string2 + closure->size2,
6364 closure->string2 + stop));
6365 pos->offset = 0;
6366 return ((pos->pos < pos->end)
6367 ? rx_get_burst_ok
6368 : rx_get_burst_no_more);
6369 }
6370 else
6371 {
6372 int inset;
6373
6374 inset = pos->pos - pos->string + pos->offset;
6375 if (inset < closure->size1)
6376 {
6377 pos->pos = (__const__ unsigned char *) closure->string1 + inset;
6378 pos->string = (__const__ unsigned char *) closure->string1;
6379 pos->size = closure->size1;
6380 pos->end = ((__const__ unsigned char *)
6381 MIN(closure->string1 + closure->size1,
6382 closure->string1 + stop));
6383 pos->offset = 0;
6384 return rx_get_burst_ok;
6385 }
6386 else
6387 {
6388 pos->pos = ((__const__ unsigned char *)
6389 closure->string2 + inset - closure->size1);
6390 pos->string = (__const__ unsigned char *) closure->string2;
6391 pos->size = closure->size2;
6392 pos->end = ((__const__ unsigned char *)
6393 MIN(closure->string2 + closure->size2,
6394 closure->string2 + stop - closure->size1));
6395 pos->offset = closure->size1;
6396 return ((pos->pos < pos->end)
6397 ? rx_get_burst_ok
6398 : rx_get_burst_no_more);
6399 }
6400 }
6401}
6402
6403
6404static __inline__ enum rx_back_check_return
6405re_search_2_back_check (pos, lparen, rparen, translate, vclosure, stop)
6406 struct rx_string_position * pos;
6407 int lparen;
6408 int rparen;
6409 unsigned char * translate;
6410 void * vclosure;
6411 int stop;
6412{
6413 struct rx_string_position there;
6414 struct rx_string_position past;
6415
6416 there = *pos;
6417 there.pos = there.string + lparen - there.offset;
6418 re_search_2_get_burst (&there, vclosure, stop);
6419
6420 past = *pos;
6421 past.pos = past.string + rparen - there.offset;
6422 re_search_2_get_burst (&past, vclosure, stop);
6423
6424 ++pos->pos;
6425 re_search_2_get_burst (pos, vclosure, stop);
6426
6427 while ( (there.pos != past.pos)
6428 && (pos->pos != pos->end))
6429 if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
6430 return rx_back_check_fail;
6431 else
6432 {
6433 ++there.pos;
6434 ++pos->pos;
6435 if (there.pos == there.end)
6436 re_search_2_get_burst (&there, vclosure, stop);
6437 if (pos->pos == pos->end)
6438 re_search_2_get_burst (pos, vclosure, stop);
6439 }
6440
6441 if (there.pos != past.pos)
6442 return rx_back_check_fail;
6443 --pos->pos;
6444 re_search_2_get_burst (pos, vclosure, stop);
6445 return rx_back_check_pass;
6446}
6447
6448static __inline__ int
6449re_search_2_fetch_char (pos, offset, app_closure, stop)
6450 struct rx_string_position * pos;
6451 int offset;
6452 void * app_closure;
6453 int stop;
6454{
6455 struct re_search_2_closure * closure;
6456 closure = (struct re_search_2_closure *)app_closure;
6457 if (offset == 0)
6458 {
6459 if (pos->pos >= pos->string)
6460 return *pos->pos;
6461 else
6462 {
6463 if ( (pos->string == (__const__ unsigned char *) closure->string2)
6464 && (closure->string1)
6465 && (closure->size1))
6466 return closure->string1[closure->size1 - 1];
6467 else
6468 return 0; /* sure, why not. */
6469 }
6470 }
6471 if (pos->pos == pos->end)
6472 return *closure->string2;
6473 else
6474 return pos->pos[1];
6475}
6476
6477
6478#ifdef __STDC__
6479RE_S2_QUAL int
6480RE_SEARCH_2_FN (struct re_pattern_buffer *rxb,
6481 __const__ char * string1, int size1,
6482 __const__ char * string2, int size2,
6483 int startpos, int range,
6484 struct re_registers *regs,
6485 int stop)
6486#else
6487RE_S2_QUAL int
6488RE_SEARCH_2_FN (rxb,
6489 string1, size1, string2, size2, startpos, range, regs, stop)
6490 struct re_pattern_buffer *rxb;
6491 __const__ char * string1;
6492 int size1;
6493 __const__ char * string2;
6494 int size2;
6495 int startpos;
6496 int range;
6497 struct re_registers *regs;
6498 int stop;
6499#endif
6500{
6501 int answer;
6502 struct re_search_2_closure closure;
6503 closure.string1 = string1;
6504 closure.size1 = size1;
6505 closure.string2 = string2;
6506 closure.size2 = size2;
6507 answer = rx_search (rxb, startpos, range, stop, size1 + size2,
6508 re_search_2_get_burst,
6509 re_search_2_back_check,
6510 re_search_2_fetch_char,
6511 (void *)&closure,
6512 regs,
6513 0,
6514 0);
6515 switch (answer)
6516 {
6517 case rx_search_continuation:
6518 abort ();
6519 case rx_search_error:
6520 return -2;
6521 case rx_search_soft_fail:
6522 case rx_search_fail:
6523 return -1;
6524 default:
6525 return answer;
6526 }
6527}
6528
6529/* Export rx_search to callers outside this file. */
6530
6531int
6532re_rx_search (rxb, startpos, range, stop, total_size,
6533 get_burst, back_check, fetch_char,
6534 app_closure, regs, resume_state, save_state)
6535 struct re_pattern_buffer * rxb;
6536 int startpos;
6537 int range;
6538 int stop;
6539 int total_size;
6540 rx_get_burst_fn get_burst;
6541 rx_back_check_fn back_check;
6542 rx_fetch_char_fn fetch_char;
6543 void * app_closure;
6544 struct re_registers * regs;
6545 struct rx_search_state * resume_state;
6546 struct rx_search_state * save_state;
6547{
6548 return rx_search (rxb, startpos, range, stop, total_size,
6549 get_burst, back_check, fetch_char, app_closure,
6550 regs, resume_state, save_state);
6551}
6552
6553#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
6554#ifdef __STDC__
6555int
6556re_search_2 (struct re_pattern_buffer *rxb,
6557 __const__ char * string1, int size1,
6558 __const__ char * string2, int size2,
6559 int startpos, int range,
6560 struct re_registers *regs,
6561 int stop)
6562#else
6563int
6564re_search_2 (rxb, string1, size1, string2, size2, startpos, range, regs, stop)
6565 struct re_pattern_buffer *rxb;
6566 __const__ char * string1;
6567 int size1;
6568 __const__ char * string2;
6569 int size2;
6570 int startpos;
6571 int range;
6572 struct re_registers *regs;
6573 int stop;
6574#endif
6575{
6576 int ret;
6577 ret = inner_re_search_2 (rxb, string1, size1, string2, size2, startpos,
6578 range, regs, stop);
6579#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6580 _alloca (0);
6581#else
6582 alloca (0);
6583#endif
6584 return ret;
6585}
6586#endif
6587
6588
6589/* Like re_search_2, above, but only one string is specified, and
6590 * doesn't let you say where to stop matching.
6591 */
6592
6593#ifdef __STDC__
6594int
6595re_search (struct re_pattern_buffer * rxb, __const__ char *string,
6596 int size, int startpos, int range,
6597 struct re_registers *regs)
6598#else
6599int
6600re_search (rxb, string, size, startpos, range, regs)
6601 struct re_pattern_buffer * rxb;
6602 __const__ char * string;
6603 int size;
6604 int startpos;
6605 int range;
6606 struct re_registers *regs;
6607#endif
6608{
6609 return re_search_2 (rxb, 0, 0, string, size, startpos, range, regs, size);
6610}
6611
6612#ifdef __STDC__
6613int
6614re_match_2 (struct re_pattern_buffer * rxb,
6615 __const__ char * string1, int size1,
6616 __const__ char * string2, int size2,
6617 int pos, struct re_registers *regs, int stop)
6618#else
6619int
6620re_match_2 (rxb, string1, size1, string2, size2, pos, regs, stop)
6621 struct re_pattern_buffer * rxb;
6622 __const__ char * string1;
6623 int size1;
6624 __const__ char * string2;
6625 int size2;
6626 int pos;
6627 struct re_registers *regs;
6628 int stop;
6629#endif
6630{
6631 struct re_registers some_regs;
6632 regoff_t start;
6633 regoff_t end;
6634 int srch;
6635 int save = rxb->regs_allocated;
6636 struct re_registers * regs_to_pass = regs;
6637
6638 if (!regs)
6639 {
6640 some_regs.start = &start;
6641 some_regs.end = &end;
6642 some_regs.num_regs = 1;
6643 regs_to_pass = &some_regs;
6644 rxb->regs_allocated = REGS_FIXED;
6645 }
6646
6647 srch = re_search_2 (rxb, string1, size1, string2, size2,
6648 pos, 1, regs_to_pass, stop);
6649 if (regs_to_pass != regs)
6650 rxb->regs_allocated = save;
6651 if (srch < 0)
6652 return srch;
6653 return regs_to_pass->end[0] - regs_to_pass->start[0];
6654}
6655
6656/* re_match is like re_match_2 except it takes only a single string. */
6657
6658#ifdef __STDC__
6659int
6660re_match (struct re_pattern_buffer * rxb,
6661 __const__ char * string,
6662 int size, int pos,
6663 struct re_registers *regs)
6664#else
6665int
6666re_match (rxb, string, size, pos, regs)
6667 struct re_pattern_buffer * rxb;
6668 __const__ char *string;
6669 int size;
6670 int pos;
6671 struct re_registers *regs;
6672#endif
6673{
6674 return re_match_2 (rxb, string, size, 0, 0, pos, regs, size);
6675}
6676
6677
6678
6679
6680/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
6681 also be assigned to arbitrarily: each pattern buffer stores its own
6682 syntax, so it can be changed between regex compilations. */
6683reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
6684
6685
6686/* Specify the precise syntax of regexps for compilation. This provides
6687 for compatibility for various utilities which historically have
6688 different, incompatible syntaxes.
6689
6690 The argument SYNTAX is a bit mask comprised of the various bits
6691 defined in regex.h. We return the old syntax. */
6692
6693#ifdef __STDC__
6694reg_syntax_t
6695re_set_syntax (reg_syntax_t syntax)
6696#else
6697reg_syntax_t
6698re_set_syntax (syntax)
6699 reg_syntax_t syntax;
6700#endif
6701{
6702 reg_syntax_t ret = re_syntax_options;
6703
6704 re_syntax_options = syntax;
6705 return ret;
6706}
6707
6708
6709
6710/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
6711 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
6712 this memory for recording register information. STARTS and ENDS
6713 must be allocated using the malloc library routine, and must each
6714 be at least NUM_REGS * sizeof (regoff_t) bytes long.
6715
6716 If NUM_REGS == 0, then subsequent matches should allocate their own
6717 register data.
6718
6719 Unless this function is called, the first search or match using
6720 PATTERN_BUFFER will allocate its own register data, without
6721 freeing the old data. */
6722
6723#ifdef __STDC__
6724void
6725re_set_registers (struct re_pattern_buffer *bufp,
6726 struct re_registers *regs,
6727 unsigned num_regs,
6728 regoff_t * starts, regoff_t * ends)
6729#else
6730void
6731re_set_registers (bufp, regs, num_regs, starts, ends)
6732 struct re_pattern_buffer *bufp;
6733 struct re_registers *regs;
6734 unsigned num_regs;
6735 regoff_t * starts;
6736 regoff_t * ends;
6737#endif
6738{
6739 if (num_regs)
6740 {
6741 bufp->regs_allocated = REGS_REALLOCATE;
6742 regs->num_regs = num_regs;
6743 regs->start = starts;
6744 regs->end = ends;
6745 }
6746 else
6747 {
6748 bufp->regs_allocated = REGS_UNALLOCATED;
6749 regs->num_regs = 0;
6750 regs->start = regs->end = (regoff_t) 0;
6751 }
6752}
6753
6754
6755
6756
6757
6758#ifdef __STDC__
6759static int
6760cplx_se_sublist_len (struct rx_se_list * list)
6761#else
6762static int
6763cplx_se_sublist_len (list)
6764 struct rx_se_list * list;
6765#endif
6766{
6767 int x = 0;
6768 while (list)
6769 {
6770 if ((intptr_t)list->car >= 0)
6771 ++x;
6772 list = list->cdr;
6773 }
6774 return x;
6775}
6776
6777
6778/* For rx->se_list_cmp */
6779
6780#ifdef __STDC__
6781static int
6782posix_se_list_order (struct rx * rx,
6783 struct rx_se_list * a, struct rx_se_list * b)
6784#else
6785static int
6786posix_se_list_order (rx, a, b)
6787 struct rx * rx;
6788 struct rx_se_list * a;
6789 struct rx_se_list * b;
6790#endif
6791{
6792 int al = cplx_se_sublist_len (a);
6793 int bl = cplx_se_sublist_len (b);
6794
6795 if (!al && !bl)
6796 return ((a == b)
6797 ? 0
6798 : ((a < b) ? -1 : 1));
6799
6800 else if (!al)
6801 return -1;
6802
6803 else if (!bl)
6804 return 1;
6805
6806 else
6807 {
6808#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6809 rx_side_effect * av = ((rx_side_effect *)
6810 _alloca (sizeof (rx_side_effect) * (al + 1)));
6811 rx_side_effect * bv = ((rx_side_effect *)
6812 _alloca (sizeof (rx_side_effect) * (bl + 1)));
6813#else
6814 rx_side_effect * av = ((rx_side_effect *)
6815 alloca (sizeof (rx_side_effect) * (al + 1)));
6816 rx_side_effect * bv = ((rx_side_effect *)
6817 alloca (sizeof (rx_side_effect) * (bl + 1)));
6818#endif
6819 struct rx_se_list * ap = a;
6820 struct rx_se_list * bp = b;
6821 int ai, bi;
6822
6823 for (ai = al - 1; ai >= 0; --ai)
6824 {
6825 while ((intptr_t)ap->car < 0)
6826 ap = ap->cdr;
6827 av[ai] = ap->car;
6828 ap = ap->cdr;
6829 }
6830 av[al] = (rx_side_effect)-2;
6831 for (bi = bl - 1; bi >= 0; --bi)
6832 {
6833 while ((intptr_t)bp->car < 0)
6834 bp = bp->cdr;
6835 bv[bi] = bp->car;
6836 bp = bp->cdr;
6837 }
6838 bv[bl] = (rx_side_effect)-1;
6839
6840 {
6841 int ret;
6842 int x = 0;
6843 while (av[x] == bv[x])
6844 ++x;
6845 ret = (((unsigned *)(av[x]) < (unsigned *)(bv[x])) ? -1 : 1);
6846 return ret;
6847 }
6848 }
6849}
6850
6851
6852
6853
6854
6855/* re_compile_pattern is the GNU regular expression compiler: it
6856 compiles PATTERN (of length SIZE) and puts the result in RXB.
6857 Returns 0 if the pattern was valid, otherwise an error string.
6858
6859 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6860 are set in RXB on entry.
6861
6862 We call rx_compile to do the actual compilation. */
6863
6864#ifdef __STDC__
6865__const__ char *
6866re_compile_pattern (__const__ char *pattern,
6867 int length,
6868 struct re_pattern_buffer * rxb)
6869#else
6870__const__ char *
6871re_compile_pattern (pattern, length, rxb)
6872 __const__ char *pattern;
6873 int length;
6874 struct re_pattern_buffer * rxb;
6875#endif
6876{
6877 reg_errcode_t ret;
6878
6879 /* GNU code is written to assume at least RE_NREGS registers will be set
6880 (and at least one extra will be -1). */
6881 rxb->regs_allocated = REGS_UNALLOCATED;
6882
6883 /* And GNU code determines whether or not to get register information
6884 by passing null for the REGS argument to re_match, etc., not by
6885 setting no_sub. */
6886 rxb->no_sub = 0;
6887
6888 rxb->rx.local_cset_size = 256;
6889
6890 /* Match anchors at newline. */
6891 rxb->newline_anchor = 1;
6892
6893 rxb->re_nsub = 0;
6894 rxb->start = 0;
6895 rxb->se_params = 0;
6896 rxb->rx.nodec = 0;
6897 rxb->rx.epsnodec = 0;
6898 rxb->rx.instruction_table = 0;
6899 rxb->rx.nfa_states = 0;
6900 rxb->rx.se_list_cmp = posix_se_list_order;
6901 rxb->rx.start_set = 0;
6902
6903 ret = rx_compile (pattern, length, re_syntax_options, rxb);
6904#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6905 _alloca (0);
6906#else
6907 alloca (0);
6908#endif
6909 return rx_error_msg[(int) ret];
6910}
6911
6912
6913
6914#ifdef __STDC__
6915int
6916re_compile_fastmap (struct re_pattern_buffer * rxb)
6917#else
6918int
6919re_compile_fastmap (rxb)
6920 struct re_pattern_buffer * rxb;
6921#endif
6922{
6923 rx_blow_up_fastmap (rxb);
6924 return 0;
6925}
6926
6927
6928
6929
6930
6931/* Entry points compatible with 4.2 BSD regex library. We don't define
6932 them if this is an Emacs or POSIX compilation. */
6933
6934#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
6935
6936/* BSD has one and only one pattern buffer. */
6937static struct re_pattern_buffer rx_comp_buf;
6938
6939#ifdef __STDC__
6940char *
6941re_comp (__const__ char *s)
6942#else
6943char *
6944re_comp (s)
6945 __const__ char *s;
6946#endif
6947{
6948 reg_errcode_t ret;
6949
6950 if (!s || (*s == '\0'))
6951 {
6952 if (!rx_comp_buf.buffer)
6953 return "No previous regular expression";
6954 return 0;
6955 }
6956
6957 if (!rx_comp_buf.fastmap)
6958 {
6959 rx_comp_buf.fastmap = (char *) malloc (1 << CHARBITS);
6960 if (!rx_comp_buf.fastmap)
6961 return "Memory exhausted";
6962 }
6963
6964 /* Since `rx_exec' always passes NULL for the `regs' argument, we
6965 don't need to initialize the pattern buffer fields which affect it. */
6966
6967 /* Match anchors at newlines. */
6968 rx_comp_buf.newline_anchor = 1;
6969
6970 rx_comp_buf.re_nsub = 0;
6971 rx_comp_buf.start = 0;
6972 rx_comp_buf.se_params = 0;
6973 rx_comp_buf.rx.nodec = 0;
6974 rx_comp_buf.rx.epsnodec = 0;
6975 rx_comp_buf.rx.instruction_table = 0;
6976 rx_comp_buf.rx.nfa_states = 0;
6977 rx_comp_buf.rx.start = 0;
6978 rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
6979 rx_comp_buf.rx.local_cset_size = 256;
6980
6981 ret = rx_compile (s, strlen (s), re_syntax_options, &rx_comp_buf);
6982#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6983 _alloca (0);
6984#else
6985 alloca (0);
6986#endif
6987
6988 /* Yes, we're discarding `__const__' here. */
6989 return (char *) rx_error_msg[(int) ret];
6990}
6991
6992
6993#ifdef __STDC__
6994int
6995re_exec (__const__ char *s)
6996#else
6997int
6998re_exec (s)
6999 __const__ char *s;
7000#endif
7001{
7002 __const__ int len = strlen (s);
7003 return
7004 0 <= re_search (&rx_comp_buf, s, len, 0, len, (struct re_registers *) 0);
7005}
7006#endif /* not emacs and not _POSIX_SOURCE */
7007
7008
7009
7010
7011/* POSIX.2 functions. Don't define these for Emacs. */
7012
7013#if !defined(emacs)
7014
7015/* regcomp takes a regular expression as a string and compiles it.
7016
7017 PREG is a regex_t *. We do not expect any fields to be initialized,
7018 since POSIX says we shouldn't. Thus, we set
7019
7020 `buffer' to the compiled pattern;
7021 `used' to the length of the compiled pattern;
7022 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7023 REG_EXTENDED bit in CFLAGS is set; otherwise, to
7024 RE_SYNTAX_POSIX_BASIC;
7025 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7026 `fastmap' and `fastmap_accurate' to zero;
7027 `re_nsub' to the number of subexpressions in PATTERN.
7028
7029 PATTERN is the address of the pattern string.
7030
7031 CFLAGS is a series of bits which affect compilation.
7032
7033 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7034 use POSIX basic syntax.
7035
7036 If REG_NEWLINE is set, then . and [^...] don't match newline.
7037 Also, regexec will try a match beginning after every newline.
7038
7039 If REG_ICASE is set, then we considers upper- and lowercase
7040 versions of letters to be equivalent when matching.
7041
7042 If REG_NOSUB is set, then when PREG is passed to regexec, that
7043 routine will report only success or failure, and nothing about the
7044 registers.
7045
7046 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
7047 the return codes and their meanings.) */
7048
7049
7050#ifdef __STDC__
7051int
7052regcomp (regex_t * preg, __const__ char * pattern, int cflags)
7053#else
7054int
7055regcomp (preg, pattern, cflags)
7056 regex_t * preg;
7057 __const__ char * pattern;
7058 int cflags;
7059#endif
7060{
7061 reg_errcode_t ret;
7062 unsigned syntax
7063 = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7064
7065 /* regex_compile will allocate the space for the compiled pattern. */
7066 preg->buffer = 0;
7067 preg->allocated = 0;
7068 preg->fastmap = malloc (256);
7069 if (!preg->fastmap)
7070 return REG_ESPACE;
7071 preg->fastmap_accurate = 0;
7072
7073 if (cflags & REG_ICASE)
7074 {
7075 unsigned i;
7076
7077 preg->translate = (unsigned char *) malloc (256);
7078 if (!preg->translate)
7079 return (int) REG_ESPACE;
7080
7081 /* Map uppercase characters to corresponding lowercase ones. */
7082 for (i = 0; i < CHAR_SET_SIZE; i++)
7083 preg->translate[i] = isupper (i) ? tolower (i) : i;
7084 }
7085 else
7086 preg->translate = 0;
7087
7088 /* If REG_NEWLINE is set, newlines are treated differently. */
7089 if (cflags & REG_NEWLINE)
7090 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
7091 syntax &= ~RE_DOT_NEWLINE;
7092 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
7093 /* It also changes the matching behavior. */
7094 preg->newline_anchor = 1;
7095 }
7096 else
7097 preg->newline_anchor = 0;
7098
7099 preg->no_sub = !!(cflags & REG_NOSUB);
7100
7101 /* POSIX says a null character in the pattern terminates it, so we
7102 can use strlen here in compiling the pattern. */
7103 preg->re_nsub = 0;
7104 preg->start = 0;
7105 preg->se_params = 0;
7106 preg->syntax_parens = 0;
7107 preg->rx.nodec = 0;
7108 preg->rx.epsnodec = 0;
7109 preg->rx.instruction_table = 0;
7110 preg->rx.nfa_states = 0;
7111 preg->rx.local_cset_size = 256;
7112 preg->rx.start = 0;
7113 preg->rx.se_list_cmp = posix_se_list_order;
7114 preg->rx.start_set = 0;
7115 ret = rx_compile (pattern, strlen (pattern), syntax, preg);
7116
7117#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
7118 _alloca (0);
7119#else
7120 alloca (0);
7121#endif
7122
7123 /* POSIX doesn't distinguish between an unmatched open-group and an
7124 unmatched close-group: both are REG_EPAREN. */
7125 if (ret == REG_ERPAREN) ret = REG_EPAREN;
7126
7127 return (int) ret;
7128}
7129
7130
7131/* regexec searches for a given pattern, specified by PREG, in the
7132 string STRING.
7133
7134 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
7135 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
7136 least NMATCH elements, and we set them to the offsets of the
7137 corresponding matched substrings.
7138
7139 EFLAGS specifies `execution flags' which affect matching: if
7140 REG_NOTBOL is set, then ^ does not match at the beginning of the
7141 string; if REG_NOTEOL is set, then $ does not match at the end.
7142
7143 We return 0 if we find a match and REG_NOMATCH if not. */
7144
7145#ifdef __STDC__
7146int
7147regexec (__const__ regex_t *preg, __const__ char *string,
7148 size_t nmatch, regmatch_t pmatch[],
7149 int eflags)
7150#else
7151int
7152regexec (preg, string, nmatch, pmatch, eflags)
7153 __const__ regex_t *preg;
7154 __const__ char *string;
7155 size_t nmatch;
7156 regmatch_t pmatch[];
7157 int eflags;
7158#endif
7159{
7160 int ret;
7161 struct re_registers regs;
7162 regex_t private_preg;
7163 int len = strlen (string);
7164 boolean want_reg_info = !preg->no_sub && nmatch > 0;
7165
7166 private_preg = *preg;
7167
7168 private_preg.not_bol = !!(eflags & REG_NOTBOL);
7169 private_preg.not_eol = !!(eflags & REG_NOTEOL);
7170
7171 /* The user has told us exactly how many registers to return
7172 * information about, via `nmatch'. We have to pass that on to the
7173 * matching routines.
7174 */
7175 private_preg.regs_allocated = REGS_FIXED;
7176
7177 if (want_reg_info)
7178 {
7179 regs.num_regs = nmatch;
7180 regs.start = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
7181 regs.end = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
7182 if (regs.start == 0 || regs.end == 0)
7183 return (int) REG_NOMATCH;
7184 }
7185
7186 /* Perform the searching operation. */
7187 ret = re_search (&private_preg,
7188 string, len,
7189 /* start: */ 0,
7190 /* range: */ len,
7191 want_reg_info ? &regs : (struct re_registers *) 0);
7192
7193 /* Copy the register information to the POSIX structure. */
7194 if (want_reg_info)
7195 {
7196 if (ret >= 0)
7197 {
7198 unsigned r;
7199
7200 for (r = 0; r < nmatch; r++)
7201 {
7202 pmatch[r].rm_so = regs.start[r];
7203 pmatch[r].rm_eo = regs.end[r];
7204 }
7205 }
7206
7207 /* If we needed the temporary register info, free the space now. */
7208 free (regs.start);
7209 free (regs.end);
7210 }
7211
7212 /* We want zero return to mean success, unlike `re_search'. */
7213 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
7214}
7215
7216
7217/* Returns a message corresponding to an error code, ERRCODE, returned
7218 from either regcomp or regexec. */
7219
7220#ifdef __STDC__
7221size_t
7222regerror (int rx_errcode, __const__ regex_t *preg,
7223 char *errbuf, size_t errbuf_size)
7224#else
7225size_t
7226regerror (rx_errcode, preg, errbuf, errbuf_size)
7227 int rx_errcode;
7228 __const__ regex_t *preg;
7229 char *errbuf;
7230 size_t errbuf_size;
7231#endif
7232{
7233 __const__ char *msg
7234 = rx_error_msg[rx_errcode] == 0 ? "Success" : rx_error_msg[rx_errcode];
7235 size_t msg_size = strlen (msg) + 1; /* Includes the 0. */
7236
7237 if (errbuf_size != 0)
7238 {
7239 if (msg_size > errbuf_size)
7240 {
7241 strncpy (errbuf, msg, errbuf_size - 1);
7242 errbuf[errbuf_size - 1] = 0;
7243 }
7244 else
7245 strcpy (errbuf, msg);
7246 }
7247
7248 return msg_size;
7249}
7250
7251
7252/* Free dynamically allocated space used by PREG. */
7253
7254#ifdef __STDC__
7255void
7256regfree (regex_t *preg)
7257#else
7258void
7259regfree (preg)
7260 regex_t *preg;
7261#endif
7262{
7263 if (preg->buffer != 0)
7264 free (preg->buffer);
7265 preg->buffer = 0;
7266 preg->allocated = 0;
7267
7268 if (preg->fastmap != 0)
7269 free (preg->fastmap);
7270 preg->fastmap = 0;
7271 preg->fastmap_accurate = 0;
7272
7273 if (preg->translate != 0)
7274 free (preg->translate);
7275 preg->translate = 0;
7276}
7277
7278#endif /* not emacs */
7279
7280
7281
7282
7283
Note: See TracBrowser for help on using the repository browser.