1 | /**************************************************************************
|
---|
2 | *
|
---|
3 | * stem-Lovins-iterated.c
|
---|
4 | * Stem STDIN text with the iterated Lovins stemmer.
|
---|
5 | * Copyright 1997 by Gordon Paynter ([email protected])
|
---|
6 | *
|
---|
7 | * This program is free software; you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation; either version 2 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * This program is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with this program; if not, write to the Free Software
|
---|
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
20 | *
|
---|
21 | **************************************************************************/
|
---|
22 |
|
---|
23 | #include <assert.h>
|
---|
24 | #include <string.h>
|
---|
25 | #include <stdio.h>
|
---|
26 | #include "stem.h"
|
---|
27 |
|
---|
/*
 * Apply stem() to a length-prefixed word repeatedly until a pass no
 * longer changes it.
 *
 * lword[0] holds the word length and the letters follow from lword[1];
 * stem() is expected to rewrite lword in place, including the length byte.
 * A pass counts as a change when the length shrank or when the character
 * at the previous final position is different afterwards.
 */
void iterated_stem( char *lword ) {
    /* The length byte can hold up to 255, so it must be read as unsigned:
       with a signed plain char, lengths above 127 would turn negative and
       index in front of the buffer. */
    unsigned char length;
    char last;

    do {
        /* remember the word's length and final letter before this pass */
        length = (unsigned char) lword[0];
        last = lword[length];
        stem(lword);
    } while ((length > (unsigned char) lword[0]) || (last != lword[length]));
}
|
---|
38 |
|
---|
/*
 * Copy STDIN to STDOUT, lower-casing ASCII letters and replacing every
 * run of letters (a "word") of at most 255 characters with its iterated
 * stem. Longer words are written out lower-cased but unstemmed, and all
 * non-letter characters are passed through unchanged.
 */
int main(void)
{
    /* the stem algorithm takes a string with
       the length in the first byte as input.
       this is the lword variable. word is a
       pointer to the start of the string in that
       variable--the second character--for use in
       string operations. */

    /* longest word the one-byte length prefix can describe */
    enum { MAX_STEM_LENGTH = 255 };

    char lword[MAX_STEM_LENGTH + 2];   /* length byte + letters + NUL */
    char *word = &lword[1];

    int length = 0;      /* letters currently buffered in word */
    int overlong = 0;    /* non-zero while inside a word that is too long */
    int c;

    for (;;) {

        c = getchar();
        if (c >= 'A' && c <= 'Z')
            c += 'a' - 'A';

        if (c >= 'a' && c <= 'z') {
            if (overlong) {
                // already known to be too long to stem: pass it straight through
                putchar(c);
            } else if (length == MAX_STEM_LENGTH) {
                // the word is too long for the stemmer to handle; output
                // what was buffered and stream the rest of it unstemmed
                word[length] = '\0';
                fputs(word, stdout);
                putchar(c);
                length = 0;
                overlong = 1;
            } else {
                // add the new character to the word
                word[length++] = (char) c;
            }
            continue;
        }

        // c is a non-letter or EOF, so any word in progress ends here
        if (length > 0) {
            // the word is the correct length to be stemmed
            lword[0] = (char) length;
            iterated_stem( lword );
            // read the length byte as unsigned so stems longer than 127
            // letters never produce a negative index
            word[(unsigned char) lword[0]] = '\0';
            fputs(word, stdout);
            length = 0;
        }
        overlong = 0;

        if (c == EOF) {
            // we've finished (the final word has already been flushed above)
            break;
        }

        putchar(c);
    }

    return 0;
}
|
---|