source: trunk/gsdl/perllib/Kea-1.1.4/Iterated-Lovins-stemmer/stem-Lovins-iterated.c@ 3161

Last change on this file since 3161 was 1972, checked in by jmt14, 23 years ago

* empty log message *

  • Property svn:keywords set to Author Date Id Revision
File size: 2.5 KB
Line 
1/**************************************************************************
2 *
3 * stem-Lovins-iterated.c
4 * Stem STDIN text with the iterated Lovins stemmer.
5 * Copyright 1997 by Gordon Paynter ([email protected])
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 **************************************************************************/
22
23#include <assert.h>
24#include <string.h>
25#include <stdio.h>
26#include "stem.h"
27
/*
 * Stem a word repeatedly until a pass of stem() leaves it unchanged.
 *
 * lword is a length-prefixed string: byte 0 holds the number of letters
 * and the letters follow from byte 1.  It is modified in place by stem()
 * (presumably declared in stem.h -- it is not visible in this file).
 *
 * A pass counts as "changed the word" when it shortened the word or
 * altered the character sitting at the previous end position; in either
 * case the stemmer is run again.
 *
 * The buffer is inspected through unsigned char so that a length byte of
 * 128..255 is read as that length even where plain char is signed;
 * otherwise it would become a negative array index.
 */
void iterated_stem( char *lword ) {
  const unsigned char *bytes = (const unsigned char *) lword;
  unsigned char length = bytes[0];
  unsigned char last = bytes[length];

  stem(lword);

  /* bytes[length] deliberately uses the length from before the latest
     pass: it detects a rewritten ending even when the length is equal. */
  while ((length > bytes[0]) || (last != bytes[length])) {
    length = bytes[0];
    last = bytes[length];
    stem(lword);
  }
}
38
/*
 * Filter STDIN to STDOUT, replacing every run of ASCII letters with its
 * lower-cased, iteratively stemmed form.  All other bytes are copied
 * through unchanged.
 *
 * Runs of more than 255 letters cannot be described by the stemmer's
 * one-byte length prefix; they are lower-cased and written out unstemmed.
 *
 * Returns 0 when the input has been consumed.
 */
int main(void)
{
  /* the stem algorithm takes a string with
     the length in the first byte as input.
     this is the lword variable. word is a
     pointer to the start of the string in that
     variable--the second character--for use in
     string operations. */
  enum { MAX_STEM_LENGTH = 255 };

  /* Only MAX_STEM_LENGTH letters are ever buffered here.  The original
     1000-byte size is kept because stem() is opaque from this file and
     may rely on slack after the word -- TODO confirm. */
  char lword[1000];
  char *word = &lword[1];

  int length = 0;      /* letters buffered in word so far */
  int overlong = 0;    /* non-zero while streaming a word too long to stem */
  int c;
  int going = 1;

  while (going) {

    c = getchar();
    if ((c >= 'A') && (c <= 'Z'))
      c += 'a' - 'A';

    if ((c >= 'a') && (c <= 'z')) {
      if (overlong) {
        /* already gave up on stemming this word: pass it straight through */
        putchar(c);

      } else if (length == MAX_STEM_LENGTH) {
        /* one letter too many for the stemmer: emit what is buffered
           plus this letter, and stream the rest of the word unstemmed
           instead of writing past the buffer */
        word[length] = (char) 0;
        printf("%s%c", word, c);
        length = 0;
        overlong = 1;

      } else {
        // add the new character to the word
        word[length++] = (char) c;
      }
      continue;
    }

    /* c is a delimiter or EOF, so any word in progress ends here.
       Handling EOF on this path keeps a final word that has no trailing
       delimiter from being lost. */
    if (length > 0) {
      // the word is the correct length to be stemmed
      lword[0] = (char) length;
      iterated_stem( lword );
      /* read the stemmed length back as unsigned: lengths above 127
         must not turn into a negative index where char is signed */
      word[(unsigned char) lword[0]] = (char) 0;
      fputs(word, stdout);
      length = 0;
    }
    overlong = 0;

    if (c == EOF) {
      // we've finished
      going = 0;
    } else {
      putchar(c);
    }
  }

  return 0;
}
Note: See TracBrowser for help on using the repository browser.