source: other-projects/trunk/protemix/src/recpt/equivterms.cpp@ 14162

Last change on this file since 14162 was 3182, checked in by sjboddie, 22 years ago

* empty log message *

  • Property svn:keywords set to Author Date Id Revision
File size: 4.2 KB
Line 
1
2#include "text_t.h"
3#include <ctype.h> // for isspace
4#include <vector>
5
6void equiv_lookup(text_t& terms) {
7 // eventually we'll get these terms out of a config file or something...
8 vector<text_t> group;
9 vector< vector<text_t> > all_groups;
10
11 group.push_back("triene");
12 group.push_back("trientine");
13 group.push_back("trien");
14 group.push_back("\"triethylenetetramine dihydrochloride\"");
15 group.push_back("\"triethylene tetramine dihydrochloride\"");
16 group.push_back("syprine");
17 group.push_back("teta");
18 group.push_back(text_t("\"trien 2hcl\""));
19 group.push_back(text_t("\"triethylenetetramine 2hcl\""));
20 group.push_back(text_t("\"bis aminoethyl ethanediamine dihydrochloride\""));
21 all_groups.push_back(group);
22
23
24 group.clear();
25 group.push_back("pkpd");
26 group.push_back("pkin");
27 group.push_back("pharmocokinetic");
28 group.push_back("pharmacodynamic");
29 all_groups.push_back(group);
30
31
32 group.clear();
33 group.push_back("safety");
34 group.push_back("\"side effects\"");
35 group.push_back("toxicity");
36 group.push_back("toxicology");
37 all_groups.push_back(group);
38
39
40 group.clear();
41 group.push_back("sod");
42 group.push_back("\"superoxide dismutase\"");
43 group.push_back("\"super oxide dismutase\"");
44 all_groups.push_back(group);
45
46
47 group.clear();
48 group.push_back("ceruloplasmin");
49 group.push_back("caeruloplasmin");
50 all_groups.push_back(group);
51
52
53 // we always convert to lowercase to do the lookup
54 text_t casestemoptions="";
55 text_t termproper="";
56
57 text_t::const_iterator hash=terms.end();
58 text_t::const_iterator start=terms.begin();
59 while (hash!=start) {
60 if (*hash=='#') break;
61 --hash;
62 }
63 if (hash!=start) { // we found a hash
64 casestemoptions=substr(hash,terms.end());
65 termproper=substr(start,hash);
66 } else {
67 termproper=terms;
68 }
69
70 // canonicalise termproper to lowercase for lookup
71 lc(termproper.begin(), termproper.end());
72
73 // look up vectors
74 vector< vector<text_t> >::const_iterator group_here=all_groups.begin();
75 vector< vector<text_t> >::const_iterator group_done=all_groups.end();
76
77 while (group_here != group_done) {
78 vector<text_t>::const_iterator syn_here=group_here->begin();
79 vector<text_t>::const_iterator syn_done=group_here->end();
80
81 while (syn_here != syn_done) {
82 if (*syn_here == termproper) // we found a match
83 break;
84 ++syn_here;
85 }
86 if (syn_here != syn_done) { // we found a match in this group
87 syn_here=group_here->begin();
88 terms="(";
89
90 while (syn_here != syn_done) {
91 terms+= *syn_here;
92 terms+= casestemoptions;
93 if (syn_here != syn_done - 1) {
94 terms+=" | ";
95 }
96 ++syn_here;
97 }
98 terms.push_back(')');
99 break;
100 }
101 ++group_here;
102 }
103
104}
105
106void insert_equiv_terms(text_t& query) {
107
108 // we assume that for protemix, we don't have "high" unicode values.
109 // don't do terms inside quotes
110 text_t new_query="";
111
112 unsigned int query_length=query.size();
113 unsigned int i;
114 bool inside_quotes=false;
115 bool inside_field=false; // we need to OR together the terms inside fields
116 bool in_a_term=false;
117 text_t::const_iterator here = query.begin();
118 text_t::const_iterator end = query.end();
119
120 text_t::const_iterator term_start=here;
121 // | and &...
122 while (here < end) {
123 if (*here=='"') {
124 new_query.push_back('"');
125 if (inside_quotes==false) {
126 inside_quotes=true;
127 while (*(++here) != '"')
128 new_query.push_back(*here);
129 new_query.push_back('"');
130 } else {
131 inside_quotes=false;
132 }
133 } else if (isspace(*here)) {
134 if (in_a_term) { // we've reached the end of a term
135 // this might include case and stem options...
136 text_t term=substr(term_start,here);
137 equiv_lookup(term);
138 new_query+=term;
139 new_query+=" ";
140 in_a_term=false;
141 }
142 } else if (*here=='[') {
143 // start of a field...
144 } else if (*here==']') {
145 // end of a field...
146 } else if (!in_a_term) { // this char is start of term
147 term_start=here;
148 in_a_term=true;
149 }
150
151 ++here;
152 }
153
154 // tidy up for last term
155 if (in_a_term) {
156 text_t term=substr(term_start,here);
157 equiv_lookup(term);
158 new_query+=term;
159 }
160 query=new_query;
161}
Note: See TracBrowser for help on using the repository browser.