Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: other-projects/trunk/protemix/src/recpt/equivterms.cpp@ 14162

Last change on this file since 14162 was 3182, checked in by sjboddie, 22 years ago
* empty log message *
Property svn:keywords set to `Author Date Id Revision`
File size: 4.2 KB

Line
1
2	#include "text_t.h"
3	#include <ctype.h> // for isspace
4	#include <vector>
5
6	void equiv_lookup(text_t& terms) {
7	// eventually we'll get these terms out of a config file or something...
8	vector<text_t> group;
9	vector< vector<text_t> > all_groups;
10
11	group.push_back("triene");
12	group.push_back("trientine");
13	group.push_back("trien");
14	group.push_back("\"triethylenetetramine dihydrochloride\"");
15	group.push_back("\"triethylene tetramine dihydrochloride\"");
16	group.push_back("syprine");
17	group.push_back("teta");
18	group.push_back(text_t("\"trien 2hcl\""));
19	group.push_back(text_t("\"triethylenetetramine 2hcl\""));
20	group.push_back(text_t("\"bis aminoethyl ethanediamine dihydrochloride\""));
21	all_groups.push_back(group);
22
23
24	group.clear();
25	group.push_back("pkpd");
26	group.push_back("pkin");
27	group.push_back("pharmocokinetic");
28	group.push_back("pharmacodynamic");
29	all_groups.push_back(group);
30
31
32	group.clear();
33	group.push_back("safety");
34	group.push_back("\"side effects\"");
35	group.push_back("toxicity");
36	group.push_back("toxicology");
37	all_groups.push_back(group);
38
39
40	group.clear();
41	group.push_back("sod");
42	group.push_back("\"superoxide dismutase\"");
43	group.push_back("\"super oxide dismutase\"");
44	all_groups.push_back(group);
45
46
47	group.clear();
48	group.push_back("ceruloplasmin");
49	group.push_back("caeruloplasmin");
50	all_groups.push_back(group);
51
52
53	// we always convert to lowercase to do the lookup
54	text_t casestemoptions="";
55	text_t termproper="";
56
57	text_t::const_iterator hash=terms.end();
58	text_t::const_iterator start=terms.begin();
59	while (hash!=start) {
60	if (*hash=='#') break;
61	--hash;
62	}
63	if (hash!=start) { // we found a hash
64	casestemoptions=substr(hash,terms.end());
65	termproper=substr(start,hash);
66	} else {
67	termproper=terms;
68	}
69
70	// canonicalise termproper to lowercase for lookup
71	lc(termproper.begin(), termproper.end());
72
73	// look up vectors
74	vector< vector<text_t> >::const_iterator group_here=all_groups.begin();
75	vector< vector<text_t> >::const_iterator group_done=all_groups.end();
76
77	while (group_here != group_done) {
78	vector<text_t>::const_iterator syn_here=group_here->begin();
79	vector<text_t>::const_iterator syn_done=group_here->end();
80
81	while (syn_here != syn_done) {
82	if (*syn_here == termproper) // we found a match
83	break;
84	++syn_here;
85	}
86	if (syn_here != syn_done) { // we found a match in this group
87	syn_here=group_here->begin();
88	terms="(";
89
90	while (syn_here != syn_done) {
91	terms+= *syn_here;
92	terms+= casestemoptions;
93	if (syn_here != syn_done - 1) {
94	terms+=" \| ";
95	}
96	++syn_here;
97	}
98	terms.push_back(')');
99	break;
100	}
101	++group_here;
102	}
103
104	}
105
106	void insert_equiv_terms(text_t& query) {
107
108	// we assume that for protemix, we don't have "high" unicode values.
109	// don't do terms inside quotes
110	text_t new_query="";
111
112	unsigned int query_length=query.size();
113	unsigned int i;
114	bool inside_quotes=false;
115	bool inside_field=false; // we need to OR together the terms inside fields
116	bool in_a_term=false;
117	text_t::const_iterator here = query.begin();
118	text_t::const_iterator end = query.end();
119
120	text_t::const_iterator term_start=here;
121	// \| and &...
122	while (here < end) {
123	if (*here=='"') {
124	new_query.push_back('"');
125	if (inside_quotes==false) {
126	inside_quotes=true;
127	while (*(++here) != '"')
128	new_query.push_back(*here);
129	new_query.push_back('"');
130	} else {
131	inside_quotes=false;
132	}
133	} else if (isspace(*here)) {
134	if (in_a_term) { // we've reached the end of a term
135	// this might include case and stem options...
136	text_t term=substr(term_start,here);
137	equiv_lookup(term);
138	new_query+=term;
139	new_query+=" ";
140	in_a_term=false;
141	}
142	} else if (*here=='[') {
143	// start of a field...
144	} else if (*here==']') {
145	// end of a field...
146	} else if (!in_a_term) { // this char is start of term
147	term_start=here;
148	in_a_term=true;
149	}
150
151	++here;
152	}
153
154	// tidy up for last term
155	if (in_a_term) {
156	text_t term=substr(term_start,here);
157	equiv_lookup(term);
158	new_query+=term;
159	}
160	query=new_query;
161	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: