1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * z3950proxy.cpp --
|
---|
4 | * Copyright (C) 2000 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * A component of the Greenstone digital library software
|
---|
7 | * from the New Zealand Digital Library Project at the
|
---|
8 | * University of Waikato, New Zealand.
|
---|
9 | *
|
---|
10 | * This program is free software; you can redistribute it and/or modify
|
---|
11 | * it under the terms of the GNU General Public License as published by
|
---|
12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
13 | * (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This program is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | * GNU General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU General Public License
|
---|
21 | * along with this program; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | *
|
---|
24 | *********************************************************************/
|
---|
25 |
|
---|
26 | #include "z3950proxy.h"
|
---|
27 | #include "comtypes.h"
|
---|
28 | #include <stdio.h>
|
---|
29 |
|
---|
30 | // z39.50 yaz stuff
|
---|
31 | #define min(a,b) (((a)<(b))?(a):(b))
|
---|
32 |
|
---|
33 | extern "C" {
|
---|
34 | #include "yaz_zclient.h"
|
---|
35 | }
|
---|
36 |
|
---|
37 | /***
|
---|
38 | each z39.50 server+database pair is a GSDL collection.
|
---|
39 | ***/
|
---|
40 |
|
---|
41 | z3950_proxy::z3950_proxy() {
|
---|
42 | info=NULL;
|
---|
43 | connected=false;
|
---|
44 | titles=NULL;
|
---|
45 | gotsinglerecord=false;
|
---|
46 | // for now, assume that all records will have text associated with them.
|
---|
47 | meta["hastxt"]="1";
|
---|
48 | // for now, assume we don't want ANY DocumentButtons.
|
---|
49 | format["DocumentButtons"]=g_EmptyText;
|
---|
50 | }
|
---|
51 |
|
---|
52 | z3950_proxy::~z3950_proxy() {
|
---|
53 | }
|
---|
54 |
|
---|
55 | void z3950_proxy::setMeta(const text_t &key, const text_t &value) {
|
---|
56 | meta[key]=value;
|
---|
57 | }
|
---|
58 |
|
---|
59 | void z3950_proxy::setName(const text_t &newname) {
|
---|
60 | title=newname;
|
---|
61 | meta["collectionname"]=newname;
|
---|
62 | }
|
---|
63 |
|
---|
64 | void z3950_proxy::addcfgAbout(const text_t &lang, const text_t &abouttext) {
|
---|
65 | about[lang]=abouttext;
|
---|
66 | }
|
---|
67 |
|
---|
68 | bool z3950_proxy::getcfgAbout(const text_t &lang, text_t &abouttxt) {
|
---|
69 | text_tmap::iterator it;
|
---|
70 | it=about.find(lang);
|
---|
71 | if (it==about.end()) return (false);
|
---|
72 | abouttxt=((*it).second);
|
---|
73 | return (true);
|
---|
74 | }
|
---|
75 |
|
---|
76 |
|
---|
77 | // now functions that actually talk over the tcp connection.
|
---|
78 |
|
---|
79 | // create a tcp connection to the associated target. Currently, this will
|
---|
80 | // re-initialise if we are already connected.
|
---|
81 | bool z3950_proxy::connect() {
|
---|
82 | text_t server_and_port;
|
---|
83 | char *zserverinfo;
|
---|
84 |
|
---|
85 | server_and_port=info->host+":"+info->port;
|
---|
86 | // remember that info.name is the database name
|
---|
87 |
|
---|
88 | z_initialize();
|
---|
89 | char* serv_str=server_and_port.getcstr();
|
---|
90 | char* name_str=info->name.getcstr();
|
---|
91 | int retval=z_cmd_open(serv_str,name_str);
|
---|
92 | delete []serv_str;
|
---|
93 | delete []name_str;
|
---|
94 | if (retval==1)
|
---|
95 | // we got a connection error
|
---|
96 | return false;
|
---|
97 |
|
---|
98 | // get initialisation response.
|
---|
99 | z_getnextAPDU();
|
---|
100 | zserverinfo=z_get_initResponse();
|
---|
101 | if (zserverinfo!=NULL) {
|
---|
102 | z_initstr.appendcstr(zserverinfo);
|
---|
103 | }
|
---|
104 | free(zserverinfo);
|
---|
105 |
|
---|
106 | connected=true;
|
---|
107 | return true;
|
---|
108 | }
|
---|
109 |
|
---|
110 | void z3950_proxy::parseQuery(const text_t &query,
|
---|
111 | const int querytype,
|
---|
112 | const text_t &fields,
|
---|
113 | text_t &parsed_query) {
|
---|
114 | /****** FIXME *****/
|
---|
115 | // We need to format the query string into RPN -
|
---|
116 | // by just passing it like this, it will only work for simple queries.
|
---|
117 | // This will require us to actually come up with a query syntax and
|
---|
118 | // a parser. For now, we'll just do an "AND" query for all terms
|
---|
119 | // But look at Common Command Language (CCL) query syntax (ISO 8777).
|
---|
120 |
|
---|
121 | // need to remove " chars from the query. We should really tell the server
|
---|
122 | // to do a phrase search on the terms that are between the "s, but we
|
---|
123 | // can't (easily) tell if the server can do that or not,
|
---|
124 | // so we'll currently just do a query and then post-process. (not yet
|
---|
125 | // implemented........)
|
---|
126 |
|
---|
127 | // we need to count number of terms separated by a space
|
---|
128 | char *ptr=query.getcstr();
|
---|
129 | int strlength=strlen(ptr);
|
---|
130 | bool inword=false;
|
---|
131 | int num_terms=0;
|
---|
132 |
|
---|
133 |
|
---|
134 | for (int i=0;i<strlength;++i) {
|
---|
135 | if (*(ptr+i)=='"') { // convert " to SPACE...
|
---|
136 | *(ptr+i)=' ';
|
---|
137 | }
|
---|
138 | if (*(ptr+i)!=' ') {
|
---|
139 | if (inword==false) {
|
---|
140 | inword=true;
|
---|
141 | ++num_terms;
|
---|
142 | }
|
---|
143 | }
|
---|
144 | else { // ptr+i is a space
|
---|
145 | inword=false;
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | // set the field(s) to search on - main ones include:
|
---|
150 | // 1016 => Any
|
---|
151 | // 1 => (Personal) Name
|
---|
152 | // 4 => Title
|
---|
153 | // 21 => Subject Heading
|
---|
154 | // 45 => Subject precis
|
---|
155 | // Note I have no idea how these actually work - I think some servers
|
---|
156 | // only have limited fields, and map all subject-type requests into that
|
---|
157 | // subject field, etc.
|
---|
158 |
|
---|
159 | parsed_query="@attr 1=";
|
---|
160 | if (fields==".author")
|
---|
161 | parsed_query+="1 ";
|
---|
162 | else if (fields==".title")
|
---|
163 | parsed_query+="4 ";
|
---|
164 | else // fields==".any"
|
---|
165 | parsed_query+="1016 ";
|
---|
166 |
|
---|
167 | // querytype=1 => ranked/or, =2 => boolean/and
|
---|
168 | // append "@and" for each term after the first
|
---|
169 | {
|
---|
170 | char and_str[]="@and ";
|
---|
171 | char or_str[]="@or ";
|
---|
172 | char *q_type;
|
---|
173 | if (querytype==1) q_type=or_str; else q_type=and_str;
|
---|
174 | for (int i=1;i<num_terms;++i)
|
---|
175 | parsed_query+=q_type;
|
---|
176 | // append the actual query
|
---|
177 | parsed_query+=ptr;
|
---|
178 | }
|
---|
179 | delete []ptr;
|
---|
180 | }
|
---|
181 |
|
---|
182 | text_tarray *z3950_proxy::getrecordTitles(const text_t &query,
|
---|
183 | const int querytype,
|
---|
184 | const text_t &fields,
|
---|
185 | int first, int count,
|
---|
186 | int *nummatches, comerror_t &err) {
|
---|
187 | /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
|
---|
188 | we only ever do one request. Therefore, it is CURRENTLY OK to store
|
---|
189 | (cache) the retrieved titles, because if this function is ever called
|
---|
190 | more than once, the arguments will be the same each time.
|
---|
191 | (I think :)
|
---|
192 | */
|
---|
193 |
|
---|
194 | char **c_str_titles;
|
---|
195 | int i;
|
---|
196 | int last;
|
---|
197 |
|
---|
198 | if (gotsinglerecord==true) {
|
---|
199 | /* If true, then this whole execution was done to retrieve a single
|
---|
200 | document. Therefore, the list of titles of all records matching the
|
---|
201 | query isn't actually required. It's just that for some reason our
|
---|
202 | filter (z3950proto::filter) gets called at least 7 times, with the
|
---|
203 | 7th being a "QueryFilter" for some reason... */
|
---|
204 | nummatches=0;
|
---|
205 | return NULL; /* shouldn't really return NULL, but nummatches is checked
|
---|
206 | first (I hope) */
|
---|
207 | }
|
---|
208 |
|
---|
209 | // if (titles!=NULL) delete (titles);
|
---|
210 | if (titles!=NULL) return titles;
|
---|
211 | titles=new text_tarray;
|
---|
212 |
|
---|
213 | /* check if connected */
|
---|
214 | if (connected==false)
|
---|
215 | if (connect()==false) {
|
---|
216 | // we could not connect.
|
---|
217 | err=protocolError;
|
---|
218 | return (NULL);
|
---|
219 | }
|
---|
220 |
|
---|
221 |
|
---|
222 | text_t expanded_query=g_EmptyText;
|
---|
223 | parseQuery(query,querytype,fields,expanded_query);
|
---|
224 |
|
---|
225 | char* query_str=expanded_query.getcstr();
|
---|
226 | // following functions defined in yaz_zclient.c
|
---|
227 | *nummatches=z_cmd_dosearch(query_str); // returns # found, -1 on err.
|
---|
228 | delete []query_str;
|
---|
229 | if (*nummatches<=0) {
|
---|
230 | if (*nummatches==0) {
|
---|
231 | // no matches
|
---|
232 | return (NULL);
|
---|
233 | } else if (*nummatches==-1) {
|
---|
234 | // prefix query error
|
---|
235 | err=protocolError;
|
---|
236 | return (NULL);
|
---|
237 | } else if (*nummatches==-2) {
|
---|
238 | // sendsearchRequest not answered by searchResponse
|
---|
239 | err=protocolError;
|
---|
240 | return (NULL);
|
---|
241 | }
|
---|
242 | }
|
---|
243 | // could do a sort eventually, eg on date, title, etc.
|
---|
244 | // (non-existent function) z_sort(field, asc|desc);
|
---|
245 | /* min of (count, first + (*nummatches) ) */
|
---|
246 | // z_getrecordTitles ( first, howmany )
|
---|
247 | c_str_titles=z_getrecordTitles(first,min(count,*nummatches-first+1));
|
---|
248 | if (c_str_titles==NULL) {
|
---|
249 | // an error occurred. we need a logout/err as an arg
|
---|
250 | return (NULL);
|
---|
251 | }
|
---|
252 | if (c_str_titles[0]==0) {
|
---|
253 | // no matches.
|
---|
254 | return (NULL);
|
---|
255 | }
|
---|
256 | last=(int)c_str_titles[0];
|
---|
257 | for (i=1;i<=last;++i) {
|
---|
258 | titles->push_back(c_str_titles[i]);
|
---|
259 | free(c_str_titles[i]);
|
---|
260 | }
|
---|
261 | free(c_str_titles);
|
---|
262 | return (titles);
|
---|
263 |
|
---|
264 | }
|
---|
265 |
|
---|
266 | bool z3950_proxy::getfullrecord(const text_t &query,
|
---|
267 | const int querytype,
|
---|
268 | const text_t &fields,
|
---|
269 | const int ID,
|
---|
270 | text_t &rettitle,
|
---|
271 | text_t &rettext, comerror_t &err) {
|
---|
272 |
|
---|
273 | static char **c_str_titles=NULL;
|
---|
274 | static char *fulltext=NULL;
|
---|
275 | /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
|
---|
276 | we only ever do one request. Therefore, it is CURRENTLY OK to store
|
---|
277 | (cache) the retrieved titles, because if this function is ever called
|
---|
278 | more than once, the arguments will be the same each time.
|
---|
279 | (I think :)
|
---|
280 | */
|
---|
281 |
|
---|
282 | gotsinglerecord=true; // well, not yet, but we've been called...
|
---|
283 |
|
---|
284 | if (connected==false) {
|
---|
285 | if (connect()==false) {
|
---|
286 | // error connecting...
|
---|
287 | err=protocolError;
|
---|
288 | return (false);
|
---|
289 | }
|
---|
290 | // since we have just re-connected, we need to do the
|
---|
291 | // query again.
|
---|
292 |
|
---|
293 | text_t expanded_query=g_EmptyText;
|
---|
294 | parseQuery(query,querytype,fields,expanded_query);
|
---|
295 |
|
---|
296 | char* query_str=expanded_query.getcstr();
|
---|
297 | int returned=z_cmd_dosearch(query_str);
|
---|
298 | delete []query_str;
|
---|
299 | if (returned<=0) {
|
---|
300 | // 0 => none.
|
---|
301 | // <0 => error
|
---|
302 | err=protocolError;
|
---|
303 | return (false);
|
---|
304 | }
|
---|
305 | }
|
---|
306 |
|
---|
307 | if (c_str_titles==NULL)
|
---|
308 | c_str_titles=z_getrecordTitles(ID,1); // check this return value.
|
---|
309 |
|
---|
310 | if (rettitle!="unneeded") {
|
---|
311 | //int dummy;
|
---|
312 | if (c_str_titles!=NULL && (int)c_str_titles[0]==1) {
|
---|
313 | rettitle.setcstr(c_str_titles[1]); // and check this
|
---|
314 | ////// free (c_str_titles); - we want to "cache" it
|
---|
315 | } else {
|
---|
316 | // we didn't get something....
|
---|
317 | rettitle="Nothing Returned...";
|
---|
318 | }
|
---|
319 | }
|
---|
320 |
|
---|
321 | if (fulltext==NULL)
|
---|
322 | // get the text
|
---|
323 | fulltext=z_getfullRecord(ID);
|
---|
324 |
|
---|
325 | if (rettext!="unneeded") {
|
---|
326 | rettext.setcstr(fulltext);
|
---|
327 | }
|
---|
328 | return (true);
|
---|
329 | }
|
---|
330 |
|
---|
331 | text_t &z3950_proxy::getzAbout() {
|
---|
332 | text_t zserverresp;
|
---|
333 |
|
---|
334 | // Assume we have not yet connected, so that must be done here.
|
---|
335 | if (connected==true)
|
---|
336 | return (z_initstr);
|
---|
337 |
|
---|
338 | // we need to create the tcp connection to the target (server)
|
---|
339 | // z_initstr=new text_t;
|
---|
340 |
|
---|
341 | if (connect()==false) {
|
---|
342 | z_initstr.setcstr("<H2>Server offline</H2>Error - could not connect to server <B>");
|
---|
343 | z_initstr += info->host;
|
---|
344 | z_initstr += "</B> on port ";
|
---|
345 | z_initstr += info->port;
|
---|
346 | z_initstr += "\n";
|
---|
347 | return (z_initstr);
|
---|
348 | }
|
---|
349 |
|
---|
350 | // z_initstr currently contains the target's response. We want to
|
---|
351 | // PREPEND the following information.
|
---|
352 | zserverresp=z_initstr;
|
---|
353 | z_initstr="Internet server: <b>";
|
---|
354 | z_initstr+=info->host;
|
---|
355 | z_initstr+="</b> on port ";
|
---|
356 | z_initstr+=info->port;
|
---|
357 | z_initstr+=".<br>\n";
|
---|
358 | z_initstr+=zserverresp;
|
---|
359 |
|
---|
360 | // should close /******* WHAT IF DOING A QUERY!??!?!? ********/
|
---|
361 | // z_cmd_close(0);
|
---|
362 | // connected=false;
|
---|
363 | return (z_initstr);
|
---|
364 | }
|
---|
365 |
|
---|