source: main/trunk/greenstone2/runtime-src/src/z3950/z3950proxy.cpp@ 25234

Last change on this file since 25234 was 22292, checked in by sjm84, 14 years ago

Changed from using an int to using a long so that it would be compatible with 64-bit machine-sized pointers

File size: 10.3 KB
Line 
1/**********************************************************************
2 *
3 * z3950proxy.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proxy.h"
27#include "comtypes.h"
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32// z39.50 yaz stuff
33#define min(a,b) (((a)<(b))?(a):(b))
34
35extern "C" {
36#include "yaz_zclient.h"
37}
38
39/***
40 each z39.50 server+database pair is a GSDL collection.
41***/
42
43z3950_proxy::z3950_proxy() {
44 info=NULL;
45 connected=false;
46 titles=NULL;
47 gotsinglerecord=false;
48 // for now, assume that all records will have text associated with them.
49 meta["hastxt"]="1";
50 // for now, assume we don't want ANY DocumentButtons.
51 format["DocumentButtons"]=g_EmptyText;
52}
53
// Destructor: performs no cleanup of its own.
// NOTE(review): the text_tarray allocated with new in getrecordTitles() and
// kept in `titles` is not deleted here - confirm who owns it before adding
// a delete.
z3950_proxy::~z3950_proxy() {
}
56
57void z3950_proxy::setMeta(const text_t &key, const text_t &value) {
58 meta[key]=value;
59}
60
61void z3950_proxy::setName(const text_t &newname) {
62 title=newname;
63 meta["collectionname"]=newname;
64}
65
66void z3950_proxy::addcfgAbout(const text_t &lang, const text_t &abouttext) {
67 about[lang]=abouttext;
68}
69
70bool z3950_proxy::getcfgAbout(const text_t &lang, text_t &abouttxt) {
71 text_tmap::iterator it;
72 it=about.find(lang);
73 if (it==about.end()) return (false);
74 abouttxt=((*it).second);
75 return (true);
76}
77
78
79// now functions that actually talk over the tcp connection.
80
81// create a tcp connection to the associated target. Currently, this will
82// re-initialise if we are already connected.
83bool z3950_proxy::connect() {
84 text_t server_and_port;
85 char *zserverinfo;
86
87 server_and_port=info->host+":"+info->port;
88 // remember that info.name is the database name
89
90 z_initialize();
91 char* serv_str=server_and_port.getcstr();
92 char* name_str=info->name.getcstr();
93 int retval=z_cmd_open(serv_str,name_str);
94 delete []serv_str;
95 delete []name_str;
96 if (retval==1)
97 // we got a connection error
98 return false;
99
100 // get initialisation response.
101 z_getnextAPDU();
102 zserverinfo=z_get_initResponse();
103 if (zserverinfo!=NULL) {
104 z_initstr.appendcstr(zserverinfo);
105 }
106 free(zserverinfo);
107
108 connected=true;
109 return true;
110}
111
112void z3950_proxy::parseQuery(const text_t &query,
113 const int querytype,
114 const text_t &fields,
115 text_t &parsed_query) {
116 /****** FIXME *****/
117 // We need to format the query string into RPN -
118 // by just passing it like this, it will only work for simple queries.
119 // This will require us to actually come up with a query syntax and
120 // a parser. For now, we'll just do an "AND" query for all terms
121 // But look at Common Command Language (CCL) query syntax (ISO 8777).
122
123 // need to remove " chars from the query. We should really tell the server
124 // to do a phrase search on the terms that are between the "s, but we
125 // can't (easily) tell if the server can do that or not,
126 // so we'll currently just do a query and then post-process. (not yet
127 // implemented........)
128
129 // we need to count number of terms separated by a space
130 char *ptr=query.getcstr();
131 int strlength=strlen(ptr);
132 bool inword=false;
133 int num_terms=0;
134
135
136 for (int i=0;i<strlength;++i) {
137 if (*(ptr+i)=='"') { // convert " to SPACE...
138 *(ptr+i)=' ';
139 }
140 if (*(ptr+i)!=' ') {
141 if (inword==false) {
142 inword=true;
143 ++num_terms;
144 }
145 }
146 else { // ptr+i is a space
147 inword=false;
148 }
149 }
150
151 // set the field(s) to search on - main ones include:
152 // 1016 => Any
153 // 1 => (Personal) Name
154 // 4 => Title
155 // 21 => Subject Heading
156 // 45 => Subject precis
157 // Note I have no idea how these actually work - I think some servers
158 // only have limited fields, and map all subject-type requests into that
159 // subject field, etc.
160
161 parsed_query="@attr 1=";
162 if (fields==".author")
163 parsed_query+="1 ";
164 else if (fields==".title")
165 parsed_query+="4 ";
166 else // fields==".any"
167 parsed_query+="1016 ";
168
169 // querytype=1 => ranked/or, =2 => boolean/and
170 // append "@and" for each term after the first
171 {
172 char and_str[]="@and ";
173 char or_str[]="@or ";
174 char *q_type;
175 if (querytype==1) q_type=or_str; else q_type=and_str;
176 for (int i=1;i<num_terms;++i)
177 parsed_query+=q_type;
178 // append the actual query
179 parsed_query+=ptr;
180 }
181 delete []ptr;
182}
183
184text_tarray *z3950_proxy::getrecordTitles(const text_t &query,
185 const int querytype,
186 const text_t &fields,
187 int first, int count,
188 int *nummatches, comerror_t &err) {
189 /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
190 we only ever do one request. Therefore, it is CURRENTLY OK to store
191 (cache) the retrieved titles, because if this function is ever called
192 more than once, the arguments will be the same each time.
193 (I think :)
194 */
195
196 char **c_str_titles;
197 int i;
198 long last;
199
200 if (gotsinglerecord==true) {
201 /* If true, then this whole execution was done to retrieve a single
202 document. Therefore, the list of titles of all records matching the
203 query isn't actually required. It's just that for some reason our
204 filter (z3950proto::filter) gets called at least 7 times, with the
205 7th being a "QueryFilter" for some reason... */
206 nummatches=0;
207 return NULL; /* shouldn't really return NULL, but nummatches is checked
208 first (I hope) */
209 }
210
211 // if (titles!=NULL) delete (titles);
212 if (titles!=NULL) return titles;
213 titles=new text_tarray;
214
215 /* check if connected */
216 if (connected==false)
217 if (connect()==false) {
218 // we could not connect.
219 err=protocolError;
220 return (NULL);
221 }
222
223
224 text_t expanded_query=g_EmptyText;
225 parseQuery(query,querytype,fields,expanded_query);
226
227 char* query_str=expanded_query.getcstr();
228 // following functions defined in yaz_zclient.c
229 *nummatches=z_cmd_dosearch(query_str); // returns # found, -1 on err.
230 delete []query_str;
231 if (*nummatches<=0) {
232 if (*nummatches==0) {
233 // no matches
234 return (NULL);
235 } else if (*nummatches==-1) {
236 // prefix query error
237 err=protocolError;
238 return (NULL);
239 } else if (*nummatches==-2) {
240 // sendsearchRequest not answered by searchResponse
241 err=protocolError;
242 return (NULL);
243 }
244 }
245 // could do a sort eventually, eg on date, title, etc.
246 // (non-existent function) z_sort(field, asc|desc);
247 /* min of (count, first + (*nummatches) ) */
248 // z_getrecordTitles ( first, howmany )
249 c_str_titles=z_getrecordTitles(first,min(count,*nummatches-first+1));
250 if (c_str_titles==NULL) {
251 // an error occurred. we need a logout/err as an arg
252 return (NULL);
253 }
254 if (c_str_titles[0]==0) {
255 // no matches.
256 return (NULL);
257 }
258 last=(long)c_str_titles[0];
259 for (i=1;i<=last;++i) {
260 titles->push_back(c_str_titles[i]);
261 free(c_str_titles[i]);
262 }
263 free(c_str_titles);
264 return (titles);
265
266}
267
268bool z3950_proxy::getfullrecord(const text_t &query,
269 const int querytype,
270 const text_t &fields,
271 const int ID,
272 text_t &rettitle,
273 text_t &rettext, comerror_t &err) {
274
275 static char **c_str_titles=NULL;
276 static char *fulltext=NULL;
277 /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
278 we only ever do one request. Therefore, it is CURRENTLY OK to store
279 (cache) the retrieved titles, because if this function is ever called
280 more than once, the arguments will be the same each time.
281 (I think :)
282 */
283
284 gotsinglerecord=true; // well, not yet, but we've been called...
285
286 if (connected==false) {
287 if (connect()==false) {
288 // error connecting...
289 err=protocolError;
290 return (false);
291 }
292 // since we have just re-connected, we need to do the
293 // query again.
294
295 text_t expanded_query=g_EmptyText;
296 parseQuery(query,querytype,fields,expanded_query);
297
298 char* query_str=expanded_query.getcstr();
299 int returned=z_cmd_dosearch(query_str);
300 delete []query_str;
301 if (returned<=0) {
302 // 0 => none.
303 // <0 => error
304 err=protocolError;
305 return (false);
306 }
307 }
308
309 if (c_str_titles==NULL)
310 c_str_titles=z_getrecordTitles(ID,1); // check this return value.
311
312 if (rettitle!="unneeded") {
313 //int dummy;
314 if (c_str_titles!=NULL && (long)c_str_titles[0]==1) {
315 rettitle.setcstr(c_str_titles[1]); // and check this
316 ////// free (c_str_titles); - we want to "cache" it
317 } else {
318 // we didn't get something....
319 rettitle="Nothing Returned...";
320 }
321 }
322
323 if (fulltext==NULL)
324 // get the text
325 fulltext=z_getfullRecord(ID);
326
327 if (rettext!="unneeded") {
328 rettext.setcstr(fulltext);
329 }
330 return (true);
331 }
332
333text_t &z3950_proxy::getzAbout() {
334 text_t zserverresp;
335
336 // Assume we have not yet connected, so that must be done here.
337 if (connected==true)
338 return (z_initstr);
339
340 // we need to create the tcp connection to the target (server)
341 // z_initstr=new text_t;
342
343 if (connect()==false) {
344 z_initstr.setcstr("<H2>Server offline</H2>Error - could not connect to server <B>");
345 z_initstr += info->host;
346 z_initstr += "</B> on port ";
347 z_initstr += info->port;
348 z_initstr += "\n";
349 return (z_initstr);
350 }
351
352 // z_initstr currently contains the target's response. We want to
353 // PREPEND the following information.
354 zserverresp=z_initstr;
355 z_initstr="Internet server: <b>";
356 z_initstr+=info->host;
357 z_initstr+="</b> on port ";
358 z_initstr+=info->port;
359 z_initstr+=".<br>\n";
360 z_initstr+=zserverresp;
361
362 // should close /******* WHAT IF DOING A QUERY!??!?!? ********/
363 // z_cmd_close(0);
364 // connected=false;
365 return (z_initstr);
366}
367
Note: See TracBrowser for help on using the repository browser.