1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * z3950proxy.cpp --
|
---|
4 | * Copyright (C) 2000 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * A component of the Greenstone digital library software
|
---|
7 | * from the New Zealand Digital Library Project at the
|
---|
8 | * University of Waikato, New Zealand.
|
---|
9 | *
|
---|
10 | * This program is free software; you can redistribute it and/or modify
|
---|
11 | * it under the terms of the GNU General Public License as published by
|
---|
12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
13 | * (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This program is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | * GNU General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU General Public License
|
---|
21 | * along with this program; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | *
|
---|
24 | *********************************************************************/
|
---|
25 |
|
---|
26 | #include "z3950proxy.h"
|
---|
27 | #include "comtypes.h"
|
---|
28 | #include <stdio.h>
|
---|
29 | #include <stdlib.h>
|
---|
30 | #include <string.h>
|
---|
31 |
|
---|
32 | // z39.50 yaz stuff
|
---|
33 | #define min(a,b) (((a)<(b))?(a):(b))
|
---|
34 |
|
---|
35 | extern "C" {
|
---|
36 | #include "yaz_zclient.h"
|
---|
37 | }
|
---|
38 |
|
---|
39 | /***
|
---|
40 | each z39.50 server+database pair is a GSDL collection.
|
---|
41 | ***/
|
---|
42 |
|
---|
43 | z3950_proxy::z3950_proxy() {
|
---|
44 | info=NULL;
|
---|
45 | connected=false;
|
---|
46 | titles=NULL;
|
---|
47 | gotsinglerecord=false;
|
---|
48 | // for now, assume that all records will have text associated with them.
|
---|
49 | meta["hastxt"]="1";
|
---|
50 | // for now, assume we don't want ANY DocumentButtons.
|
---|
51 | format["DocumentButtons"]=g_EmptyText;
|
---|
52 | }
|
---|
53 |
|
---|
54 | z3950_proxy::~z3950_proxy() {
|
---|
55 | }
|
---|
56 |
|
---|
57 | void z3950_proxy::setMeta(const text_t &key, const text_t &value) {
|
---|
58 | meta[key]=value;
|
---|
59 | }
|
---|
60 |
|
---|
61 | void z3950_proxy::setName(const text_t &newname) {
|
---|
62 | title=newname;
|
---|
63 | meta["collectionname"]=newname;
|
---|
64 | }
|
---|
65 |
|
---|
66 | void z3950_proxy::addcfgAbout(const text_t &lang, const text_t &abouttext) {
|
---|
67 | about[lang]=abouttext;
|
---|
68 | }
|
---|
69 |
|
---|
70 | bool z3950_proxy::getcfgAbout(const text_t &lang, text_t &abouttxt) {
|
---|
71 | text_tmap::iterator it;
|
---|
72 | it=about.find(lang);
|
---|
73 | if (it==about.end()) return (false);
|
---|
74 | abouttxt=((*it).second);
|
---|
75 | return (true);
|
---|
76 | }
|
---|
77 |
|
---|
78 |
|
---|
79 | // now functions that actually talk over the tcp connection.
|
---|
80 |
|
---|
81 | // create a tcp connection to the associated target. Currently, this will
|
---|
82 | // re-initialise if we are already connected.
|
---|
83 | bool z3950_proxy::connect() {
|
---|
84 | text_t server_and_port;
|
---|
85 | char *zserverinfo;
|
---|
86 |
|
---|
87 | server_and_port=info->host+":"+info->port;
|
---|
88 | // remember that info.name is the database name
|
---|
89 |
|
---|
90 | z_initialize();
|
---|
91 | char* serv_str=server_and_port.getcstr();
|
---|
92 | char* name_str=info->name.getcstr();
|
---|
93 | int retval=z_cmd_open(serv_str,name_str);
|
---|
94 | delete []serv_str;
|
---|
95 | delete []name_str;
|
---|
96 | if (retval==1)
|
---|
97 | // we got a connection error
|
---|
98 | return false;
|
---|
99 |
|
---|
100 | // get initialisation response.
|
---|
101 | z_getnextAPDU();
|
---|
102 | zserverinfo=z_get_initResponse();
|
---|
103 | if (zserverinfo!=NULL) {
|
---|
104 | z_initstr.appendcstr(zserverinfo);
|
---|
105 | }
|
---|
106 | free(zserverinfo);
|
---|
107 |
|
---|
108 | connected=true;
|
---|
109 | return true;
|
---|
110 | }
|
---|
111 |
|
---|
112 | void z3950_proxy::parseQuery(const text_t &query,
|
---|
113 | const int querytype,
|
---|
114 | const text_t &fields,
|
---|
115 | text_t &parsed_query) {
|
---|
116 | /****** FIXME *****/
|
---|
117 | // We need to format the query string into RPN -
|
---|
118 | // by just passing it like this, it will only work for simple queries.
|
---|
119 | // This will require us to actually come up with a query syntax and
|
---|
120 | // a parser. For now, we'll just do an "AND" query for all terms
|
---|
121 | // But look at Common Command Language (CCL) query syntax (ISO 8777).
|
---|
122 |
|
---|
123 | // need to remove " chars from the query. We should really tell the server
|
---|
124 | // to do a phrase search on the terms that are between the "s, but we
|
---|
125 | // can't (easily) tell if the server can do that or not,
|
---|
126 | // so we'll currently just do a query and then post-process. (not yet
|
---|
127 | // implemented........)
|
---|
128 |
|
---|
129 | // we need to count number of terms separated by a space
|
---|
130 | char *ptr=query.getcstr();
|
---|
131 | int strlength=strlen(ptr);
|
---|
132 | bool inword=false;
|
---|
133 | int num_terms=0;
|
---|
134 |
|
---|
135 |
|
---|
136 | for (int i=0;i<strlength;++i) {
|
---|
137 | if (*(ptr+i)=='"') { // convert " to SPACE...
|
---|
138 | *(ptr+i)=' ';
|
---|
139 | }
|
---|
140 | if (*(ptr+i)!=' ') {
|
---|
141 | if (inword==false) {
|
---|
142 | inword=true;
|
---|
143 | ++num_terms;
|
---|
144 | }
|
---|
145 | }
|
---|
146 | else { // ptr+i is a space
|
---|
147 | inword=false;
|
---|
148 | }
|
---|
149 | }
|
---|
150 |
|
---|
151 | // set the field(s) to search on - main ones include:
|
---|
152 | // 1016 => Any
|
---|
153 | // 1 => (Personal) Name
|
---|
154 | // 4 => Title
|
---|
155 | // 21 => Subject Heading
|
---|
156 | // 45 => Subject precis
|
---|
157 | // Note I have no idea how these actually work - I think some servers
|
---|
158 | // only have limited fields, and map all subject-type requests into that
|
---|
159 | // subject field, etc.
|
---|
160 |
|
---|
161 | parsed_query="@attr 1=";
|
---|
162 | if (fields==".author")
|
---|
163 | parsed_query+="1 ";
|
---|
164 | else if (fields==".title")
|
---|
165 | parsed_query+="4 ";
|
---|
166 | else // fields==".any"
|
---|
167 | parsed_query+="1016 ";
|
---|
168 |
|
---|
169 | // querytype=1 => ranked/or, =2 => boolean/and
|
---|
170 | // append "@and" for each term after the first
|
---|
171 | {
|
---|
172 | char and_str[]="@and ";
|
---|
173 | char or_str[]="@or ";
|
---|
174 | char *q_type;
|
---|
175 | if (querytype==1) q_type=or_str; else q_type=and_str;
|
---|
176 | for (int i=1;i<num_terms;++i)
|
---|
177 | parsed_query+=q_type;
|
---|
178 | // append the actual query
|
---|
179 | parsed_query+=ptr;
|
---|
180 | }
|
---|
181 | delete []ptr;
|
---|
182 | }
|
---|
183 |
|
---|
184 | text_tarray *z3950_proxy::getrecordTitles(const text_t &query,
|
---|
185 | const int querytype,
|
---|
186 | const text_t &fields,
|
---|
187 | int first, int count,
|
---|
188 | int *nummatches, comerror_t &err) {
|
---|
189 | /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
|
---|
190 | we only ever do one request. Therefore, it is CURRENTLY OK to store
|
---|
191 | (cache) the retrieved titles, because if this function is ever called
|
---|
192 | more than once, the arguments will be the same each time.
|
---|
193 | (I think :)
|
---|
194 | */
|
---|
195 |
|
---|
196 | char **c_str_titles;
|
---|
197 | int i;
|
---|
198 | long last;
|
---|
199 |
|
---|
200 | if (gotsinglerecord==true) {
|
---|
201 | /* If true, then this whole execution was done to retrieve a single
|
---|
202 | document. Therefore, the list of titles of all records matching the
|
---|
203 | query isn't actually required. It's just that for some reason our
|
---|
204 | filter (z3950proto::filter) gets called at least 7 times, with the
|
---|
205 | 7th being a "QueryFilter" for some reason... */
|
---|
206 | nummatches=0;
|
---|
207 | return NULL; /* shouldn't really return NULL, but nummatches is checked
|
---|
208 | first (I hope) */
|
---|
209 | }
|
---|
210 |
|
---|
211 | // if (titles!=NULL) delete (titles);
|
---|
212 | if (titles!=NULL) return titles;
|
---|
213 | titles=new text_tarray;
|
---|
214 |
|
---|
215 | /* check if connected */
|
---|
216 | if (connected==false)
|
---|
217 | if (connect()==false) {
|
---|
218 | // we could not connect.
|
---|
219 | err=protocolError;
|
---|
220 | return (NULL);
|
---|
221 | }
|
---|
222 |
|
---|
223 |
|
---|
224 | text_t expanded_query=g_EmptyText;
|
---|
225 | parseQuery(query,querytype,fields,expanded_query);
|
---|
226 |
|
---|
227 | char* query_str=expanded_query.getcstr();
|
---|
228 | // following functions defined in yaz_zclient.c
|
---|
229 | *nummatches=z_cmd_dosearch(query_str); // returns # found, -1 on err.
|
---|
230 | delete []query_str;
|
---|
231 | if (*nummatches<=0) {
|
---|
232 | if (*nummatches==0) {
|
---|
233 | // no matches
|
---|
234 | return (NULL);
|
---|
235 | } else if (*nummatches==-1) {
|
---|
236 | // prefix query error
|
---|
237 | err=protocolError;
|
---|
238 | return (NULL);
|
---|
239 | } else if (*nummatches==-2) {
|
---|
240 | // sendsearchRequest not answered by searchResponse
|
---|
241 | err=protocolError;
|
---|
242 | return (NULL);
|
---|
243 | }
|
---|
244 | }
|
---|
245 | // could do a sort eventually, eg on date, title, etc.
|
---|
246 | // (non-existent function) z_sort(field, asc|desc);
|
---|
247 | /* min of (count, first + (*nummatches) ) */
|
---|
248 | // z_getrecordTitles ( first, howmany )
|
---|
249 | c_str_titles=z_getrecordTitles(first,min(count,*nummatches-first+1));
|
---|
250 | if (c_str_titles==NULL) {
|
---|
251 | // an error occurred. we need a logout/err as an arg
|
---|
252 | return (NULL);
|
---|
253 | }
|
---|
254 | if (c_str_titles[0]==0) {
|
---|
255 | // no matches.
|
---|
256 | return (NULL);
|
---|
257 | }
|
---|
258 | last=(long)c_str_titles[0];
|
---|
259 | for (i=1;i<=last;++i) {
|
---|
260 | titles->push_back(c_str_titles[i]);
|
---|
261 | free(c_str_titles[i]);
|
---|
262 | }
|
---|
263 | free(c_str_titles);
|
---|
264 | return (titles);
|
---|
265 |
|
---|
266 | }
|
---|
267 |
|
---|
268 | bool z3950_proxy::getfullrecord(const text_t &query,
|
---|
269 | const int querytype,
|
---|
270 | const text_t &fields,
|
---|
271 | const int ID,
|
---|
272 | text_t &rettitle,
|
---|
273 | text_t &rettext, comerror_t &err) {
|
---|
274 |
|
---|
275 | static char **c_str_titles=NULL;
|
---|
276 | static char *fulltext=NULL;
|
---|
277 | /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
|
---|
278 | we only ever do one request. Therefore, it is CURRENTLY OK to store
|
---|
279 | (cache) the retrieved titles, because if this function is ever called
|
---|
280 | more than once, the arguments will be the same each time.
|
---|
281 | (I think :)
|
---|
282 | */
|
---|
283 |
|
---|
284 | gotsinglerecord=true; // well, not yet, but we've been called...
|
---|
285 |
|
---|
286 | if (connected==false) {
|
---|
287 | if (connect()==false) {
|
---|
288 | // error connecting...
|
---|
289 | err=protocolError;
|
---|
290 | return (false);
|
---|
291 | }
|
---|
292 | // since we have just re-connected, we need to do the
|
---|
293 | // query again.
|
---|
294 |
|
---|
295 | text_t expanded_query=g_EmptyText;
|
---|
296 | parseQuery(query,querytype,fields,expanded_query);
|
---|
297 |
|
---|
298 | char* query_str=expanded_query.getcstr();
|
---|
299 | int returned=z_cmd_dosearch(query_str);
|
---|
300 | delete []query_str;
|
---|
301 | if (returned<=0) {
|
---|
302 | // 0 => none.
|
---|
303 | // <0 => error
|
---|
304 | err=protocolError;
|
---|
305 | return (false);
|
---|
306 | }
|
---|
307 | }
|
---|
308 |
|
---|
309 | if (c_str_titles==NULL)
|
---|
310 | c_str_titles=z_getrecordTitles(ID,1); // check this return value.
|
---|
311 |
|
---|
312 | if (rettitle!="unneeded") {
|
---|
313 | //int dummy;
|
---|
314 | if (c_str_titles!=NULL && (long)c_str_titles[0]==1) {
|
---|
315 | rettitle.setcstr(c_str_titles[1]); // and check this
|
---|
316 | ////// free (c_str_titles); - we want to "cache" it
|
---|
317 | } else {
|
---|
318 | // we didn't get something....
|
---|
319 | rettitle="Nothing Returned...";
|
---|
320 | }
|
---|
321 | }
|
---|
322 |
|
---|
323 | if (fulltext==NULL)
|
---|
324 | // get the text
|
---|
325 | fulltext=z_getfullRecord(ID);
|
---|
326 |
|
---|
327 | if (rettext!="unneeded") {
|
---|
328 | rettext.setcstr(fulltext);
|
---|
329 | }
|
---|
330 | return (true);
|
---|
331 | }
|
---|
332 |
|
---|
333 | text_t &z3950_proxy::getzAbout() {
|
---|
334 | text_t zserverresp;
|
---|
335 |
|
---|
336 | // Assume we have not yet connected, so that must be done here.
|
---|
337 | if (connected==true)
|
---|
338 | return (z_initstr);
|
---|
339 |
|
---|
340 | // we need to create the tcp connection to the target (server)
|
---|
341 | // z_initstr=new text_t;
|
---|
342 |
|
---|
343 | if (connect()==false) {
|
---|
344 | z_initstr.setcstr("<H2>Server offline</H2>Error - could not connect to server <B>");
|
---|
345 | z_initstr += info->host;
|
---|
346 | z_initstr += "</B> on port ";
|
---|
347 | z_initstr += info->port;
|
---|
348 | z_initstr += "\n";
|
---|
349 | return (z_initstr);
|
---|
350 | }
|
---|
351 |
|
---|
352 | // z_initstr currently contains the target's response. We want to
|
---|
353 | // PREPEND the following information.
|
---|
354 | zserverresp=z_initstr;
|
---|
355 | z_initstr="Internet server: <b>";
|
---|
356 | z_initstr+=info->host;
|
---|
357 | z_initstr+="</b> on port ";
|
---|
358 | z_initstr+=info->port;
|
---|
359 | z_initstr+=".<br>\n";
|
---|
360 | z_initstr+=zserverresp;
|
---|
361 |
|
---|
362 | // should close /******* WHAT IF DOING A QUERY!??!?!? ********/
|
---|
363 | // z_cmd_close(0);
|
---|
364 | // connected=false;
|
---|
365 | return (z_initstr);
|
---|
366 | }
|
---|
367 |
|
---|