1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * z3950server.cpp --
|
---|
4 | * Copyright (C) 2000 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * A component of the Greenstone digital library software
|
---|
7 | * from the New Zealand Digital Library Project at the
|
---|
8 | * University of Waikato, New Zealand.
|
---|
9 | *
|
---|
10 | * This program is free software; you can redistribute it and/or modify
|
---|
11 | * it under the terms of the GNU General Public License as published by
|
---|
12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
13 | * (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This program is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | * GNU General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU General Public License
|
---|
21 | * along with this program; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | *
|
---|
24 | *********************************************************************/
|
---|
25 |
|
---|
26 | #include "z3950server.h"
|
---|
27 | #include "comtypes.h"
|
---|
28 | #include <stdio.h>
|
---|
29 | // z39.50 yaz stuff
|
---|
30 |
|
---|
31 | extern "C" {
|
---|
32 | #include "yaz/yaz_zclient.h"
|
---|
33 | }
|
---|
34 |
|
---|
35 | /***
|
---|
36 | each z39.50 server+database pair is a GSDL collection.
|
---|
37 | ***/
|
---|
38 |
|
---|
39 | z3950_server::z3950_server() {
|
---|
40 | info=NULL;
|
---|
41 | connected=false;
|
---|
42 | titles=NULL;
|
---|
43 | gotsinglerecord=false;
|
---|
44 | // for now, assume that all records will have text associated with them.
|
---|
45 | meta["hastxt"]="1";
|
---|
46 | // for now, assume we don't want ANY DocumentButtons.
|
---|
47 | format["DocumentButtons"]=g_EmptyText;
|
---|
48 | }
|
---|
49 |
|
---|
50 | z3950_server::~z3950_server() {
|
---|
51 | }
|
---|
52 |
|
---|
53 | void z3950_server::setMeta(const text_t &key, const text_t &value) {
|
---|
54 | meta[key]=value;
|
---|
55 | }
|
---|
56 |
|
---|
57 | void z3950_server::setName(const text_t &newname) {
|
---|
58 | title=newname;
|
---|
59 | meta["collectionname"]=newname;
|
---|
60 | }
|
---|
61 |
|
---|
62 | void z3950_server::addcfgAbout(const text_t &lang, const text_t &abouttext) {
|
---|
63 | about[lang]=abouttext;
|
---|
64 | }
|
---|
65 |
|
---|
66 | bool z3950_server::getcfgAbout(const text_t &lang, text_t &abouttxt) {
|
---|
67 | text_tmap::iterator it;
|
---|
68 | it=about.find(lang);
|
---|
69 | if (it==about.end()) return (false);
|
---|
70 | abouttxt=((*it).second);
|
---|
71 | return (true);
|
---|
72 | }
|
---|
73 |
|
---|
74 |
|
---|
75 | // now functions that actually talk over the tcp connection.
|
---|
76 |
|
---|
77 | // create a tcp connection to the associated target. Currently, this will
|
---|
78 | // re-initialise if we are already connected.
|
---|
79 | bool z3950_server::connect() {
|
---|
80 | text_t server_and_port;
|
---|
81 | char *zserverinfo;
|
---|
82 |
|
---|
83 | server_and_port=info->host+":"+info->port;
|
---|
84 | // remember that info.name is the database name
|
---|
85 |
|
---|
86 | z_initialize();
|
---|
87 | char* serv_str=server_and_port.getcstr();
|
---|
88 | char* name_str=info->name.getcstr();
|
---|
89 | int retval=z_cmd_open(serv_str,name_str);
|
---|
90 | delete []serv_str;
|
---|
91 | delete []name_str;
|
---|
92 | if (retval==1)
|
---|
93 | // we got a connection error
|
---|
94 | return false;
|
---|
95 |
|
---|
96 | // get initialisation response.
|
---|
97 | z_getnextAPDU();
|
---|
98 | zserverinfo=z_get_initResponse();
|
---|
99 | if (zserverinfo!=NULL) {
|
---|
100 | z_initstr.appendcstr(zserverinfo);
|
---|
101 | }
|
---|
102 | free(zserverinfo);
|
---|
103 |
|
---|
104 | connected=true;
|
---|
105 | return true;
|
---|
106 | }
|
---|
107 |
|
---|
108 | void z3950_server::parseQuery(const text_t &query,
|
---|
109 | const int querytype,
|
---|
110 | const text_t &fields,
|
---|
111 | text_t &parsed_query) {
|
---|
112 | /****** FIXME *****/
|
---|
113 | // We need to format the query string into RPN -
|
---|
114 | // by just passing it like this, it will only work for simple queries.
|
---|
115 | // This will require us to actually come up with a query syntax and
|
---|
116 | // a parser. For now, we'll just do an "AND" query for all terms
|
---|
117 | // But look at Common Command Language (CCL) query syntax (ISO 8777).
|
---|
118 |
|
---|
119 | // need to remove " chars from the query. We should really tell the server
|
---|
120 | // to do a phrase search on the terms that are between the "s, but we
|
---|
121 | // can't (easily) tell if the server can do that or not,
|
---|
122 | // so we'll currently just do a query and then post-process. (not yet
|
---|
123 | // implemented........)
|
---|
124 |
|
---|
125 | // we need to count number of terms separated by a space
|
---|
126 | char *ptr=query.getcstr();
|
---|
127 | int strlength=strlen(ptr);
|
---|
128 | bool inword=false;
|
---|
129 | int num_terms=0;
|
---|
130 |
|
---|
131 |
|
---|
132 | for (int i=0;i<strlength;i++) {
|
---|
133 | if (*(ptr+i)=='"') { // convert " to SPACE...
|
---|
134 | *(ptr+i)=' ';
|
---|
135 | }
|
---|
136 | if (*(ptr+i)!=' ') {
|
---|
137 | if (inword==false) {
|
---|
138 | inword=true;
|
---|
139 | num_terms++;
|
---|
140 | }
|
---|
141 | }
|
---|
142 | else { // ptr+i is a space
|
---|
143 | inword=false;
|
---|
144 | }
|
---|
145 | }
|
---|
146 |
|
---|
147 | // set the field(s) to search on - main ones include:
|
---|
148 | // 1016 => Any
|
---|
149 | // 1 => (Personal) Name
|
---|
150 | // 4 => Title
|
---|
151 | // 21 => Subject Heading
|
---|
152 | // 45 => Subject precis
|
---|
153 | // Note I have no idea how these actually work - I think some servers
|
---|
154 | // only have limited fields, and map all subject-type requests into that
|
---|
155 | // subject field, etc.
|
---|
156 |
|
---|
157 | parsed_query="@attr 1=";
|
---|
158 | if (fields==".author")
|
---|
159 | parsed_query+="1 ";
|
---|
160 | else if (fields==".title")
|
---|
161 | parsed_query+="4 ";
|
---|
162 | else // fields==".any"
|
---|
163 | parsed_query+="1016 ";
|
---|
164 |
|
---|
165 | // querytype=1 => ranked/or, =2 => boolean/and
|
---|
166 | // append "@and" for each term after the first
|
---|
167 | {
|
---|
168 | char and_str[]="@and ";
|
---|
169 | char or_str[]="@or ";
|
---|
170 | char *q_type;
|
---|
171 | if (querytype==1) q_type=or_str; else q_type=and_str;
|
---|
172 | for (int i=1;i<num_terms;i++)
|
---|
173 | parsed_query+=q_type;
|
---|
174 | // append the actual query
|
---|
175 | parsed_query+=ptr;
|
---|
176 | }
|
---|
177 | delete []ptr;
|
---|
178 | }
|
---|
179 |
|
---|
180 | text_tarray *z3950_server::getrecordTitles(const text_t &query,
|
---|
181 | const int querytype,
|
---|
182 | const text_t &fields,
|
---|
183 | int first, int count,
|
---|
184 | int *nummatches, comerror_t &err) {
|
---|
185 | /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
|
---|
186 | we only ever do one request. Therefore, it is CURRENTLY OK to store
|
---|
187 | (cache) the retrieved titles, because if this function is ever called
|
---|
188 | more than once, the arguments will be the same each time.
|
---|
189 | (I think :)
|
---|
190 | */
|
---|
191 |
|
---|
192 | char **c_str_titles;
|
---|
193 | int i;
|
---|
194 | int last;
|
---|
195 |
|
---|
196 | if (gotsinglerecord==true) {
|
---|
197 | /* If true, then this whole execution was done to retrieve a single
|
---|
198 | document. Therefore, the list of titles of all records matching the
|
---|
199 | query isn't actually required. It's just that for some reason our
|
---|
200 | filter (z3950proto::filter) gets called at least 7 times, with the
|
---|
201 | 7th being a "QueryFilter" for some reason... */
|
---|
202 | nummatches=0;
|
---|
203 | return NULL; /* shouldn't really return NULL, but nummatches is checked
|
---|
204 | first (I hope) */
|
---|
205 | }
|
---|
206 |
|
---|
207 | // if (titles!=NULL) delete (titles);
|
---|
208 | if (titles!=NULL) return titles;
|
---|
209 | titles=new text_tarray;
|
---|
210 |
|
---|
211 | /* check if connected */
|
---|
212 | if (connected==false)
|
---|
213 | if (connect()==false) {
|
---|
214 | // we could not connect.
|
---|
215 | err=protocolError;
|
---|
216 | return (NULL);
|
---|
217 | }
|
---|
218 |
|
---|
219 |
|
---|
220 | text_t expanded_query=g_EmptyText;
|
---|
221 | parseQuery(query,querytype,fields,expanded_query);
|
---|
222 |
|
---|
223 | char* query_str=expanded_query.getcstr();
|
---|
224 | // following functions defined in yaz_zclient.c
|
---|
225 | *nummatches=z_cmd_dosearch(query_str); // returns # found, -1 on err.
|
---|
226 | delete []query_str;
|
---|
227 | if (*nummatches<=0) {
|
---|
228 | if (*nummatches==0) {
|
---|
229 | // no matches
|
---|
230 | return (NULL);
|
---|
231 | } else if (*nummatches==-1) {
|
---|
232 | // prefix query error
|
---|
233 | err=protocolError;
|
---|
234 | return (NULL);
|
---|
235 | } else if (*nummatches==-2) {
|
---|
236 | // sendsearchRequest not answered by searchResponse
|
---|
237 | err=protocolError;
|
---|
238 | return (NULL);
|
---|
239 | }
|
---|
240 | }
|
---|
241 | // could do a sort eventually, eg on date, title, etc.
|
---|
242 | // (non-existent function) z_sort(field, asc|desc);
|
---|
243 | /* min of (count, first + (*nummatches) ) */
|
---|
244 | // z_getrecordTitles ( first, howmany )
|
---|
245 | c_str_titles=z_getrecordTitles(first,min(count,*nummatches-first+1));
|
---|
246 | if (c_str_titles==NULL) {
|
---|
247 | // an error occurred. we need a logout/err as an arg
|
---|
248 | return (NULL);
|
---|
249 | }
|
---|
250 | if (c_str_titles[0]==0) {
|
---|
251 | // no matches.
|
---|
252 | return (NULL);
|
---|
253 | }
|
---|
254 | last=(int)c_str_titles[0];
|
---|
255 | for (i=1;i<=last;i++) {
|
---|
256 | titles->push_back(c_str_titles[i]);
|
---|
257 | free(c_str_titles[i]);
|
---|
258 | }
|
---|
259 | free(c_str_titles);
|
---|
260 | return (titles);
|
---|
261 |
|
---|
262 | }
|
---|
263 |
|
---|
264 | bool z3950_server::getfullrecord(const text_t &query,
|
---|
265 | const int querytype,
|
---|
266 | const text_t &fields,
|
---|
267 | const int ID,
|
---|
268 | text_t &rettitle,
|
---|
269 | text_t &rettext, comerror_t &err) {
|
---|
270 |
|
---|
271 | static char **c_str_titles=NULL;
|
---|
272 | static char *fulltext=NULL;
|
---|
273 | /* NOTE!!!!!! Because this code currently only works in cgi-bin mode,
|
---|
274 | we only ever do one request. Therefore, it is CURRENTLY OK to store
|
---|
275 | (cache) the retrieved titles, because if this function is ever called
|
---|
276 | more than once, the arguments will be the same each time.
|
---|
277 | (I think :)
|
---|
278 | */
|
---|
279 |
|
---|
280 | gotsinglerecord=true; // well, not yet, but we've been called...
|
---|
281 |
|
---|
282 | if (connected==false) {
|
---|
283 | if (connect()==false) {
|
---|
284 | // error connecting...
|
---|
285 | err=protocolError;
|
---|
286 | return (false);
|
---|
287 | }
|
---|
288 | // since we have just re-connected, we need to do the
|
---|
289 | // query again.
|
---|
290 |
|
---|
291 | text_t expanded_query=g_EmptyText;
|
---|
292 | parseQuery(query,querytype,fields,expanded_query);
|
---|
293 |
|
---|
294 | char* query_str=expanded_query.getcstr();
|
---|
295 | int returned=z_cmd_dosearch(query_str);
|
---|
296 | delete []query_str;
|
---|
297 | if (returned<=0) {
|
---|
298 | // 0 => none.
|
---|
299 | // <0 => error
|
---|
300 | err=protocolError;
|
---|
301 | return (false);
|
---|
302 | }
|
---|
303 | }
|
---|
304 |
|
---|
305 | if (c_str_titles==NULL)
|
---|
306 | c_str_titles=z_getrecordTitles(ID,1); // check this return value.
|
---|
307 |
|
---|
308 | if (rettitle!="unneeded") {
|
---|
309 | //int dummy;
|
---|
310 | if (c_str_titles!=NULL && (int)c_str_titles[0]==1) {
|
---|
311 | rettitle.setcstr(c_str_titles[1]); // and check this
|
---|
312 | ////// free (c_str_titles); - we want to "cache" it
|
---|
313 | } else {
|
---|
314 | // we didn't get something....
|
---|
315 | rettitle="Nothing Returned...";
|
---|
316 | }
|
---|
317 | }
|
---|
318 |
|
---|
319 | if (fulltext==NULL)
|
---|
320 | // get the text
|
---|
321 | fulltext=z_getfullRecord(ID);
|
---|
322 |
|
---|
323 | if (rettext!="unneeded") {
|
---|
324 | rettext.setcstr(fulltext);
|
---|
325 | }
|
---|
326 | return (true);
|
---|
327 | }
|
---|
328 |
|
---|
329 | text_t &z3950_server::getzAbout() {
|
---|
330 | text_t zserverresp;
|
---|
331 |
|
---|
332 | // Assume we have not yet connected, so that must be done here.
|
---|
333 | if (connected==true)
|
---|
334 | return (z_initstr);
|
---|
335 |
|
---|
336 | // we need to create the tcp connection to the target (server)
|
---|
337 | // z_initstr=new text_t;
|
---|
338 |
|
---|
339 | if (connect()==false) {
|
---|
340 | z_initstr.setcstr("<H2>Server offline</H2>Error - could not connect to server <B>");
|
---|
341 | z_initstr += info->host;
|
---|
342 | z_initstr += "</B> on port ";
|
---|
343 | z_initstr += info->port;
|
---|
344 | z_initstr += "\n";
|
---|
345 | return (z_initstr);
|
---|
346 | }
|
---|
347 |
|
---|
348 | // z_initstr currently contains the target's response. We want to
|
---|
349 | // PREPEND the following information.
|
---|
350 | zserverresp=z_initstr;
|
---|
351 | z_initstr="Internet server: <b>";
|
---|
352 | z_initstr+=info->host;
|
---|
353 | z_initstr+="</b> on port ";
|
---|
354 | z_initstr+=info->port;
|
---|
355 | z_initstr+=".<br>\n";
|
---|
356 | z_initstr+=zserverresp;
|
---|
357 |
|
---|
358 | // should close /******* WHAT IF DOING A QUERY!??!?!? ********/
|
---|
359 | // z_cmd_close(0);
|
---|
360 | // connected=false;
|
---|
361 | return (z_initstr);
|
---|
362 | }
|
---|
363 |
|
---|