source: gs3-extensions/solr/trunk/src/perllib/solrserver.pm@ 29711

Last change on this file since 29711 was 29711, checked in by ak19, 9 years ago

Moving from using the solr jetty server to solr using the GS3 tomcat server. Now localhost:8383/solr hosts the solr server RESTful pages. Changes: 1. Minor changes to GS3 build.xml. 2. GLI no longer does the temporary stopping of the GS3 server, launching jetty server for building a solr collection, stopping jetty, restarting GS3 tomcat server. GLI leaves the GS3 server running. 3. The main changes are to ext/solr. The ext/solr/gs3-setup.sh sets the new SOLR_PORT and SOLR_HOST variables read from the GS3 build.properties, as the jetty port and host variables are no longer used. ext/solr/build.xml now puts the solr war file into tomcat's webapps, as well as helper libraries necessary (xalan related); a solr.xml context file is created from a template file and placed into tomcat's conf/Catalina/localhost; additional solr jar files are copied into tomcat/lib, as well as the slf4j bridge being copied into GS3/web/WEB-INF/lib; the solr perl code has been changed to use the new RESTful URLs and particularly to work with solr running off the GS3 tomcat server, or stop and start it as required, rather than working with (or stopping and starting) the solr jetty server. A new run_solr_server.pl executable script runs the tomcat server rather than the jetty server; major changes to the Java Solr code to no longer work with the EmbeddedSolrServer (which caused a conflict when the index is accessed by solr jetty server upon rebuild of solr collections), our solr Java code now uses HttpSolrServer to contact the solr servlet running off tomcat. 5. Still a bug: when search results go over a page after rebuilding a solr collection in GLI against a running GS3 server, the 2nd page of search results aren't present and things break. But if the server is not running, solr collections rebuild fine, so the changes do everything that GS3.06 did and more.

File size: 13.5 KB
Line 
1###########################################################################
2#
3# solrserver.pm -- class for starting and stopping the Solr with the
4# GS3 tomcat server.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package solrserver;
29
30use strict;
31#no strict 'refs';
32
33use solrutil;
34
# Constructor.
#
# Parameters:
#   $build_dir - stored under the 'build_dir' key; admin_create_core() uses
#                it as the default parent of a core's data directory.
#
# The Solr base URL and core-admin URL are built from the SOLR_HOST and
# SOLR_PORT environment variables and stored under 'base-url' and
# 'admin-url'. 'server_explicitly_started' starts out undefined.
sub new {
    my ($class, $build_dir) = @_;

    # The result is not used in this constructor; the call itself is retained.
    my $search_path = &solrutil::get_search_path();

    my $solr_root = "http://" . $ENV{'SOLR_HOST'} . ":" . $ENV{'SOLR_PORT'} . "/solr/";

    my $self = {
        'build_dir'                 => $build_dir,
        'server_explicitly_started' => undef,
        'base-url'                  => $solr_root,
        'admin-url'                 => $solr_root . "admin/cores",
    };

    return bless $self, $class;
}
55
# Fetch a URL with the external 'wget' program and separate wget's own
# progress chatter (the "preamble") from the response text.
#
# Parameters:
#   $output_format - "xml": lines are collected as preamble until the first
#                    line containing markup (<...>) is seen; any other value:
#                    every line is treated as response output.
#   $url           - the URL to fetch.
#   $cgi_get_args  - optional query string, without the leading '?'.
#
# Returns a hash reference with the keys:
#   'url'      - the URL that was requested, including any query string
#   'preamble' - text seen before the response proper (xml mode only)
#   'output'   - the response text
#   'error'    - undef on success; otherwise the wget output line that
#                reported the problem, or a message if wget could not be run
sub _wget_service
{
    my $self = shift (@_);
    my ($output_format,$url,$cgi_get_args) = @_;

    # Build the complete URL once, so that the value reported back to the
    # caller (and shown in error messages) is the one actually requested.
    my $full_url = $url;
    $full_url .= "?$cgi_get_args" if (defined $cgi_get_args);

    print STDERR "\n\n**** _wget_service SOLR WEB URL: $full_url\n\n";

    # the wget binary is dependent on the gnomelib_env (particularly
    # lib/libiconv2.dylib) being set, particularly on Mac Lion binaries
    # (android too?)
    # this will set the gnomelib env once for each subshell launched, by
    # first checking if GEXTGNOME is not already set
    &util::set_gnomelib_env();

    # stderr is merged into stdout because the error patterns tested for
    # below are looked for in the single stream read from the pipe
    my $cmd = "wget -O - \"$full_url\" 2>&1";

    my $preamble_output = "";
    my $xml_output = "";
    my $error_output = undef;

    my $in_preamble = ($output_format eq "xml") ? 1 : 0;

    if (open(my $wget_fh, '-|', $cmd)) {

        while (defined (my $line = <$wget_fh>)) {

            if (($line =~ m/ERROR \d+:/)
                || ($line =~ m/failed: Connection refused/)) {
                # HTTP-level error or connection failure: stop reading
                chomp $line;
                $error_output = $line;
                last;
            }
            elsif ($in_preamble) {
                if ($line =~ m/<.*>/) {
                    # first line with markup ends the preamble; this same
                    # line is part of the response and is appended below
                    $in_preamble = 0;
                }
                else {
                    $preamble_output .= $line;
                }
            }

            if (! $in_preamble) {
                $xml_output .= $line;
            }
        }
        close($wget_fh);
    }
    else {
        $error_output = "Error: failed to run $cmd\n";
        $error_output .= " $!\n";
    }

    if (defined $error_output) {
        print STDERR "\n\n**** WGET_SERVICE got an error: $error_output\n\n";
    }

    my $output = { 'url' => $full_url,
                   'preamble' => $preamble_output,
                   'output' => $xml_output,
                   'error' => $error_output };

    return $output;
}
127
128
# Request the Solr base URL (stored under 'base-url') through
# _wget_service() in "html" mode.
#
# Parameters:
#   $cgi_get_args - optional query string to append to the URL.
#
# Returns the hash reference produced by _wget_service().
sub _base_service
{
    my ($self, $cgi_get_args) = @_;

    return $self->_wget_service("html", $self->{'base-url'}, $cgi_get_args);
}
138
# Request the Solr core-admin URL (stored under 'admin-url') through
# _wget_service() in "xml" mode.
#
# Parameters:
#   $cgi_get_args - optional query string to append to the URL.
#
# Returns the hash reference produced by _wget_service().
sub _admin_service
{
    my ($self, $cgi_get_args) = @_;

    return $self->_wget_service("xml", $self->{'admin-url'}, $cgi_get_args);
}
148
149
# Report whether the Solr web application answers at its base URL.
#
# Returns 1 if the request made by _base_service() came back without an
# error, 0 otherwise.
sub server_running
{
    my ($self) = @_;

    my $response = $self->_base_service();

    return 0 if (defined $response->{'error'});
    return 1;
}
162
163
# Ask the Solr core-admin service for the STATUS of a core and report
# whether that core exists.
#
# Parameters:
#   $core - name of the core to look up.
#
# Returns 1 if the core is reported, 0 if the request failed or if the
# response contains only an empty element for the core.
sub admin_ping_core
{
    my $self = shift @_;
    my ($core) = @_;

    my $cgi_get_args = "action=STATUS&core=$core";

    my $output = $self->_admin_service($cgi_get_args);

    if (defined $output->{'error'}) {
        # severe error, such as failing to connect to the server

        my $url = $output->{'url'};
        my $preamble = $output->{'preamble'};
        my $error = $output->{'error'};

        print STDERR "----\n";
        print STDERR "Error: Failed to get XML response from:\n";
        print STDERR " $url\n";
        print STDERR "Output was:\n";
        print STDERR $preamble if ($preamble ne "");
        print STDERR "$error\n";
        print STDERR "----\n";

        return 0;
    }

    # If the collection doesn't exist yet, then there will be
    # an empty element of the form:
    #   <lst name="collect-doc"/>
    # where 'collect' is the actual name of the collection,
    # such as demo

    my $xml_output = $output->{'output'};

    # \Q...\E so that characters in the core name that are special in a
    # regular expression (e.g. '.') are matched literally
    my $core_is_missing = ($xml_output =~ m/<lst\s+name="\Q$core\E"\s*\/>/s);

    return $core_is_missing ? 0 : 1;
}
208
# Copy a UTF-8 text file, applying regular-expression substitutions to its
# content on the way.
#
# Parameters:
#   $src_file         - file to read.
#   $dst_file         - file to write (overwritten).
#   $re_substitutions - hash reference of the form: [re_key] => subst_str.
#                       Every match of re_key in the content is replaced
#                       by the literal text subst_str.
#
# Returns 1 on success and 0 on failure. If the source cannot be read, an
# error is printed and the destination is left untouched.
sub filtered_copy
{
    my $self = shift @_;
    my ($src_file, $dst_file, $re_substitutions) = @_;

    my $fin;
    if (!open($fin, '<:utf8', $src_file)) {
        # Bail out before opening the destination: carrying on would
        # replace it with an empty file.
        print STDERR "Error: Failed to open file '$src_file' for reading.\n$!\n";
        return 0;
    }

    my $content = "";
    while (defined(my $line = <$fin>)) {
        $content .= $line;
    }
    close($fin);

    # perform RE string substitutions (in sorted key order, so that the
    # result does not depend on hash ordering when there are several keys)
    my $substitutions = (defined $re_substitutions) ? $re_substitutions : {};
    foreach my $re_key (sort keys %$substitutions) {
        my $subst_str = $substitutions->{$re_key};
        $content =~ s/$re_key/$subst_str/g;
    }

    my $fout;
    if (!open($fout, '>:utf8', $dst_file)) {
        print STDERR "Error: Failed to open file '$dst_file' for writing.\n$!\n";
        return 0;
    }

    print $fout $content;

    # buffered write errors only surface when the handle is closed
    if (!close($fout)) {
        print STDERR "Error: Failed to write file '$dst_file'.\n$!\n";
        return 0;
    }

    return 1;
}
252
# Generate solr.xml.in from solr.xml by replacing every occurrence of the
# GSDL3HOME path with the placeholder @gsdl3home@.
#
# Parameters:
#   $solr_xml_dir - directory holding both files. When it is not given, or
#                   is not an existing directory, the stored
#                   'solr_live_home' is used instead.
#
# Returns whatever filtered_copy() returns.
sub solr_xml_to_solr_xml_in
{
    my ($self, $solr_xml_dir) = @_;

    unless (defined $solr_xml_dir && -d $solr_xml_dir) {
        # if not passed in, use stored solr_live_home
        $solr_xml_dir = $self->{'solr_live_home'};
    }

    my $template_file = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $concrete_file = &util::filename_cat($solr_xml_dir, "solr.xml");

    my $home_pattern = &util::filename_to_regex($ENV{'GSDL3HOME'});

    my %placeholder_for = ( qr/$home_pattern/ => "\@gsdl3home\@" );

    return $self->filtered_copy($concrete_file, $template_file, \%placeholder_for);
}
274
275
# Generate solr.xml from solr.xml.in by replacing every occurrence of the
# placeholder @gsdl3home@ with the value of the GSDL3HOME environment
# variable.
#
# Parameters:
#   $solr_xml_dir - directory holding both files. When it is not given, or
#                   is not an existing directory, the stored
#                   'solr_live_home' is used instead.
#
# Returns whatever filtered_copy() returns.
sub solr_xml_in_to_solr_xml
{
    my $self = shift @_;
    my ($solr_xml_dir) = @_;

    my $gsdl3home = $ENV{'GSDL3HOME'};

    if (!defined $solr_xml_dir || !-d $solr_xml_dir) {
        # if not passed in, use stored solr home
        $solr_xml_dir = $self->{'solr_live_home'};
    }

    my $solrxml_in = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $solrxml = &util::filename_cat($solr_xml_dir, "solr.xml");

    # filtered_copy() inserts the replacement text literally, so it must be
    # the plain path. A regex-escaped form of the path would have its
    # escape characters written into solr.xml.
    my $replacement_map = { qr/\@gsdl3home\@/ => $gsdl3home };

    return $self->filtered_copy($solrxml_in,$solrxml,$replacement_map);
}
295
296
# Some of the Solr CoreAdmin API calls available.
# See http://wiki.apache.org/solr/CoreAdmin

# Send the CoreAdmin RELOAD action for the core named $core.
# Returns whatever _admin_service() returns.
sub admin_reload_core
{
    my ($self, $core) = @_;

    return $self->_admin_service("action=RELOAD&core=" . $core);
}
309
# Send the CoreAdmin RENAME action: $oldcore is passed as 'core' and
# $newcore as 'other'.
# Returns whatever _admin_service() returns.
sub admin_rename_core
{
    my ($self, $oldcore, $newcore) = @_;

    my $query = join("&", "action=RENAME", "core=$oldcore", "other=$newcore");

    return $self->_admin_service($query);
}
320
# Send the CoreAdmin SWAP action: $oldcore is passed as 'core' and
# $newcore as 'other'.
# Returns whatever _admin_service() returns.
sub admin_swap_core
{
    my ($self, $oldcore, $newcore) = @_;

    my $query = join("&", "action=SWAP", "core=$oldcore", "other=$newcore");

    return $self->_admin_service($query);
}
331
# The ALIAS action is not supported in our version of solr (despite it
# being marked as experimental in the documentation for Core Admin)
#
# Send the CoreAdmin ALIAS action: $oldcore is passed as 'core' and
# $newcore as 'other'.
# Returns whatever _admin_service() returns.
sub admin_alias_core
{
    my ($self, $oldcore, $newcore) = @_;

    my $query = join("&", "action=ALIAS", "core=$oldcore", "other=$newcore");

    return $self->_admin_service($query);
}
344
# Send the CoreAdmin CREATE action for a new core.
#
# Parameters:
#   $core            - name of the core to create. The text after its last
#                      '-' names the data sub-directory.
#   $data_parent_dir - optional, can be index_dir. Defaults to the stored
#                      'build_dir' if not provided.
#
# instanceDir is the "etc" directory under $ENV{'GSDLCOLLECTDIR'}; dataDir
# is the data sub-directory under $data_parent_dir.
# Returns whatever _admin_service() returns.
sub admin_create_core
{
    my ($self, $core, $data_parent_dir) = @_;

    my ($ds_idx) = ($core =~ m/^.*-(.*?)$/);

    my $etc_dirname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc");

    $data_parent_dir = $self->{'build_dir'} unless (defined $data_parent_dir);

    my $idx_dirname = &util::filename_cat($data_parent_dir, $ds_idx); # "dataDir"

    my $cgi_get_args = join("&",
                            "action=CREATE",
                            "name=$core",
                            "instanceDir=$etc_dirname",
                            "dataDir=$idx_dirname");

    return $self->_admin_service($cgi_get_args);
}
369
# removes (unloads) core from the ext/solr/solr.xml config file
#
# Parameters:
#   $core   - name of the core to unload.
#   $delete - optional; when it equals 1, deleteIndex=true is added to the
#             request.
#
# Returns whatever _admin_service() returns.
sub admin_unload_core
{
    my ($self, $core, $delete) = @_;

    my @params = ("action=UNLOAD", "core=$core");

    # &deleteIndex=true available from Solr3.3, see https://wiki.apache.org/solr/CoreAdmin.
    # Also available since later Solr versions: deleteDataDir and deleteInstanceDir
    push(@params, "deleteIndex=true") if (defined $delete && $delete == 1);

    return $self->_admin_service(join("&", @params));
}
386
# Make sure the tomcat server hosting Solr is running, starting it with
# "ant start" (run from $ENV{'GSDL3SRCHOME'}) if it does not respond.
#
# Parameters:
#   $verbosity - optional, defaults to 1; values above 1 print the start
#                command.
#
# Side effects:
#   - stores the live Solr home ($GSDL3HOME/ext/solr) under 'solr_live_home'
#   - changes the current directory to $ENV{'GSDL3SRCHOME'}
#   - sets 'server_explicitly_started' to 1 if this call started the server
#     and to 0 if a running server was found
#   - exits the program with -1 if the server cannot be started
sub start
{
    my $self = shift @_;
    my ($verbosity) = @_;

    $verbosity = 1 unless defined $verbosity;

    my $solr_live_home = &util::filename_cat($ENV{'GSDL3HOME'}, "ext", "solr");
    $self->{'solr_live_home'} = $solr_live_home; # will be used later to generate solr.xml.in from solr.xml and vice-versa
    my $server_port = $ENV{'SOLR_PORT'};
    my $server_host = $ENV{'SOLR_HOST'};

    chdir($ENV{'GSDL3SRCHOME'});

    my $server_java_cmd = "ant start";

    my $server_status = "unknown";

    if ($self->server_running()) {
        $server_status = "already-running";
        print STDERR "@@@@ server already running\n\n";
    }
    elsif (open(STARTIN,"$server_java_cmd 2>&1 |")) {

        print STDERR "@@@@ need to start tomcat\n\n";
        print STDERR "**** starting up tomcat server with cmd start =\n $server_java_cmd\n" if ($verbosity > 1);

        # echo everything the start command prints until its output ends
        my $line;
        while (defined($line=<STARTIN>)) {
            print "Tomcat startup: $line";
        }

        if ($self->server_running()) {
            $server_status = "explicitly-started";
        } else {
            $server_status = "failed-to-start"; # no need to set this, will be exiting below anyway

            print STDERR "Error: failed to start greenstone tomcat server\n";
            print STDERR "$!\n";
            print STDERR "Command attempted was:\n";
            print STDERR " $server_java_cmd\n";
            print STDERR "run from directory:\n";
            print STDERR " $ENV{'GSDL3SRCHOME'}\n";
            print STDERR "----\n";

            exit -1;
        }
    }
    else {
        print STDERR "@@@@ failed to start tomcat\n\n";
        $server_status = "failed-to-start"; # no need to set this, will be exiting below anyway

        print STDERR "Error: unable to start greenstone tomcat server\n";
        print STDERR "$!\n";
        print STDERR "Command attempted was:\n";
        print STDERR " $server_java_cmd\n";
        print STDERR "run from directory:\n";
        print STDERR " $ENV{'GSDL3SRCHOME'}\n";
        print STDERR "----\n";

        exit -1;
    }

    if ($server_status eq "explicitly-started") {
        $self->{'server_explicitly_started'} = 1;
        print "Tomcat server ready and listening for connections at ";
        print " $server_host:$server_port\n";

        # now we know the server is ready to accept connections, fork a
        # child process that continues to listen to the output and
        # prints out any further lines

        my $pid = fork();

        if (!defined $pid) {
            # fork failed. This must be tested before the comparison with
            # 0: undef == 0 is true, which would send the parent down the
            # child branch and make it exit.
            print STDERR "Warning: unable to fork a process to monitor tomcat output\n";
            print STDERR "$!\n";
        }
        elsif ($pid == 0) {
            # child process

            my $line;
            while (defined ($line = <STARTIN>)) {

                # if here, then some non-trivial message has been logged
                print "Tomcat/Solr processing: $line";
            }
            close(STARTIN);

            # And now stop nicely
            exit 0;
        }
        # otherwise let the parent continue on
    }
    elsif ($server_status eq "already-running") {
        print STDERR "Using existing tomcat server detected at $server_host:$server_port\n";
        $self->{'server_explicitly_started'} = 0;
    }
    elsif ($server_status eq "failed-to-start") {
        # kept as a safety net: both failure paths above already exit
        print STDERR "Started Solr/Tomcat web server at $server_host:$server_port";
        print STDERR ", but encountered an initialization error\n";
        exit -1;
    }

}
489
# Accessor for the 'server_explicitly_started' flag: undefined until
# start() has set it, then 1 if start() launched the server itself or 0 if
# it found one already running.
sub explicitly_started
{
    my ($self) = @_;

    my $flag = $self->{'server_explicitly_started'};

    return $flag;
}
496
# Stop the tomcat server by running "ant stop" from $ENV{'GSDL3SRCHOME'}.
#
# Parameters:
#   $options - optional hash reference with the keys:
#                'do_wait'          - when true (the default), wait() for a
#                                     child process after the stop command
#                                     has finished
#                'output_verbosity' - defaults to 1; above 0 prints a
#                                     shutdown notice, above 1 also prints
#                                     the stop command
#
# Exits the program with -2 if the stop command cannot be launched.
sub stop
{
    my $self = shift @_;
    my ($options) = @_;

    # the directory the stop command is run from; also the one reported
    # if the command cannot be launched
    my $run_dir = $ENV{'GSDL3SRCHOME'};

    chdir($run_dir);

    # defaults
    my $do_wait = 1;
    my $output_verbosity = 1;

    if (defined $options) {
        if (defined $options->{'do_wait'}) {
            $do_wait = $options->{'do_wait'};
        }
        if (defined $options->{'output_verbosity'}) {
            $output_verbosity = $options->{'output_verbosity'};
        }
    }

    my $server_java_cmd = "ant stop";

    print STDERR "**** java server stop cmd:\n $server_java_cmd\n" if ($output_verbosity>1);

    if (open(my $stop_fh, '-|', "$server_java_cmd 2>&1")) {

        while (defined(my $line = <$stop_fh>)) {
            print "@@@@ Tomcat shutdown: $line"; #if ($output_verbosity>1);
        }
        close($stop_fh);

        if ($do_wait) {
            wait(); # let the child process finish
        }

        if ($output_verbosity>0) {
            print "@@@@@ Tomcat server shutdown\n";
        }
    }
    else {
        print STDERR "Error: failed to stop tomcat-server\n";
        print STDERR "$!\n";
        print STDERR "Command attempted was:\n";
        print STDERR " $server_java_cmd\n";
        print STDERR "run from directory:\n";
        print STDERR " $run_dir\n";
        print STDERR "----\n";

        exit -2;
    }
}
551
552
553
5541;
Note: See TracBrowser for help on using the repository browser.