source: gs3-extensions/solr/trunk/src/perllib/solrserver.pm

Last change on this file was 37786, checked in by kjdon, 10 months ago

Added admin_unload_all_cores_for_prefix: when we are rebuilding a collection, we don't know which cores were previously there (e.g. if it had subcollections and they have since changed or gone), so we just unload all existing cores for that collection.

File size: 16.2 KB
Line 
1###########################################################################
2#
3# solrserver.pm -- class for starting and stopping the Solr with the
4# GS3 tomcat server.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package solrserver;
29
30use strict;
31#no strict 'refs';
32
33use solrutil;
34
# Constructor for a solrserver object.
#
# Parameters:
#   $build_dir - stored under 'build_dir'; admin_create_core() falls back to
#                it when no data parent directory is passed in.
#
# Side effects: overwrites $ENV{'SOLR_HOST'} and $ENV{'SOLR_PORT'} with the
# host and port parsed out of the solr servlet URL reported by solrutil, so
# that other perl code reading those env vars sees the same values.
#
# Returns the blessed object.
sub new {
    my ($class, $build_dir) = @_;

    # The value is not used in this constructor; the call itself is retained
    # unchanged (NOTE(review): confirm whether it is still needed).
    my $search_path = &solrutil::get_search_path();

    # solrutil supplies the servlet URL and knows how to split it up. Only the
    # host and port parts are used below.
    my $solr_url = &solrutil::get_solr_servlet_url();
    my ($protocol, $server_host, $server_port, $servlet_name)
        = &solrutil::get_solr_url_parts($solr_url);

    # Propagate whatever was discovered to any code still relying on the env vars.
    $ENV{'SOLR_HOST'} = $server_host;
    $ENV{'SOLR_PORT'} = $server_port;

    my $self = {
        'build_dir'                 => $build_dir,
        'server_explicitly_started' => undef,
        'base-url'                  => $solr_url,    # e.g. of the form http://localhost:8383/solr
        'admin-url'                 => "$solr_url/admin/cores",
    };

    return bless $self, $class;
}
64
# Accessor: returns the value held under this object's 'base-url' key.
sub get_solr_base_url {
    my ($self) = @_;

    my $base_url = $self->{'base-url'};
    return $base_url;
}
69
# Fetches a URL (plus optional query string) by running the external wget
# program through the shell, and returns a hash reference describing what
# came back.
#
# Parameters:
#   $output_format - "xml" makes the reader treat every line before the first
#                    line containing <...> as preamble; any other value puts
#                    every line straight into 'output'
#   $url           - URL to fetch
#   $cgi_get_args  - optional query string, appended after a '?'
#
# Returns a hash reference with the keys:
#   'url'      - the URL that was actually requested, query string included
#   'preamble' - lines seen before the first markup line ("xml" format only)
#   'output'   - the remaining lines
#   'error'    - the wget output line that signalled a problem (or a message
#                if wget could not be launched); undef if none was detected
#
# Any detected problem is also reported on STDERR.
sub _wget_service
{
    my $self = shift (@_);
    my ($output_format,$url,$cgi_get_args) = @_;

    $url .= "?$cgi_get_args" if (defined $cgi_get_args);

    # Record the URL only after the query string has been appended, so that
    # error reports name the request that really failed.
    my $full_url = $url;

##    print STDERR "\n\n**** _wget_service SOLR WEB URL: $url\n\n";

    # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib)
    # being set, particularly on Mac Lion binaries (android too?)
    &util::set_gnomelib_env();

    # NOTE(review): $url is interpolated into a shell command line. That is fine
    # for the internally built admin URLs, but it must never carry untrusted text.
    my $cmd = "wget -O - \"$url\" 2>&1";

    my $preamble_output = "";
    my $xml_output      = "";
    my $error_output    = undef;
    my $is_error        = 0;

    my $in_preamble = ($output_format eq "xml") ? 1 : 0;

##    print STDERR "**** wgetcmd = \n $cmd\n";

    if (open(my $wget_fh, '-|', $cmd)) {

        while (defined (my $line = <$wget_fh>)) {

            if ($line =~ m/ERROR \d+:/) {
                # wget reached the server but got an HTTP error status back
                chomp $line;
                $error_output = $line;
                $is_error = 1;
                last;
            }
            elsif ($line =~ m/failed:/i) {
                # Any "failed: ..." message from wget, e.g. "Connection refused",
                # or "Bad file descriptor" as seen on windows with no server running.
                chomp $line;
                $error_output = $line;
                last;
            }
            elsif ($in_preamble) {
                if ($line =~ m/<.*>/) {
                    # first line of markup: the preamble is over
                    $in_preamble = 0;
                }
                else {
                    $preamble_output .= $line;
                }
            }

            # The line that ends the preamble is itself part of the output.
            if (! $in_preamble) {
                $xml_output .= $line;
            }
        }
        close($wget_fh);
    }
    else {
        $error_output = "Error: failed to run $cmd\n";
        $error_output .= " $!\n";
    }

    if (defined $error_output) {
        if ($is_error) {
            print STDERR "\n\n**** WGET_SERVICE got an error: $error_output\n\n";
        } else {
            print STDERR "\n\n**** WGET_SERVICE got: $error_output. (GS3 server likely not running.)\n\n";
        }
    }

    my $output = { 'url'      => $full_url,
                   'preamble' => $preamble_output,
                   'output'   => $xml_output,
                   'error'    => $error_output };

    return $output;
}
149
150
# Requests the solr base URL (with optional query string $cgi_get_args) via
# _wget_service() using the "html" output format, and returns its result.
sub _base_service
{
    my ($self, $cgi_get_args) = @_;

    return $self->_wget_service("html", $self->{'base-url'}, $cgi_get_args);
}
160
# Requests the solr core-admin URL (with optional query string $cgi_get_args)
# via _wget_service() using the "xml" output format, and returns its result.
sub _admin_service
{
    my ($self, $cgi_get_args) = @_;

    return $self->_wget_service("xml", $self->{'admin-url'}, $cgi_get_args);
}
170
171
# Probes the base URL and reports whether the request came back without an
# 'error' entry. Returns 1 if so, 0 otherwise.
sub server_running
{
    my ($self) = @_;

    my $response = $self->_base_service();

    return (defined $response->{'error'}) ? 0 : 1;
}
184
# Unloads (retaining their indexes) every core listed in the STATUS response
# whose name is "<coreprefix>-" followed by lowercase letters.
#
# Parameters:
#   $coreprefix - literal prefix of the core names to unload
#
# If the STATUS request itself fails, the error is printed and nothing is
# unloaded.
sub admin_unload_all_cores_for_prefix
{
    my $self = shift @_;
    my ($coreprefix) = @_;

    my $cgi_get_args = "action=STATUS&indexInfo=false";
    my $output = $self->_admin_service($cgi_get_args);

    if (defined $output->{'error'}) {
        # severe error, such as failing to connect to the server
        $self->print_error($output);
        return;
    }

    my $xml_output = $output->{'output'};

    # \Q...\E makes the prefix match literally. Without it, a prefix containing
    # regex metacharacters (e.g. the '.' in "my.coll") would also match, and so
    # unload, the cores of differently named collections.
    my $matching_element = qr/<lst\s+name="(\Q$coreprefix\E-[a-z]+)">/;
    my @matches = ($xml_output =~ m/$matching_element/g);

    foreach my $core (@matches) {
        print STDERR "unloading solr core $core\n";
        $self->admin_unload_core_explicitly_retaining_index($core);
    }
}
209
# Writes a framed error report to STDERR for a failed service request.
#
# Parameters:
#   $output - hash reference with 'url', 'preamble' and 'error' entries, as
#             produced by _wget_service()
sub print_error {
    my ($self, $output) = @_;

    my $url      = $output->{'url'};
    my $preamble = $output->{'preamble'};
    my $error    = $output->{'error'};

    my $report = "----\n"
               . "Error: Failed to get XML response from:\n"
               . " $url\n"
               . "Output was:\n";

    # The preamble is only echoed when there is one.
    $report .= $preamble if ($preamble ne "");

    $report .= "$error\n";
    $report .= "----\n";

    print STDERR $report;
}
226
# Asks the core-admin service for the STATUS of $core and reports whether the
# core appears to exist.
#
# Parameters:
#   $core - name of the core to check
#
# Returns 0 if the request failed (the error is printed), a false value if the
# response contains an empty <lst name="$core"/> element, and a true value
# otherwise.
sub admin_ping_core
{
    my $self = shift @_;
    my ($core) = @_;

    my $cgi_get_args = "action=STATUS&core=$core";

    my $ping_status = 1;

    my $output = $self->_admin_service($cgi_get_args);

    if (defined $output->{'error'}) {
        # severe error, such as failing to connect to the server
        $ping_status = 0;
        $self->print_error($output);
    }
    else {
        # If the collection doesn't exist yet, then there will be
        # an empty element of the form:
        #   <lst name="collect-doc"/>
        # where 'collect' is the actual name of the collection,
        # such as demo

        my $xml_output = $output->{'output'};

        # \Q...\E: the core name must match literally, so that metacharacters
        # in it (e.g. '.') cannot match the empty element of a different core.
        my $empty_element = qr/<lst\s+name="\Q$core\E"\s*\/>/s;

        $ping_status = !($xml_output =~ $empty_element);
    }

    return $ping_status;
}
261
# Copies a UTF-8 text file, applying regular-expression substitutions to its
# content on the way.
#
# Parameters:
#   $src_file         - file to read
#   $dst_file         - file to write (overwritten)
#   $re_substitutions - hash reference of the form: [re_key] => subst_str;
#                       every match of each key is replaced by its value
#
# Returns 1 on success and 0 on failure. When the source cannot be read, the
# destination is left untouched, so a missing input no longer replaces an
# existing destination with an empty file.
sub filtered_copy
{
    my $self = shift @_;
    my ($src_file, $dst_file, $re_substitutions) = @_;

    my $fin;
    if (!open($fin, '<:utf8', $src_file)) {
        print STDERR "Error: Failed to open file '$src_file' for reading.\n$!\n";
        return 0;
    }

    # Slurp the whole file in one read.
    my $content = do { local $/ = undef; <$fin> };
    $content = "" if (!defined $content);
    close($fin);

    # perform RE string substitutions
    foreach my $re_key (keys %{ $re_substitutions || {} }) {
        my $subst_str = $re_substitutions->{$re_key};

        # A qr// object used as a hash key has been stringified, but it still
        # works as a pattern when interpolated here.
        $content =~ s/$re_key/$subst_str/g;
    }

    my $fout;
    if (!open($fout, '>:utf8', $dst_file)) {
        print STDERR "Error: Failed to open file '$dst_file' for writing.\n$!\n";
        return 0;
    }

    print $fout $content;

    # Buffered write errors only surface when the handle is closed.
    if (!close($fout)) {
        print STDERR "Error: Failed to finish writing file '$dst_file'.\n$!\n";
        return 0;
    }

    return 1;
}
305
# Generates solr.xml.in from solr.xml in $solr_xml_dir, replacing every
# occurrence of the GSDL3HOME path with the placeholder @gsdl3home@.
#
# Parameters:
#   $solr_xml_dir - directory holding solr.xml; when undefined or not a
#                   directory, the stored 'solr_live_home' is used instead
sub solr_xml_to_solr_xml_in
{
    my ($self, $solr_xml_dir) = @_;

    my $gsdl3home = $ENV{'GSDL3HOME'};

    # if not passed in (or not usable), use stored solr_live_home
    unless (defined $solr_xml_dir && -d $solr_xml_dir) {
        $solr_xml_dir = $self->{'solr_live_home'};
    }

    my $template_file = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $live_file     = &util::filename_cat($solr_xml_dir, "solr.xml");

    # pattern form of the path, as produced by util::filename_to_regex
    my $gsdl3home_re = &util::filename_to_regex($gsdl3home);

    my %replacement_map = ( qr/$gsdl3home_re/ => "\@gsdl3home\@" );

    $self->filtered_copy($live_file, $template_file, \%replacement_map);
}
327
328
# Generates solr.xml from solr.xml.in in $solr_xml_dir, replacing every
# @gsdl3home@ placeholder with the value derived from GSDL3HOME.
#
# Parameters:
#   $solr_xml_dir - directory holding solr.xml.in; when undefined or not a
#                   directory, the stored 'solr_live_home' is used instead
sub solr_xml_in_to_solr_xml
{
    my ($self, $solr_xml_dir) = @_;

    my $gsdl3home = $ENV{'GSDL3HOME'};

    # if not passed in (or not usable), use stored solr home
    unless (defined $solr_xml_dir && -d $solr_xml_dir) {
        $solr_xml_dir = $self->{'solr_live_home'};
    }

    my $template_file = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $live_file     = &util::filename_cat($solr_xml_dir, "solr.xml");

    # NOTE(review): the replacement text is the output of
    # util::filename_to_regex, i.e. the pattern form of the path rather than
    # the raw path. Confirm that this is what should end up in solr.xml.
    my $gsdl3home_re = &util::filename_to_regex($gsdl3home);

    my %replacement_map = ( qr/\@gsdl3home\@/ => $gsdl3home_re );

    $self->filtered_copy($template_file, $live_file, \%replacement_map);
}
348
349
350# Some of the Solr CoreAdmin API calls available.
351# See http://wiki.apache.org/solr/CoreAdmin
# Sends the core-admin RELOAD action for $core and returns the service result.
sub admin_reload_core
{
    my ($self, $core) = @_;

    return $self->_admin_service("action=RELOAD&core=$core");
}
362
# Sends the core-admin RENAME action, renaming $oldcore to $newcore, and
# returns the service result.
sub admin_rename_core
{
    my ($self, $oldcore, $newcore) = @_;

    return $self->_admin_service("action=RENAME&core=$oldcore&other=$newcore");
}
373
# Sends the core-admin SWAP action for $oldcore and $newcore and returns the
# service result.
sub admin_swap_core
{
    my ($self, $oldcore, $newcore) = @_;

    return $self->_admin_service("action=SWAP&core=$oldcore&other=$newcore");
}
384
385# The ALIAS action is not supported in our version of solr (despite it
386# being marked as experimental in the documentation for Core Admin)
# Sends the core-admin ALIAS action for $oldcore and $newcore and returns the
# service result.
sub admin_alias_core
{
    my ($self, $oldcore, $newcore) = @_;

    return $self->_admin_service("action=ALIAS&core=$oldcore&other=$newcore");
}
397
# Sends the core-admin CREATE action for $core and returns the service result.
#
# Parameters:
#   $core            - name of the core; the text after its last '-' names
#                      the data sub-directory
#   $data_parent_dir - optional, can be index_dir; defaults to the 'build_dir'
#                      given to the constructor
#
# instanceDir is the "etc" folder under $ENV{'GSDLCOLLECTDIR'}; dataDir is
# the data sub-directory under the data parent directory.
sub admin_create_core
{
    my ($self, $core, $data_parent_dir) = @_;

    # suffix following the final hyphen of the core name
    my ($ds_idx) = ($core =~ m/^.*-(.*?)$/);

    my $etc_dirname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc");

    $data_parent_dir = $self->{'build_dir'} unless (defined $data_parent_dir);

    my $idx_dirname = &util::filename_cat($data_parent_dir, $ds_idx);  # "dataDir"

    my $cgi_get_args = join("&",
                            "action=CREATE&name=$core",
                            "instanceDir=$etc_dirname",
                            "dataDir=$idx_dirname");

    return $self->_admin_service($cgi_get_args);
}
422
# Removes (unloads) a core from the ext/solr/solr.xml config file by sending
# the core-admin UNLOAD action, and returns the service result.
#
# Parameters:
#   $core   - name of the core to unload
#   $delete - optional; only the value 1 adds deleteIndex=true to the request
sub admin_unload_core
{
    my ($self, $core, $delete) = @_;

    my $cgi_get_args = "action=UNLOAD&core=$core";

    # &deleteIndex=true available from Solr3.3, see https://wiki.apache.org/solr/CoreAdmin.
    # Also available since later Solr versions: deleteDataDir and deleteInstanceDir
    my $delete_index = (defined $delete && $delete == 1);
    $cgi_get_args .= "&deleteIndex=true" if ($delete_index);

    return $self->_admin_service($cgi_get_args);
}
439
# Unloads $core while explicitly asking for its index to be kept, and returns
# the result of admin_unload_core().
#
# For UNLOAD core params, see page 315 of
# https://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf
#
# Keeping the index is already what admin_unload_core() does when no delete
# flag is given. The flag is nevertheless forced to 0 here so that callers
# relying on the index (sidx/didx folder) surviving, such as
# solrbuilder::post_build_indexes(), keep working even if that default were
# ever changed.
sub admin_unload_core_explicitly_retaining_index
{
    my ($self, $core) = @_;

    my $delete_index = 0;
    return $self->admin_unload_core($core, $delete_index);
}
459
460
# Helper for start(): prints the diagnostic block for a failed tomcat start
# to STDERR and exits the program with status -1.
#
# Parameters:
#   $headline        - first line of the report
#   $os_error        - text of $! captured at the point of failure
#   $server_java_cmd - the command that was attempted
sub _report_start_failure
{
    my $self = shift @_;
    my ($headline, $os_error, $server_java_cmd) = @_;

    print STDERR "$headline\n";
    print STDERR "$os_error\n";
    print STDERR "Command attempted was:\n";
    print STDERR " $server_java_cmd\n";
    print STDERR "run from directory:\n";
    print STDERR " $ENV{'GSDL3SRCHOME'}\n";
    print STDERR "----\n";

    exit -1;
}

# Makes sure the tomcat server is up, launching it with "ant start" from
# $ENV{'GSDL3SRCHOME'} if it is not already responding.
#
# Parameters:
#   $verbosity - optional, defaults to 1; values above 1 also print the start
#                command
#
# Side effects: stores 'solr_live_home' on the object, changes the working
# directory to GSDL3SRCHOME, and sets 'server_explicitly_started' to 1 when
# this call launched the server or 0 when one was already running. If the
# server cannot be started, a report is printed and the program exits with
# status -1.
sub start
{
    my $self = shift @_;
    my ($verbosity) = @_;

    $verbosity = 1 unless defined $verbosity;

    # will be used later to generate solr.xml.in from solr.xml and vice-versa
    $self->{'solr_live_home'} = &util::filename_cat($ENV{'GSDL3HOME'}, "ext", "solr");

    my $server_port = $ENV{'SOLR_PORT'};
    my $server_host = $ENV{'SOLR_HOST'};

    # "ant start" has to be run from the GS3 source home. Say so if we could
    # not get there, rather than failing obscurely later on.
    my $src_home = $ENV{'GSDL3SRCHOME'};
    if (!defined $src_home || !chdir($src_home)) {
        print STDERR "Warning: unable to change directory to GSDL3SRCHOME\n";
    }

    my $server_java_cmd = "ant start";

    my $server_status;
    my $startin;

    if ($self->server_running()) {
        $server_status = "already-running";
    }
    elsif (open($startin, '-|', "$server_java_cmd 2>&1")) {

        print STDERR "**** starting up tomcat server with cmd start =\n $server_java_cmd\n" if ($verbosity > 1);

        # Echo ant's output until it reports success (or the output ends).
        while (defined(my $line = <$startin>)) {
            print "Tomcat startup: $line";
            last if ($line =~ m/^BUILD SUCCESSFUL/);
        }

        close($startin);

        if (!$self->server_running()) {
            # does not return
            $self->_report_start_failure("Error: failed to start greenstone tomcat server",
                                         "$!", $server_java_cmd);
        }

        $server_status = "explicitly-started";
    }
    else {
        # does not return
        $self->_report_start_failure("Error: unable to start greenstone tomcat server",
                                     "$!", $server_java_cmd);
    }

    # Both failure paths above have already exited, so only two states remain.
    if ($server_status eq "explicitly-started") {
        $self->{'server_explicitly_started'} = 1;
        print "Tomcat server ready and listening for connections at ";
        print " $server_host:$server_port\n";

        # now we know the server is ready to accept connections
    }
    else {
        print STDERR "Using existing tomcat server detected at $server_host:$server_port\n";
        $self->{'server_explicitly_started'} = 0;
    }
}
551
# Accessor: returns the value held under 'server_explicitly_started'
# (1 or 0 once start() has run, undef before that).
sub explicitly_started
{
    my ($self) = @_;

    my $flag = $self->{'server_explicitly_started'};
    return $flag;
}
558
# Shuts the tomcat server down by running "ant stop" from
# $ENV{'GSDL3SRCHOME'}.
#
# Parameters:
#   $options - optional hash reference:
#       'do_wait'          - default 1; when true, wait() is called after the
#                            command's output has been read
#       'output_verbosity' - default 1; above 0 prints a completion message,
#                            above 1 also echoes the command and its output
#
# If the command cannot be launched, a report is printed to STDERR and the
# program exits with status -2.
sub stop
{
    my $self = shift @_;
    my ($options) = @_;

    # "ant stop" has to be run from the GS3 source home. Say so if we could
    # not get there, rather than failing obscurely later on.
    my $src_home = $ENV{'GSDL3SRCHOME'};
    if (!defined $src_home || !chdir($src_home)) {
        print STDERR "Warning: unable to change directory to GSDL3SRCHOME\n";
    }

    # defaults
    my $do_wait = 1;
    my $output_verbosity = 1;

    if (defined $options) {
        if (defined $options->{'do_wait'}) {
            $do_wait = $options->{'do_wait'};
        }
        if (defined $options->{'output_verbosity'}) {
            $output_verbosity = $options->{'output_verbosity'};
        }
    }

    my $server_java_cmd = "ant stop";

    print STDERR "**** java server stop cmd:\n $server_java_cmd\n" if ($output_verbosity>1);

    if (open(my $stopin, '-|', "$server_java_cmd 2>&1")) {

        while (defined(my $line = <$stopin>)) {
            print "Tomcat shutdown: $line" if ($output_verbosity>1);
        }
        close($stopin);

        # NOTE(review): close() on a piped handle already waits for the command
        # to exit, so this wait() looks redundant. It is kept to preserve the
        # existing do_wait behaviour.
        if ($do_wait) {
            wait(); # let the child process finish
        }

        if ($output_verbosity>0) {
            print "Tomcat server shutdown\n";
        }
    }
    else {
        print STDERR "Error: failed to stop tomcat-server\n";
        print STDERR "$!\n";
        print STDERR "Command attempted was:\n";
        print STDERR " $server_java_cmd\n";
        print STDERR "run from directory:\n";
        # Report the directory the command was really run from (the chdir
        # target above). GEXT_SOLR, reported previously, is not that directory.
        print STDERR " $ENV{'GSDL3SRCHOME'}\n";
        print STDERR "----\n";

        exit -2;
    }
}
613
614
615
6161;
Note: See TracBrowser for help on using the repository browser.