source: gs3-extensions/solr/trunk/src/perllib/solrserver.pm@ 33392

Last change on this file since 33392 was 33392, checked in by ak19, 5 years ago

Kathy found a problem whereby she wanted to run consecutive buildcols without activate on a solr collection. She experienced file locking issues on Windows, which the original solr related building code would inevitably cause without activate. Dr Bainbridge's solution was to change our way of thinking about what activate and buildcol should now be doing as regards solr collections. The solution was to unload building-cores for indexes at the end of buildcol, instead of only doing this during activate.pl. I've tried to be conservative with the changes made to the existing code, so that activate still attempts to also unload building-cores, but first pings them (and any other cores it attempts to unload) to ensure the cores exist. During buildcol too, the building-cores are pinged to check they exist before we attempt to unload them.

File size: 15.4 KB
Line 
1###########################################################################
2#
3# solrserver.pm -- class for starting and stopping the Solr with the
4# GS3 tomcat server.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package solrserver;
29
30use strict;
31#no strict 'refs';
32
33use solrutil;
34
# Constructor.
#
# Parameters:
#   $build_dir - kept on the object under the 'build_dir' key
#
# Side effects: overwrites $ENV{'SOLR_HOST'} and $ENV{'SOLR_PORT'} with the
# host and port that solrutil extracts from the solr servlet URL, so that
# any older perl code still reading those env vars sees the current values.
#
# Returns the new blessed object; 'base-url' holds the servlet URL
# (e.g. of the form http://localhost:8383/solr) and 'admin-url' the
# core-admin URL derived from it.
sub new {
    my ($class, $build_dir) = @_;

    # The result is not used in this constructor; the call is retained as is.
    my $search_path = &solrutil::get_search_path();

    # Only host and port are needed out of the url parts.
    my $solr_url = &solrutil::get_solr_servlet_url();
    my ($protocol, $server_host, $server_port, $servlet_name)
        = &solrutil::get_solr_url_parts($solr_url);

    $ENV{'SOLR_HOST'} = $server_host;
    $ENV{'SOLR_PORT'} = $server_port;

    my $self = {
        'build_dir'                 => $build_dir,
        'server_explicitly_started' => undef,
        'base-url'                  => $solr_url,
        'admin-url'                 => "$solr_url/admin/cores",
    };

    return bless $self, $class;
}
64
# Accessor: returns the value stored under the object's 'base-url' key.
sub get_solr_base_url {
    my ($self) = @_;

    my $base_url = $self->{'base-url'};
    return $base_url;
}
69
# Fetches a URL by running the external wget program and collects what it
# prints (stdout and stderr merged).
#
# Parameters:
#   $output_format - when "xml", lines read before the first line that
#                    contains markup (<...>) are collected separately as
#                    preamble; for any other value every line goes into
#                    the main output
#   $url           - the URL to fetch, without a query string
#   $cgi_get_args  - optional query string (without the leading '?')
#
# Returns a hash reference with the keys:
#   'url'      - the URL that was requested, including the query string
#   'preamble' - the preamble lines (empty string if none)
#   'output'   - the remaining lines read before any error line was seen
#   'error'    - undef if no problem was detected; otherwise the wget line
#                that reported the problem, or a message saying that the
#                command could not be run
sub _wget_service
{
    my $self = shift (@_);
    my ($output_format, $url, $cgi_get_args) = @_;

    $url .= "?$cgi_get_args" if (defined $cgi_get_args);

    # Remember the URL only after the query string has been appended, so
    # that the 'url' entry handed back describes the request actually made.
    my $full_url = $url;

    print STDERR "\n\n**** _wget_service SOLR WEB URL: $url\n\n";

    # the wget binary is dependent on the gnomelib_env (particularly
    # lib/libiconv2.dylib) being set, particularly on Mac Lion binaries
    &util::set_gnomelib_env();

    my $cmd = "wget -O - \"$url\" 2>&1";

    my $preamble_output = "";
    my $xml_output      = "";
    my $error_output    = undef;
    my $is_error        = 0;

    my $in_preamble = ($output_format eq "xml") ? 1 : 0;

    my $wget_fh;
    if (open($wget_fh, "$cmd |")) {

        while (defined(my $line = <$wget_fh>)) {

            if ($line =~ m/ERROR \d+:/) {
                # wget reported an "ERROR <number>:" line
                chomp $line;
                $error_output = $line;
                $is_error = 1;
                last;
            }
            elsif ($line =~ m/failed:/i) {
                # Any "failed: ..." wget message, which covers
                # "failed: Connection refused" as well as the
                # "failed: Bad file descriptor" seen on windows when the
                # server wasn't running.
                chomp $line;
                $error_output = $line;
                last;
            }
            elsif ($in_preamble) {
                if ($line =~ m/<.*>/) {
                    # first line with markup: the preamble is over
                    $in_preamble = 0;
                }
                else {
                    $preamble_output .= $line;
                }
            }

            # The line that ended the preamble is itself part of the output.
            if (! $in_preamble) {
                $xml_output .= $line;
            }
        }
        close($wget_fh);
    }
    else {
        $error_output = "Error: failed to run $cmd\n";
        $error_output .= " $!\n";
    }

    if (defined $error_output) {
        if ($is_error) {
            print STDERR "\n\n**** WGET_SERVICE got an error: $error_output\n\n";
        }
        else {
            print STDERR "\n\n**** WGET_SERVICE got: $error_output. (GS3 server likely not running.)\n\n";
        }
    }

    my $output = { 'url'      => $full_url,
                   'preamble' => $preamble_output,
                   'output'   => $xml_output,
                   'error'    => $error_output };

    return $output;
}
149
150
# Requests the object's 'base-url' through _wget_service using the "html"
# output format.
#
# Parameters:
#   $cgi_get_args - optional query string, passed through unchanged
#
# Returns whatever _wget_service returns.
sub _base_service
{
    my ($self, $cgi_get_args) = @_;

    return $self->_wget_service("html", $self->{'base-url'}, $cgi_get_args);
}
160
# Requests the object's 'admin-url' through _wget_service using the "xml"
# output format.
#
# Parameters:
#   $cgi_get_args - optional query string, passed through unchanged
#
# Returns whatever _wget_service returns.
sub _admin_service
{
    my ($self, $cgi_get_args) = @_;

    return $self->_wget_service("xml", $self->{'admin-url'}, $cgi_get_args);
}
170
171
# Checks whether the server answers a request to the base URL.
#
# Returns 1 when the _base_service result carries no 'error' entry,
# otherwise 0.
sub server_running
{
    my ($self) = @_;

    my $response = $self->_base_service();

    if (defined $response->{'error'}) {
        return 0;
    }
    return 1;
}
184
185
# Asks the core-admin service for the STATUS of a core and reports whether
# the core exists.
#
# Parameters:
#   $core - name of the core to look for
#
# Returns 0 when the request itself failed (details are printed to STDERR),
# a false value when the response contains an empty status element for the
# core, and a true value otherwise.
sub admin_ping_core
{
    my $self = shift @_;
    my ($core) = @_;

    my $cgi_get_args = "action=STATUS&core=$core";

    my $output = $self->_admin_service($cgi_get_args);

    if (defined $output->{'error'}) {
        # severe error, such as failing to connect to the server
        my $url      = $output->{'url'};
        my $preamble = $output->{'preamble'};
        my $error    = $output->{'error'};

        print STDERR "----\n";
        print STDERR "Error: Failed to get XML response from:\n";
        print STDERR " $url\n";
        print STDERR "Output was:\n";
        print STDERR $preamble if ($preamble ne "");
        print STDERR "$error\n";
        print STDERR "----\n";

        return 0;
    }

    # If the collection doesn't exist yet, then there will be
    # an empty element of the form:
    #   <lst name="collect-doc"/>
    # where 'collect' is the actual name of the collection,
    # such as demo
    my $xml_output = $output->{'output'};

    # \Q...\E makes the core name match literally, so that characters such
    # as '.' or '+' in a name are not read as regular expression syntax.
    my $empty_element = qr/<lst\s+name="\Q$core\E"\s*\/>/s;

    return !($xml_output =~ $empty_element);
}
230
# Copies a UTF-8 text file, applying regular expression substitutions to
# its content on the way.
#
# Parameters:
#   $src_file         - file to read
#   $dst_file         - file to write (created or overwritten)
#   $re_substitutions - hash reference of the form: [re_key] => subst_str;
#                       every match of each key, used as a pattern, is
#                       replaced by the corresponding string
#
# If the source file cannot be opened, an error is printed to STDERR and
# the destination file is left untouched. A failure to open the
# destination file is likewise reported on STDERR.
sub filtered_copy
{
    my $self = shift @_;
    my ($src_file, $dst_file, $re_substitutions) = @_;

    my $fin;
    if (!open($fin, '<:utf8', $src_file)) {
        # Without any source content, writing would only replace the
        # destination with an empty file, so stop here instead.
        print STDERR "Error: Failed to open file '$src_file' for reading.\n$!\n";
        return;
    }

    my $content = "";
    while (defined(my $line = <$fin>)) {
        $content .= $line;
    }
    close($fin);

    # perform RE string substitutions, globally over the whole content
    foreach my $re_key (keys %$re_substitutions) {
        my $subst_str = $re_substitutions->{$re_key};
        $content =~ s/$re_key/$subst_str/g;
    }

    my $fout;
    if (open($fout, '>:utf8', $dst_file)) {
        print $fout $content;
        close($fout);
    }
    else {
        print STDERR "Error: Failed to open file '$dst_file' for writing.\n$!\n";
    }
}
274
# Generates solr.xml.in from solr.xml, replacing occurrences of the
# GSDL3HOME path with the placeholder @gsdl3home@.
#
# Parameters:
#   $solr_xml_dir - directory holding both files; when it is not passed in
#                   or is not a directory, the object's stored
#                   'solr_live_home' is used instead
sub solr_xml_to_solr_xml_in
{
    my ($self, $solr_xml_dir) = @_;

    unless (defined $solr_xml_dir && -d $solr_xml_dir) {
        # if not passed in, use stored solr_live_home
        $solr_xml_dir = $self->{'solr_live_home'};
    }

    my $solrxml_in = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $solrxml    = &util::filename_cat($solr_xml_dir, "solr.xml");

    # The GSDL3HOME path is turned into a pattern before being matched.
    my $gsdl3home_re = &util::filename_to_regex($ENV{'GSDL3HOME'});

    my %replacement_for = (qr/$gsdl3home_re/ => "\@gsdl3home\@");

    return $self->filtered_copy($solrxml, $solrxml_in, \%replacement_for);
}
296
297
# Generates solr.xml from solr.xml.in, replacing the placeholder
# @gsdl3home@ with a string derived from GSDL3HOME.
#
# Parameters:
#   $solr_xml_dir - directory holding both files; when it is not passed in
#                   or is not a directory, the object's stored
#                   'solr_live_home' is used instead
sub solr_xml_in_to_solr_xml
{
    my ($self, $solr_xml_dir) = @_;

    unless (defined $solr_xml_dir && -d $solr_xml_dir) {
        # if not passed in, use stored solr home
        $solr_xml_dir = $self->{'solr_live_home'};
    }

    my $solrxml_in = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $solrxml    = &util::filename_cat($solr_xml_dir, "solr.xml");

    # NOTE(review): the text substituted in is the output of
    # util::filename_to_regex() rather than the raw GSDL3HOME value, so any
    # escaping that helper adds would end up in solr.xml -- confirm this is
    # intended.
    my $gsdl3home_re = &util::filename_to_regex($ENV{'GSDL3HOME'});

    my %replacement_for = (qr/\@gsdl3home\@/ => $gsdl3home_re);

    return $self->filtered_copy($solrxml_in, $solrxml, \%replacement_for);
}
317
318
319# Some of the Solr CoreAdmin API calls available.
320# See http://wiki.apache.org/solr/CoreAdmin
# Sends the CoreAdmin RELOAD action for the given core.
#
# Parameters:
#   $core - name of the core to reload
#
# Returns whatever _admin_service returns.
sub admin_reload_core
{
    my ($self, $core) = @_;

    return $self->_admin_service("action=RELOAD&core=$core");
}
331
# Sends the CoreAdmin RENAME action.
#
# Parameters:
#   $oldcore - sent as the 'core' argument
#   $newcore - sent as the 'other' argument
#
# Returns whatever _admin_service returns.
sub admin_rename_core
{
    my ($self, $oldcore, $newcore) = @_;

    return $self->_admin_service("action=RENAME&core=$oldcore&other=$newcore");
}
342
# Sends the CoreAdmin SWAP action.
#
# Parameters:
#   $oldcore - sent as the 'core' argument
#   $newcore - sent as the 'other' argument
#
# Returns whatever _admin_service returns.
sub admin_swap_core
{
    my ($self, $oldcore, $newcore) = @_;

    return $self->_admin_service("action=SWAP&core=$oldcore&other=$newcore");
}
353
354# The ALIAS action is not supported in our version of solr (despite it
355# being marked as experimental in the documentation for Core Admin)
# Sends the CoreAdmin ALIAS action.
#
# Parameters:
#   $oldcore - sent as the 'core' argument
#   $newcore - sent as the 'other' argument
#
# Returns whatever _admin_service returns.
sub admin_alias_core
{
    my ($self, $oldcore, $newcore) = @_;

    return $self->_admin_service("action=ALIAS&core=$oldcore&other=$newcore");
}
366
# Sends the CoreAdmin CREATE action for the given core.
#
# Parameters:
#   $core            - name of the core to create; the text after its last
#                      hyphen names the data directory
#   $data_parent_dir - optional parent of the data directory (can be the
#                      index dir); defaults to the object's 'build_dir'
#
# The instanceDir sent is the "etc" directory under $ENV{'GSDLCOLLECTDIR'}.
#
# Returns whatever _admin_service returns.
sub admin_create_core
{
    my ($self, $core, $data_parent_dir) = @_;

    # everything following the last '-' in the core name
    my ($ds_idx) = ($core =~ m/^.*-(.*?)$/);

    my $etc_dirname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc");

    $data_parent_dir = $self->{'build_dir'} unless (defined $data_parent_dir);

    my $idx_dirname = &util::filename_cat($data_parent_dir, $ds_idx); # "dataDir"

    my $cgi_get_args = join("&",
                            "action=CREATE&name=$core",
                            "instanceDir=$etc_dirname",
                            "dataDir=$idx_dirname");

    return $self->_admin_service($cgi_get_args);
}
391
# Removes (unloads) a core from the ext/solr/solr.xml config file by
# sending the CoreAdmin UNLOAD action.
#
# Parameters:
#   $core   - name of the core to unload
#   $delete - optional; when it is defined and equal to 1, deleteIndex=true
#             is added to the request (available from Solr3.3, see
#             https://wiki.apache.org/solr/CoreAdmin; later Solr versions
#             also offer deleteDataDir and deleteInstanceDir)
#
# Returns whatever _admin_service returns.
sub admin_unload_core
{
    my ($self, $core, $delete) = @_;

    my $cgi_get_args = "action=UNLOAD&core=$core";
    $cgi_get_args .= "&deleteIndex=true" if (defined $delete && $delete == 1);

    return $self->_admin_service($cgi_get_args);
}
408
# Unloads a core while explicitly asking for its index (sidx/didx folder)
# to be kept.
#
# For UNLOAD core params, see page 315 of
# https://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf
#
# Keeping the index is already what admin_unload_core() does when no delete
# flag is given. The 0 is passed explicitly so that callers relying on the
# index being retained, such as solrbuilder::post_build_indexes(), keep
# getting that behaviour even if the default of admin_unload_core() were
# ever changed.
#
# Parameters:
#   $core - name of the core to unload
#
# Returns whatever admin_unload_core returns.
sub admin_unload_core_explicitly_retaining_index
{
    my ($self, $core) = @_;

    return $self->admin_unload_core($core, 0);
}
428
429
# Makes sure the tomcat server is up, starting it with "ant start" (run
# from $ENV{'GSDL3SRCHOME'}) when it is not already answering.
#
# Parameters:
#   $verbosity - optional, defaults to 1; values above 1 also print the
#                start command
#
# Side effects: stores the solr live home directory on the object, changes
# the working directory to $ENV{'GSDL3SRCHOME'}, and records under
# 'server_explicitly_started' whether this call started the server (1) or
# found it already running (0). Exits the process with -1 if the server
# cannot be started.
sub start
{
    my ($self, $verbosity) = @_;

    if (!defined $verbosity) {
        $verbosity = 1;
    }

    # will be used later to generate solr.xml.in from solr.xml and vice-versa
    $self->{'solr_live_home'} = &util::filename_cat($ENV{'GSDL3HOME'}, "ext", "solr");

    my $server_host = $ENV{'SOLR_HOST'};
    my $server_port = $ENV{'SOLR_PORT'};

    chdir($ENV{'GSDL3SRCHOME'});

    my $server_java_cmd = "ant start";

    # Shared tail of the two failure reports below; only the first line
    # differs between them.
    my $report_failure = sub {
        my ($headline) = @_;

        print STDERR $headline;
        print STDERR "$!\n";
        print STDERR "Command attempted was:\n";
        print STDERR " $server_java_cmd\n";
        print STDERR "run from directory:\n";
        print STDERR " $ENV{'GSDL3SRCHOME'}\n";
        print STDERR "----\n";
    };

    my $server_status = "unknown";
    my $ant_out;

    if ($self->server_running()) {
        $server_status = "already-running";
        print STDERR "@@@@ server already running\n\n";
    }
    elsif (open($ant_out, "$server_java_cmd 2>&1 |")) {

        print STDERR "@@@@ need to start tomcat\n\n";
        if ($verbosity > 1) {
            print STDERR "**** starting up tomcat server with cmd start =\n $server_java_cmd\n";
        }

        # Echo ant's output until it reports success (or the output ends).
        while (defined(my $line = <$ant_out>)) {
            print "Tomcat startup: $line";
            last if ($line =~ m/^BUILD SUCCESSFUL/);
        }

        close($ant_out);

        if (!$self->server_running()) {
            $server_status = "failed-to-start"; # no need to set this, will be exiting below anyway

            $report_failure->("Error: failed to start greenstone tomcat server\n");

            exit -1;
        }

        $server_status = "explicitly-started";
    }
    else {
        print STDERR "@@@@ failed to start tomcat\n\n";
        $server_status = "failed-to-start"; # no need to set this, will be exiting below anyway

        $report_failure->("Error: unable to start greenstone tomcat server\n");

        exit -1;
    }

    if ($server_status eq "explicitly-started") {
        # now we know the server is ready to accept connections
        $self->{'server_explicitly_started'} = 1;
        print "Tomcat server ready and listening for connections at ";
        print " $server_host:$server_port\n";
    }
    elsif ($server_status eq "already-running") {
        print STDERR "Using existing tomcat server detected at $server_host:$server_port\n";
        $self->{'server_explicitly_started'} = 0;
    }
    elsif ($server_status eq "failed-to-start") {
        print STDERR "Started Solr/Tomcat web server at $server_host:$server_port";
        print STDERR ", but encountered an initialization error\n";
        exit -1;
    }

}
520
# Accessor: returns the value stored under the object's
# 'server_explicitly_started' key (undef until start() has set it).
sub explicitly_started
{
    my ($self) = @_;

    my $flag = $self->{'server_explicitly_started'};
    return $flag;
}
527
# Stops the tomcat server by running "ant stop" from $ENV{'GSDL3SRCHOME'}
# and echoing the command's output.
#
# Parameters:
#   $options - optional hash reference; recognised keys:
#                'do_wait'          - call wait() once the command's output
#                                     has been read (default 1)
#                'output_verbosity' - above 1 prints the stop command,
#                                     above 0 prints a final shutdown
#                                     message (default 1)
#
# Side effects: changes the working directory to $ENV{'GSDL3SRCHOME'}.
# Exits the process with -2 if the stop command cannot be run.
sub stop
{
    my $self = shift @_;
    my ($options) = @_;

    # The directory the command is run from; also named in the failure
    # report below so that the report matches what was actually attempted.
    my $run_dir = $ENV{'GSDL3SRCHOME'};

    chdir($run_dir);

    # defaults
    my $do_wait = 1;
    my $output_verbosity = 1;

    if (defined $options) {
        if (defined $options->{'do_wait'}) {
            $do_wait = $options->{'do_wait'};
        }
        if (defined $options->{'output_verbosity'}) {
            $output_verbosity = $options->{'output_verbosity'};
        }
    }

    my $server_java_cmd = "ant stop";

    print STDERR "**** java server stop cmd:\n $server_java_cmd\n" if ($output_verbosity>1);

    my $ant_out;
    if (open($ant_out, "$server_java_cmd 2>&1 |")) {

        while (defined(my $line = <$ant_out>)) {
            print "@@@@ Tomcat shutdown: $line";
        }
        close($ant_out);

        if ($do_wait) {
            wait(); # let the child process finish
        }

        if ($output_verbosity>0) {
            print "@@@@@ Tomcat server shutdown\n";
        }
    }
    else {
        print STDERR "Error: failed to stop tomcat-server\n";
        print STDERR "$!\n";
        print STDERR "Command attempted was:\n";
        print STDERR " $server_java_cmd\n";
        print STDERR "run from directory:\n";
        print STDERR " $run_dir\n";
        print STDERR "----\n";

        exit -2;
    }
}
582
583
584
5851;
Note: See TracBrowser for help on using the repository browser.