root/gs3-extensions/solr/trunk/src/perllib/solrutil.pm @ 31490

Revision 31490, 4.8 KB (checked in by ak19, 4 years ago)

1. Fix to issue of a tomcat host/port change not propagating to solr host/port change when rebuilding a solr collection after tomcat host/port change. The change to tomcat server props needs to be made after gs3-setup.sh was already run in the terminal earlier, to encounter the problem upon solr build. The bug was reproduced on Linux, and the fix for it also tested on Linux. Still need to test fix out on Windows. 2. Simultaneously made http protocol used in solr more robust to whether it's http or https.

Line 
1###########################################################################
2#
3# solrutil.pm -- support module for Solr extension
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package solrutil;
27
28use strict;
29
# Look for the relative path $suffix under each directory listed in the
# array reference $search_path, in order, and return the first resulting
# path that is an existing plain file.
#
# When none of the directories contains the file, an error naming the file
# and the directories tried is printed to STDERR and the program exits
# with status -1 (this function does not return in that case).
sub locate_file
{
    my ($search_path, $suffix) = @_;

    for my $dir (@$search_path) {
        my $candidate = &util::filename_cat($dir, $suffix);
        return $candidate if -f $candidate;
    }

    # Reaching this point means no directory held a matching file
    my $looked_in = join(", ", @$search_path);
    print STDERR "Error: Failed to find '$suffix'\n";
    print STDERR "  Looked in: ", $looked_in, "\n";
    exit -1;
}
48
49
# Build the list of directories to search for Solr extension files.
#
# Returns a reference to an array holding, in this order, the values of the
# GSDLCOLLECTDIR, GSDLHOME and GEXT_SOLR environment variables. Variables
# that are not defined are left out, so the array may be empty.
sub get_search_path
{
    my @dirs;

    for my $var_name ('GSDLCOLLECTDIR', 'GSDLHOME', 'GEXT_SOLR') {
        my $dir = $ENV{$var_name};
        push(@dirs, $dir) if defined $dir;
    }

    return \@dirs;
}
60
# Work out the base URL of the solr servlet (e.g. http://localhost:8383/solr).
#
# The get-solr-servlet-url ant target can be run from anywhere by specifying
# the location of GS3's ant build.xml buildfile.
# GSDL3SRCHOME will be set for GS3 by gs3-setup.sh.
# Based on servercontrol::get_library_URL.
#
# Every line of ant's output is scanned for an http:// or https:// URL and
# the last one found is returned. If ant cannot be started, or its output
# contains no URL, the returned value is a fallback URL assembled from the
# SOLR_HOST and SOLR_PORT environment variables.
sub get_solr_servlet_url {
    # Set up fall backs, incl. old way of using solr host and port values
    # that's already in the environment. Host and port must be separated by
    # ':' so that the fallback has the same http://host:port/solr shape as
    # the URL reported by ant.
    my $solr_url = "http://".$ENV{'SOLR_HOST'}.":".$ENV{'SOLR_PORT'}."/solr";

    my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-solr-servlet-url";

    # Lexical handle: nothing outside this sub needs to see the pipe
    if (open(my $pin, '-|', $perl_command)) {
        while (defined (my $perl_output_line = <$pin>)) {
            # grab all the non-whitespace chars following the protocol
            if ($perl_output_line =~ m@(https?):\/\/(\S*)@) {
                $solr_url = "$1://".$2; # preserve the http protocol
            }
        }
        close($pin);

        #print STDERR "XXXXXXXXXX SOLR URL: $solr_url\n";

    } else {
        print STDERR "*** ERROR IN solrutil::get_solr_servlet_url:\n";
        print STDERR "    Failed to run $perl_command to work out GS3's solr URL\n";
        print STDERR "    falling back to using original solr_URL: $solr_url\n";
    }

    return $solr_url;
}
89
# Given the solr base url (e.g. http://localhost:8383/solr by default), this
# function returns the url's parts as a four element list:
#   (protocol, host, port, solr servlet)
# e.g. ("http", "localhost", "8383", "solr"). The protocol is returned
# without the trailing "://".
#
# If the url is undefined or not of the form protocol://host:port/servlet, a
# warning is printed to STDERR and fallback values are returned instead:
# protocol "http", host and port taken from the SOLR_HOST and SOLR_PORT
# environment variables, and servlet name "solr".
sub get_solr_url_parts {
    my $solr_url = shift (@_);

    # Set up fall backs, incl. old way of using solr host and port values
    # that's already in the environment. The protocol fallback is the bare
    # scheme name so that it has the same format as the value captured from
    # a well-formed url below.
    my ($protocol, $server_host, $server_port, $servlet_name)
        = ("http", $ENV{'SOLR_HOST'}, $ENV{'SOLR_PORT'}, "solr");

    # http://stackoverflow.com/questions/8206135/storing-regex-result-in-a-new-variable
    if (defined $solr_url && $solr_url =~ m@(https?)://([^:]*):([0-9]*)/(.*)$@) {

        ($protocol, $server_host, $server_port, $servlet_name) = ($1, $2, $3, $4);

        #print STDERR "XXXXXXXXXX PROTOCOL: $protocol, SOLR_HOST: $server_host, SOLR_PORT: $server_port, servlet: $servlet_name\n";

    } else {
        print STDERR "*** WARNING: in solrutil::get_solr_url_parts(): solr servlet URL not in expected format\n";
    }

    return ($protocol, $server_host, $server_port, $servlet_name);
}
113
114
# Start the solr-post.jar java program and connect the package-level PIPEOUT
# filehandle to its standard input, so that data written to PIPEOUT is
# posted to the update handler of the given solr core.
#
#   $core          - name of the solr core to post to
#   $solr_base_url - base url of the solr servlet; the post url becomes
#                    $solr_base_url/$core/update
#
# Changes the current working directory to $ENV{'GEXT_SOLR'} as a side
# effect. Dies, reporting the system error, if the java command cannot be
# started. solr-post.jar is found via locate_file(), which exits the
# program if the jar is not on the search path.
sub open_post_pipe
{
    my ($core, $solr_base_url) = @_;

    my $search_path = get_search_path();

    # NOTE(review): presumably solr-post.jar expects to run from the
    # extension's directory - confirm before removing this chdir
    chdir($ENV{'GEXT_SOLR'});

    my $post_jar      = &util::filename_cat("lib","java","solr-post.jar");
    my $full_post_jar = solrutil::locate_file($search_path,$post_jar);

    # Now run solr-post command
    my $post_props = "-Durl=$solr_base_url/$core/update"; # robustness of protocol is taken care of too

    $post_props .= " -Ddata=stdin";
    $post_props .= " -Dcommit=yes";

    my $post_java_cmd = "java -Xmx512M $post_props -jar \"$full_post_jar\"";

    ##print STDERR "**** post cmd = $post_java_cmd\n";

    # PIPEOUT stays a package-level handle because print_to_post_pipe() and
    # close_post_pipe() operate on it. The message interpolates $! (the
    # system error for the failed open).
    open (PIPEOUT, '|-', $post_java_cmd)
        || die "Error in solr_passes.pl: Failed to run $post_java_cmd\n$!\n";
}
140
# Write the given text to the package-level PIPEOUT filehandle (the pipe set
# up by open_post_pipe()). Returns the result of the print call.
sub print_to_post_pipe
{
    my $text = shift(@_);

    return print PIPEOUT $text;
}
147
# Close the package-level PIPEOUT filehandle. Closing the pipe has the
# effect of shutting down solr-post.jar. Returns the result of the close
# call.
sub close_post_pipe
{
    return close(PIPEOUT);
}
153
1541;
Note: See TracBrowser for help on using the browser.