1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # solrutil.pm -- support module for Solr extension
|
---|
4 | # A component of the Greenstone digital library software
|
---|
5 | # from the New Zealand Digital Library Project at the
|
---|
6 | # University of Waikato, New Zealand.
|
---|
7 | #
|
---|
8 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
9 | #
|
---|
10 | # This program is free software; you can redistribute it and/or modify
|
---|
11 | # it under the terms of the GNU General Public License as published by
|
---|
12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
13 | # (at your option) any later version.
|
---|
14 | #
|
---|
15 | # This program is distributed in the hope that it will be useful,
|
---|
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | # GNU General Public License for more details.
|
---|
19 | #
|
---|
20 | # You should have received a copy of the GNU General Public License
|
---|
21 | # along with this program; if not, write to the Free Software
|
---|
22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | #
|
---|
24 | ###########################################################################
|
---|
25 |
|
---|
26 | package solrutil;
|
---|
27 |
|
---|
28 | use strict;
|
---|
29 |
|
---|
# Finds the first existing plain file named $suffix beneath one of the
# directories in $search_path.
#
#   $search_path - array ref of directories, tried in order
#   $suffix      - path fragment appended to each directory via
#                  util::filename_cat()
#
# Returns the full path of the first candidate that passes the -f test.
# When no candidate matches, the directories tried are reported on STDERR
# and the process is terminated with 'exit -1' (the sub does not return).
sub locate_file
{
    my ($search_path, $suffix) = @_;

    for my $dir (@{$search_path}) {
        my $candidate = &util::filename_cat($dir, $suffix);
        return $candidate if (-f $candidate);
    }

    # Falling out of the loop means none of the directories held the file.
    my $looked_in = join(", ", @{$search_path});

    print STDERR "Error: Failed to find '$suffix'\n";
    print STDERR " Looked in: ", $looked_in, "\n";
    exit -1;
}
|
---|
48 |
|
---|
49 |
|
---|
# Builds the list of directories in which support files are searched for.
#
# The environment variables GSDLCOLLECTDIR, GSDLHOME and GEXT_SOLR are
# consulted in that order; each one that is defined contributes its value.
#
# Returns a reference to a newly created array (empty when none are set).
sub get_search_path
{
    my @dirs;

    foreach my $env_name (qw(GSDLCOLLECTDIR GSDLHOME GEXT_SOLR)) {
        next unless defined $ENV{$env_name};
        push(@dirs, $ENV{$env_name});
    }

    return \@dirs;
}
|
---|
60 |
|
---|
# Works out the base URL of the solr servlet.
#
# Runs the 'get-solr-servlet-url' ant target against the buildfile at
# $GSDL3SRCHOME/build.xml (the target can be run from anywhere because the
# buildfile location is given explicitly; GSDL3SRCHOME will be set for GS3
# by gs3-setup.sh) and scans ant's output for an http/https URL.
# Based on servercontrol::get_library_URL.
#
# Returns the last URL found in the ant output. If no URL is found, or the
# command could not be started, returns the fallback
# "http://<SOLR_HOST>:<SOLR_PORT>/solr" built from the environment.
sub get_solr_servlet_url {
    # Set up fall backs, incl. old way of using solr host and port values
    # that's already in the environment. The ":" between host and port is
    # required: without it the fallback is not a valid URL and cannot be
    # split again by get_solr_url_parts().
    my $solr_url = "http://".$ENV{'SOLR_HOST'}.":".$ENV{'SOLR_PORT'}."/solr";

    my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-solr-servlet-url";

    # Lexical handle + 3-arg open: reads the command's STDOUT without
    # creating a package-global filehandle.
    if (open(my $pin, "-|", $perl_command)) {
        while (defined (my $perl_output_line = <$pin>)) {
            if ($perl_output_line =~ m@(https?):\/\/(\S*)@) { # grab all the non-whitespace chars
                $solr_url = "$1://".$2; # preserve the http protocol
            }
        }
        close($pin);

        #print STDERR "XXXXXXXXXX SOLR URL: $solr_url\n";

    } else {
        print STDERR "*** ERROR IN solrutil::get_solr_servlet_url:\n";
        print STDERR " Failed to run $perl_command to work out GS3's solr URL\n";
        print STDERR " falling back to using original solr_URL: $solr_url\n";
    }

    return $solr_url;
}
|
---|
89 |
|
---|
# Given the solr base url (e.g. http://localhost:8383/solr by default), this
# function returns the url's parts as a 4-element list:
#   (protocol, host, port, solr servlet)
# e.g. ("http", "localhost", "8383", "solr"). The protocol is returned
# without the "://" separator.
#
# If the url is undefined or not of the form protocol://host:port/servlet,
# a warning is printed to STDERR and the fallback
# ("http", $ENV{'SOLR_HOST'}, $ENV{'SOLR_PORT'}, "solr") is returned.
sub get_solr_url_parts {
    my $solr_url = shift (@_);

    # Set up fall backs, incl. old way of using solr host and port values
    # that's already in the environment. The protocol fallback is the bare
    # scheme name so it has the same shape as the value captured below.
    my ($protocol, $server_host, $server_port, $servlet_name)
        = ("http", $ENV{'SOLR_HOST'}, $ENV{'SOLR_PORT'}, "solr");

    # http://stackoverflow.com/questions/8206135/storing-regex-result-in-a-new-variable
    if (defined $solr_url && $solr_url =~ m@(https?)://([^:]*):([0-9]*)/(.*)$@) {

        ($protocol, $server_host, $server_port, $servlet_name) = ($1, $2, $3, $4);

        #print STDERR "XXXXXXXXXX PROTOCOL: $protocol, SOLR_HOST: $server_host, SOLR_PORT: $server_port, servlet: $servlet_name\n";

    } else {
        print STDERR "*** WARNING: in solrutil::get_solr_url_parts(): solr servlet URL not in expected format\n";
    }

    return ($protocol, $server_host, $server_port, $servlet_name);
}
|
---|
113 |
|
---|
# Assembles the java command line that runs solr-post.jar against the
# /update handler of one solr core.
#
#   $core          - core name, placed in the update URL
#   $solr_base_url - solr base URL, including the protocol
#
# Side effect: changes the working directory to $ENV{'GEXT_SOLR'} before
# the jar is looked up. The jar is found with locate_file(), which exits
# the process if lib/java/solr-post.jar is in none of the search dirs.
#
# Returns the command as a single string; nothing is executed here.
sub get_post_pipe_cmd
{
    my ($core, $solr_base_url) = @_;

    my $search_path = get_search_path();

    chdir($ENV{'GEXT_SOLR'});

    my $jar_suffix    = &util::filename_cat("lib","java","solr-post.jar");
    my $full_post_jar = solrutil::locate_file($search_path, $jar_suffix);

    # Properties for the solr-post command.
    # See https://wiki.apache.org/solr/UpdateXmlMessages
    # also https://lucene.apache.org/solr/4_2_1/tutorial.html
    # Suffixing commit=true/commitWithin=10000 to solr's /update servlet
    # didn't work, because when using SimplePostTool the commit only happens
    # after the pipe to the tool is closed.
    my @post_props = (
        "-Durl=$solr_base_url/$core/update", # robustness of protocol is taken care of too
        "-Ddata=stdin",
        "-Dcommit=yes",
    );
    my $post_props = join(" ", @post_props);

    # VM mem was increased from 512 to 1024, but increasing to 2048M didn't
    # help either when too much data was streamed to SimplePostTool before a
    # commit. Nothing works short of committing before the streamed data
    # gets too large, so the solution is to close and reopen the pipe to
    # force commits.
    ##print STDERR "**** post cmd = java -Xmx1024M $post_props -jar \"$full_post_jar\"\n";

    return "java -Xmx1024M $post_props -jar \"$full_post_jar\"";
}
|
---|
144 |
|
---|
# Opens the package-global PIPEOUT filehandle as a pipe into the solr-post
# java command for the given core.
#
#   $core          - core name, passed on to get_post_pipe_cmd()
#   $solr_base_url - solr base URL, passed on to get_post_pipe_cmd()
#
# Dies, reporting the operating system error ($!), if the pipe cannot be
# opened.
#
# Returns the command string so the caller can store it and hand it to
# reopen_post_pipe() later.
sub open_post_pipe
{
    my ($core, $solr_base_url) = @_;
    my $post_java_cmd = &get_post_pipe_cmd($core, $solr_base_url);

    # The message used to end in "!$\n", which interpolates $\ (the output
    # record separator) followed by a literal "n" instead of $!.
    open (PIPEOUT, "|-", $post_java_cmd)
        || die "Error in solrutil::open_post_pipe: Failed to run $post_java_cmd\n$!\n";

    return $post_java_cmd; # return the post_java_cmd so caller can store it and reopen_post_pipe()
}
|
---|
155 |
|
---|
# Opens the package-global PIPEOUT filehandle again as a pipe into the
# given command.
#
#   $post_java_cmd - command string, as returned by open_post_pipe()
#
# Dies, reporting the operating system error ($!), if the pipe cannot be
# opened. On success the value of open() is returned, as the last
# statement evaluated.
sub reopen_post_pipe
{
    my $post_java_cmd = shift(@_);

    # The message used to end in "!$\n", which interpolates $\ (the output
    # record separator) followed by a literal "n" instead of $!.
    open (PIPEOUT, "|-", $post_java_cmd)
        || die "Error in solrutil::reopen_post_pipe: Failed to run $post_java_cmd\n$!\n";
}
|
---|
164 |
|
---|
# Writes one chunk of text to the package-global PIPEOUT filehandle, which
# open_post_pipe()/reopen_post_pipe() open.
#
#   $line - the text to write (first argument; any further ones are ignored)
#
# Returns whatever print() returns.
sub print_to_post_pipe
{
    my $text = shift(@_);

    return print PIPEOUT $text;
}
|
---|
171 |
|
---|
# Closes the package-global PIPEOUT filehandle.
# Closing the pipe has the effect of shutting down solr-post.jar.
#
# Returns whatever close() returns.
sub close_post_pipe
{
    return close(PIPEOUT);
}
|
---|
177 |
|
---|
178 | 1;
|
---|