source: gs3-extensions/solr/trunk/src/perllib/solrbuilder.pm@ 33372

Last change on this file since 33372 was 33372, checked in by kjdon, 5 years ago

when writing out facets in buildConfig, need to get them from actualfacetfields rather than from actualsortfields

File size: 24.3 KB
Line 
1###########################################################################
2#
3# solrbuilder.pm -- perl wrapper for building index with Solr
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26
27package solrbuilder;
28
29use strict;
30no strict 'refs';
31
32use lucenebuilder;
33use solrserver;
34
# Compile-time setup: make solrbuilder a subclass of lucenebuilder
# (the parent module is loaded by the 'use lucenebuilder' line above).
sub BEGIN {
    @solrbuilder::ISA = ('lucenebuilder');
}
38
39
# Constructor.  Delegates almost all set-up to the lucenebuilder parent,
# then switches the build type to "solr" and records how to invoke the
# solr_passes.pl wrapper script that feeds documents to the Solr server.
sub new {
    my $class = shift(@_);

    # Use direct method-call syntax rather than the fragile indirect
    # object form ("new lucenebuilder ...") the original code used.
    my $self = lucenebuilder->new(@_);
    $self = bless $self, $class;

    $self->{'buildtype'} = "solr";

    my $solr_passes_script = "solr_passes.pl";

    $self->{'solr_passes'} = "$solr_passes_script";
    # Tack perl on the beginning to ensure execution
    $self->{'solr_passes_exe'} = "\"".&util::get_perl_exec()."\" -S \"$solr_passes_script\"";
    return $self;
}
54
55
# Name of the document-processor class to use when building with Solr.
sub default_buildproc {
    my $self = shift(@_);
    return 'solrbuildproc';
}
61
62# This writes a nice version of the text docs
63#
64# Essentially the same as the lucenebuilder.pm version, only using solr_passes
65# => refactor and make better use of inheritence
66#
sub compress_text
{
    my $self = shift (@_);
    # do nothing if we don't want compressed text
    return if $self->{'no_text'};

    my ($textindex) = @_;

    # workaround to avoid hard-coding "solr" check into buildcol.pl
    $textindex =~ s/^section://;

    my $outhandle = $self->{'outhandle'};

    # the text directory
    my $text_dir = &FileUtils::filenameConcatenate($self->{'build_dir'}, "text");
    my $build_dir = &FileUtils::filenameConcatenate($self->{'build_dir'},"");
    &FileUtils::makeAllDirectories($text_dir);

    my $osextra = "";
    if ($ENV{'GSDLOS'} =~ /^windows$/i)
    {
        $text_dir =~ s@/@\\@g;
    }
    else
    {
        if ($outhandle ne "STDERR")
        {
            # so solr_passes doesn't print to stderr if we redirect output
            $osextra .= " 2>/dev/null";
        }
    }

    # Find the perl script to call to run solr
    my $solr_passes = $self->{'solr_passes'};
    my $solr_passes_exe = $self->{'solr_passes_exe'};

    my $solr_passes_sections = "Doc";

    my ($handle);

    if ($self->{'debug'})
    {
        # debug mode: dump what would be piped to solr_passes to stdout
        $handle = *STDOUT;
    }
    else
    {
        # work out which Solr core name to post to
        my $site = $self->{'site'};
        my $collect = $self->{'collection'};
        my $core_prefix = (defined $site) ? "$site-$collect" : $collect;
        my $core = $core_prefix; # unused in this call to solr_passes

        $core = "building-".$core unless $self->{'incremental'}; # core points to building only for force_removeold

        print STDERR "Executable: $solr_passes_exe\n";
        print STDERR "Sections: $solr_passes_sections\n";
        print STDERR "Build Dir: $build_dir\n";
        print STDERR "Cmd: $solr_passes_exe $core text \"$build_dir\" \"dummy\" $osextra\n";
        # open a pipe into solr_passes.pl; everything printed to $handle
        # below is consumed by that script
        if (!open($handle, "| $solr_passes_exe $core text \"$build_dir\" \"dummy\" $osextra"))
        {
            print STDERR "<FatalError name='NoRunSolrPasses'/>\n</Stage>\n" if $self->{'gli'};
            die "solrbuilder::build_index - couldn't run $solr_passes_exe\n$!\n";
        }
    }

    # stored text is always Doc and Sec levels
    my $levels = { 'document' => 1, 'section' => 1 };
    # always do database at section level
    my $db_level = "section";

    # set up the document processr
    $self->{'buildproc'}->set_output_handle ($handle);
    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->set_index ($textindex);
    $self->{'buildproc'}->set_indexing_text (0);
    #$self->{'buildproc'}->set_indexfieldmap ($self->{'indexfieldmap'});
    $self->{'buildproc'}->set_levels ($levels);
    $self->{'buildproc'}->set_db_level ($db_level);
    $self->{'buildproc'}->reset();

    # push every document through the plugins; the buildproc writes the
    # resulting text into the pipe opened above
    &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'},
                   $self->{'buildproc'}, $self->{'maxdocs'});
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    &plugin::end($self->{'pluginfo'});

    # closing the pipe waits for solr_passes.pl to finish
    close ($handle) unless $self->{'debug'};
    $self->print_stats();

    print STDERR "</Stage>\n" if $self->{'gli'};
}
157
158#----
159
160
161
# Copy $in_filename to $out_filename line by line, applying replacement
# rules on the way through.
#
# Each rule in @$replace_rules is a hashref with two keys:
#   'regexp' - pattern matched against each (chomped) input line
#   'insert' - text written out INSTEAD of a line the pattern matches
#
# The first matching rule wins; lines matching no rule are copied through
# unchanged.  On failure to open either file an error is printed to
# STDERR and the function returns without signalling the caller (this
# matches the original behaviour).
#
# Fixes over the original: 3-arg open with lexical filehandles (the old
# 2-arg/bareword form is unsafe and clobbers global handles), and a stray
# double semicolon removed.
sub filter_in_out_file
{
    my ($in_filename,$out_filename,$replace_rules) = @_;

    if (open(my $in_fh, '<', $in_filename)) {

        if (open(my $out_fh, '>', $out_filename)) {

            my $line;
            while (defined ($line=<$in_fh>)) {
                chomp $line;

                my $done_insert = 0;
                foreach my $rule (@$replace_rules) {
                    my $line_re = $rule->{'regexp'};
                    my $insert = $rule->{'insert'};

                    if ($line =~ m/$line_re/) {
                        # replace the whole matching line with the rule's text
                        print {$out_fh} $insert;
                        $done_insert = 1;
                        last;
                    }
                }
                if (!$done_insert) {
                    print {$out_fh} "$line\n";
                }
            }

            close($out_fh);
        }
        else {
            print STDERR "Error: Failed to open $out_filename\n";
            print STDERR " $!\n";
        }

        close($in_fh);
    }
    else {
        print STDERR "Error: Failed to open $in_filename\n";
        print STDERR " $!\n";
    }

}
205
206# We need to push the list of indexfield to shortname mappings through to the
207# build_cfg as, unlike in MGPP, we need these mappings in advance to configure
208# Lucene/Solr. Unfortunately the original function found in mgbuilder.pm makes
209# a mess of this - it only outputs fields that have been processed (none have)
210# and it has a hardcoded renaming for 'text' so it becomes 'TX' according to
211# the schema but 'TE' according to XML sent to lucene_passes.pl/solr_passes.pl
212# This version is dumber - just copy them all across verbatim - but works. We
213# do still need to support the special case of 'allfields'
# Populate $self->{'build_cfg'} with the index field list and the
# fieldname -> two-letter shortname mapping, copied verbatim from the
# buildproc's 'indexfieldmap' (with 'allfields' special-cased to 'ZZ').
sub make_final_field_list
{
    my $self = shift (@_);

    $self->{'build_cfg'} = {};
    my @name_to_short = ();
    my @field_names = ();

    my $known_shortnames = $self->{'buildproc'}->{'indexfieldmap'};

    # @todo support: $self->{'buildproc'}->{'extraindexfields'}
    foreach my $index_spec (@{$self->{'collect_cfg'}->{'indexes'}})
    {
        # strip subcollection/language qualifiers; note this edits the
        # collect_cfg entry in place via the foreach alias (as before)
        $index_spec =~ s/:.*$//;

        foreach my $field (split(';', $index_spec))
        {
            my $shortname;
            if ($field eq 'allfields') {
                $shortname = 'ZZ';
            }
            elsif (defined $known_shortnames->{$field}) {
                $shortname = $known_shortnames->{$field};
            }
            else {
                $shortname = 'ERROR';
                print STDERR 'Error! Couldn\'t find indexfieldmap for field: ' . $field . "\n";
            }
            push(@name_to_short, $field . '->' . $shortname);
            push(@field_names, $field);
        }
    }

    # only record the lists if anything was collected
    $self->{'build_cfg'}->{'indexfieldmap'} = \@name_to_short if (scalar @name_to_short);
    $self->{'build_cfg'}->{'indexfields'} = \@field_names if (scalar @field_names);
}
256
257# Generate solr schema.xml file based on indexmapfield and other associated
258# config files
259#
260# Unlike make_auxiliary_files(), this needs to be done up-front (rather
261# than at the end) so the data-types in schema.xml are correctly set up
262# prior to document content being pumped through solr_passes.pl
263
264
# Generate the collection's Solr schema.xml from the template in
# $GEXT_SOLR/conf/schema.xml.in, substituting one <field .../> line per
# entry in build_cfg's 'indexfieldmap', and copy the other Solr config
# files (solrconfig.xml, stopwords, etc.) into the collection's etc/conf.
sub premake_solr_auxiliary_files
{
    my $self = shift (@_);

    # Replace the following marker:
    #
    #   <!-- ##GREENSTONE-FIELDS## -->
    #
    # with lines of the form:
    #
    #   <field name="<field>" type="string" ... />
    #
    # for each <field> in 'indexfieldmap'

    my $schema_insert_xml = "";

    foreach my $ifm (@{$self->{'build_cfg'}->{'indexfieldmap'}}) {

        # each entry has the form "fullname->shortname"
        my ($fullfieldname, $field) = ($ifm =~ m/^(.*)->(.*)$/);

        $schema_insert_xml .= "    "; # indent
        $schema_insert_xml .= "<field name=\"$field\" ";

        if($field eq "CD" || $field eq "CS") {
            # Coordinate and CoordShort meta should not be split but treated as a whole string for searching. So type=string, not type=text_en_splitting
            # Can't set to type="location", which uses solr.LatLonType, since type=location fields "must not be multivalued" as per conf/schema.xml.in.
            # And we can have multiple Coordinate (and multiple CoordShort) meta for one doc, so multivalued=true.
            # Not certain what to set stored to. As per conf/schema.xml.in, stored=false means "you only need to search on the field but
            # don't need to return the original value". And they advice to set stored="false" for all fields possible (esp large fields)."
            # But stored=false makes it not visible in Luke. So setting stored=true as for other fields
            # TermVector: '"A term vector is a list of the document's terms and their number of occurrences in that documented."
            # Each document has one term vector which is a list.' (http://makble.com/what-is-term-vector-in-lucene and lucene API for Field.TermVector)
            # e.g. docA contains, "cat" 5 times, "dog" 10 times. We don't care to treat Coordinate meta as a term: not a "term" occurring
            # in the doc, and don't care how often a Coordinate occurs in a document.
            # Consequently, we don't care about term positions and term offsets for Coordinate meta either.

            $schema_insert_xml .= "type=\"string\" indexed=\"true\" stored=\"true\" multiValued=\"true\" termVectors=\"false\" termPositions=\"false\" termOffsets=\"false\" />\n";
        }

        elsif($field eq "ML") {
            # mapLabel: same attributes as for coord meta CD and CS above
            # mapLabel is also like facets with type="string" to not get tokenized, and multiValued="true" to allow each shape's label to be stored distinctly
            $schema_insert_xml .= "type=\"string\" indexed=\"true\" stored=\"true\" multiValued=\"true\" termVectors=\"false\" termPositions=\"false\" termOffsets=\"false\" />\n";
        }

        else {
            if($field eq "LT" || $field eq "LO") # full Latitude and Longitude coordinate meta, not the short variants (LatShort/LA and LongShort/LN)
            {
                # Latitude and Longitude is being phased out in favour of using Coord meta.
                # However, if ever returning to using Lat and Lng instead of Coord meta, then the way the Lat Lng meta is currently written out for type="location"
                # is in the wrong format. Lat and Lng shouldn't get written out separately but as: Lat,Lng
                # It gets written out in solrbuildproc.pm, I think, so that would be where it needs to be corrected.
                # For more info on type=location for our solr 4.7.2 or thereabouts, see https://web.archive.org/web/20160312154250/https://wiki.apache.org/solr/SpatialSearchDev
                # which states:
                #   When indexing, the format is something like:
                #   <field name="store_lat_lon">12.34,-123.45</field>
                #
                $schema_insert_xml .= "type=\"location\" ";
            }


#	    elsif ($field ne "ZZ" && $field ne "TX")
#	    {
#		$schema_insert_xml .= "type=\"string\" ";
#	    }
            else
            {
                #$schema_insert_xml .= "type=\"text_en_splitting\" ";

                # original default solr field type for all fields is text_en_splitting
                my $solrfieldtype = "text_en_splitting";
                # allow a per-field override from the collection config
                if(defined $self->{'collect_cfg'}->{'indexfieldoptions'}->{$fullfieldname}->{'solrfieldtype'}) {
                    $solrfieldtype = $self->{'collect_cfg'}->{'indexfieldoptions'}->{$fullfieldname}->{'solrfieldtype'};
                    #print STDERR "@@@@#### found TYPE: $solrfieldtype\n";
                }
                $schema_insert_xml .= "type=\"$solrfieldtype\" ";

            }
            # set termVectors=\"true\" when term vectors info is required,
            # see TermsResponse termResponse = solrResponse.getTermsResponse();
            $schema_insert_xml .= "indexed=\"true\" stored=\"true\" multiValued=\"true\" termVectors=\"true\" termPositions=\"true\" termOffsets=\"true\" />\n";
        }
    }

    # just the one rule to date
    my $insert_rules
        = [ { 'regexp' => "^\\s*<!--\\s*##GREENSTONE-FIELDS##\\s*-->\\s*\$",
              'insert' => $schema_insert_xml } ];

    my $solr_home = $ENV{'GEXT_SOLR'};
##    my $in_dirname = &FileUtils::filenameConcatenate($solr_home,"etc","conf");
    my $in_dirname = &FileUtils::filenameConcatenate($solr_home,"conf");
    my $schema_in_filename = &FileUtils::filenameConcatenate($in_dirname,"schema.xml.in");

    my $collect_home = $ENV{'GSDLCOLLECTDIR'};
    my $out_dirname = &FileUtils::filenameConcatenate($collect_home,"etc","conf");
    my $schema_out_filename = &FileUtils::filenameConcatenate($out_dirname,"schema.xml");

    # make sure output conf directory exists
    if (!&FileUtils::directoryExists($out_dirname)) {
        &FileUtils::makeDirectory($out_dirname);
    }

    # write out schema.xml with the generated field lines spliced in
    filter_in_out_file($schema_in_filename,$schema_out_filename,$insert_rules);

    # now do the same for solrconfig.xml, stopwords, ...
    # these are simpler, as they currently do not need any filtering

    my @in_file_list = ( "solrconfig.xml", "stopwords.txt", "stopwords_en.txt",
                         "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml" );

    foreach my $file ( @in_file_list ) {
        my $in_filename = &FileUtils::filenameConcatenate($in_dirname,$file.".in");
        my $out_filename = &FileUtils::filenameConcatenate($out_dirname,$file);

        if(&FileUtils::fileExists($in_filename)) {
            filter_in_out_file($in_filename,$out_filename,[]);
        }
    }

    # copy whole config subdirectories (e.g. language resources) across
    my @in_dir_list = ( "lang" );
    foreach my $dir ( @in_dir_list ) {

        my $full_subdir_name = &FileUtils::filenameConcatenate($in_dirname,$dir);

        if(&FileUtils::directoryExists($full_subdir_name)) {
            &FileUtils::copyFilesRecursiveNoSVN($full_subdir_name, $out_dirname);
        }
    }
}
395
396
# Runs before the main index-building phase.  Starts the Solr/Jetty
# server, works out the field -> shortname mapping needed to generate
# schema.xml, writes out the collection's Solr config files, and
# creates/recreates the Solr cores that the build will write into.
#
# $indexname - optional: restrict processing to a single index spec.
sub pre_build_indexes
{
    my $self = shift (@_);
    my ($indexname) = @_;
    my $outhandle = $self->{'outhandle'};

    # If the Solr/Jetty server is not already running, the following starts
    # it up, and only returns when the server is "reading and listening"

    my $solr_server = new solrserver($self->{'build_dir'});
    $solr_server->start();
    # remembered so post_build_indexes() can stop it again
    $self->{'solr_server'} = $solr_server;

    my $indexes = [];
    if (defined $indexname && $indexname =~ /\w/) {
        push @$indexes, $indexname;
    } else {
        $indexes = $self->{'collect_cfg'}->{'indexes'};
    }

    # skip para-level check, as this is done in the main 'build_indexes'
    # routine

    my $all_metadata_specified = 0; # has the user added a 'metadata' index?
    my $allfields_index = 0;        # do we have an allfields index?

    # Using a hashmap here would avoid duplications, but while more space
    # efficient, it's not entirely clear it would be more computationally
    # efficient
    my @all_fields = ();

    # collect every individual field named across all index specs
    foreach my $index (@$indexes) {
        if ($self->want_built($index)) {

            # get the parameters for the output
            # split on : just in case there is subcoll and lang stuff
            my ($fields) = split (/:/, $index);

            foreach my $field (split (/;/, $fields)) {
                if ($field eq "metadata") {
                    $all_metadata_specified = 1;
                }
                else {
                    push(@all_fields,$field);
                }
            }
        }
    }

    if ($all_metadata_specified) {

        # (Unforunately) we need to process all the documents in the collection
        # to figure out what the metadata_field_mapping is

        # set up the document processr
        $self->{'buildproc'}->set_output_handle (undef);
        $self->{'buildproc'}->set_mode ('index_field_mapping');
        $self->{'buildproc'}->reset();

        &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'},
                       $self->{'buildproc'}, $self->{'maxdocs'});
        &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                       "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
        &plugin::end($self->{'pluginfo'});

    }

    else {
        # Field mapping solely dependent of entries in 'indexes'

        # No need to explicitly handle "allfields" as create_shortname()
        # will get a fix on it through it's static_indexfield_map

        my $buildproc = $self->{'buildproc'};

        foreach my $field (@all_fields)
        {
            if (!defined $buildproc->{'indexfieldmap'}->{$field})
            {
                my $shortname = $buildproc->get_or_create_shortname($field);
                $buildproc->{'indexfieldmap'}->{$field} = $shortname;
                # mark the shortname itself as taken (value is just a flag)
                $buildproc->{'indexfieldmap'}->{$shortname} = 1;
            }
        }
    }

    # Write out solr 'schema.xml' (and related) file
    #
    $self->make_final_field_list();
    $self->premake_solr_auxiliary_files();

    # Now update the solr-core information in solr.xml
    # => at most two cores <colname>-Doc and <colname>-Sec

    my $site = $self->{'site'};
    my $collect = $self->{'collection'};
    my $core_prefix = (defined $site) ? "$site-$collect" : $collect;

    # my $idx = $self->{'index_mapping'}->{$index};
    my $idx = "idx";

    my $build_dir = $self->{'build_dir'};

    # one core per level, named <prefix>-<first letter of level>idx
    foreach my $level (keys %{$self->{'levels'}}) {

        my ($pindex) = $level =~ /^(.)/;

        my $index_dir = $pindex.$idx;
        my $core = "$core_prefix-$index_dir";

        # force_removeold == opposite of being run in 'incremental' mode
        my $force_removeold = ($self->{'incremental'}) ? 0 : 1;

        if ($force_removeold) {
            print $outhandle "\n-removeold set (new index will be created)\n";

            # create cores under temporary core names, corresponding to building directory
            $core = "building-".$core;

            # wipe and recreate the on-disk index area for this core
            my $full_index_dir = &FileUtils::filenameConcatenate($build_dir,$index_dir);
            &FileUtils::removeFilesRecursive($full_index_dir);
            &FileUtils::makeDirectory($full_index_dir);

            my $full_tlog_dir = &FileUtils::filenameConcatenate($full_index_dir, "tlog");
            &FileUtils::makeDirectory($full_tlog_dir);

            # Solr then wants an "index" folder within this general index area!
#	    my $full_index_index_dir = &FileUtils::filenameConcatenate($full_index_dir,"index");
#	    &FileUtils::makeDirectory($full_index_index_dir);


            # now go on and create new index
            print $outhandle "Creating Solr core: $core\n";
            $solr_server->admin_create_core($core);

        }
        else {
            # if collect==core already in solr.xml (check with STATUS)
            # => use RELOAD call to refresh fields now expressed in schema.xml
            #
            # else
            # => use CREATE API to add to solr.xml

            my $check_core_exists = $solr_server->admin_ping_core($core);

            if ($check_core_exists) {
                print $outhandle "Unloading Solr core: $core\n";
                $solr_server->admin_unload_core($core);
            }

            print $outhandle "Creating Solr core: $core\n";
            $solr_server->admin_create_core($core);

        }
    }

}
554
555# Essentially the same as the lucenebuilder.pm version, only using solr_passes
556# => refactor and make better use of inheritence
557
# Build one Solr index: pipes the collection's documents (wrapped in a
# single <update> request) through solr_passes.pl into the appropriate
# Solr core.
#
# $index  - the index spec, possibly "fields:subcollection:language"
# $llevel - the lucene/solr level name for this pass (e.g. Doc/Sec)
#
# Fix over the original: the die message used "\n!$\n", printing a
# literal "!$" instead of the OS error in $!.  Also drops the unused
# local $solr_passes.
sub build_index {
    my $self = shift (@_);
    my ($index,$llevel) = @_;
    my $outhandle = $self->{'outhandle'};
    my $build_dir = $self->{'build_dir'};

    # get the full index directory path and make sure it exists
    my $indexdir = $self->{'index_mapping'}->{$index};
    &FileUtils::makeAllDirectories(&FileUtils::filenameConcatenate($build_dir, $indexdir));

    # Find the perl script to call to run solr
    my $solr_passes_exe = $self->{'solr_passes_exe'};

    # define the section names for solrpasses
    my $solr_passes_sections = $llevel;

    my $osextra = "";
    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
        $build_dir =~ s@/@\\@g;
    } else {
        if ($outhandle ne "STDERR") {
            # so solr_passes doesn't print to stderr if we redirect output
            $osextra .= " 2>/dev/null";
        }
    }

    # get the index expression if this index belongs
    # to a subcollection
    my $indexexparr = [];
    my $langarr = [];

    # there may be subcollection info, and language info.
    my ($fields, $subcollection, $language) = split (":", $index);
    my @subcollections = ();
    @subcollections = split /,/, $subcollection if (defined $subcollection);

    foreach $subcollection (@subcollections) {
        if (defined ($self->{'collect_cfg'}->{'subcollection'}->{$subcollection})) {
            push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subcollection});
        }
    }

    # add expressions for languages if this index belongs to
    # a language subcollection - only put languages expressions for the
    # ones we want in the index
    my @languages = ();
    my $languagemetadata = "Language";
    if (defined ($self->{'collect_cfg'}->{'languagemetadata'})) {
        $languagemetadata = $self->{'collect_cfg'}->{'languagemetadata'};
    }
    @languages = split /,/, $language if (defined $language);
    foreach my $language (@languages) {
        # a leading '!' negates the language filter; preserve it
        my $not=0;
        if ($language =~ s/^\!//) {
            $not = 1;
        }
        if($not) {
            push (@$langarr, "!$language");
        } else {
            push (@$langarr, "$language");
        }
    }

    # Build index dictionary. Uses verbatim stem method
    print $outhandle "\n    creating index dictionary (solr_passes -I1)\n"  if ($self->{'verbosity'} >= 1);
    print STDERR "<Phase name='CreatingIndexDic'/>\n" if $self->{'gli'};
    my ($handle);

    if ($self->{'debug'}) {
        # debug mode: dump what would be piped to solr_passes to stdout
        $handle = *STDOUT;
    } else {
        my $site = $self->{'site'};
        my $collect = $self->{'collection'};
        my $core_prefix = (defined $site) ? "$site-$collect" : $collect;
        my $ds_idx = $self->{'index_mapping'}->{$index};
        my $core = "$core_prefix-$ds_idx";

        $core = "building-".$core unless $self->{'incremental'}; # core points to building only for force_removeold

        print STDERR "Cmd: $solr_passes_exe $core index \"$build_dir\" \"$indexdir\" $osextra\n";
        if (!open($handle, "| $solr_passes_exe $core index \"$build_dir\" \"$indexdir\" $osextra")) {
            print STDERR "<FatalError name='NoRunSolrPasses'/>\n</Stage>\n" if $self->{'gli'};
            die "solrbuilder::build_index - couldn't run $solr_passes_exe\n$!\n";
        }
    }

    # map the lucene-style level name back to the greenstone level key
    my $store_levels = $self->{'levels'};
    my $db_level = "section"; #always
    my $dom_level = "";
    foreach my $key (keys %$store_levels) {
        if ($mgppbuilder::level_map{$key} eq $llevel) {
            $dom_level = $key;
        }
    }
    if ($dom_level eq "") {
        print STDERR "Warning: unrecognized tag level $llevel\n";
        $dom_level = "document";
    }

    my $local_levels = { $dom_level => 1 }; # work on one level at a time

    # set up the document processr
    $self->{'buildproc'}->set_output_handle ($handle);
    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->set_index ($index, $indexexparr);
    $self->{'buildproc'}->set_index_languages ($languagemetadata, $langarr) if (defined $language);
    $self->{'buildproc'}->set_indexing_text (1);
    $self->{'buildproc'}->set_levels ($local_levels);
    if (defined $self->{'collect_cfg'}->{'sortfields'}) {
        $self->{'buildproc'}->set_sortfields ($self->{'collect_cfg'}->{'sortfields'});
    }
    if (defined $self->{'collect_cfg'}->{'facetfields'}) {
        $self->{'buildproc'}->set_facetfields ($self->{'collect_cfg'}->{'facetfields'});
    }
    $self->{'buildproc'}->set_db_level($db_level);
    $self->{'buildproc'}->reset();

    # wrap all documents in a single Solr <update> request
    print $handle "<update>\n";

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});

    print $handle "</update>\n";

    # closing the pipe waits for solr_passes.pl to finish
    close ($handle) unless $self->{'debug'};

    $self->print_stats();

    # restore the full level set for subsequent passes
    $self->{'buildproc'}->set_levels ($store_levels);
    print STDERR "</Stage>\n" if $self->{'gli'};

}
693
694
# Hook run after all indexes are built.
#
# Deliberately overridden so the mgpp post_build_index() does not call
# $self->make_final_field_list() - for solr that already happened in our
# pre_build_indexes() phase.  The remaining job is to shut down the Solr
# server (tomcat or jetty) if pre_build_indexes() explicitly started it.
sub post_build_indexes {
    my $self = shift(@_);

    my $server = $self->{'solr_server'};

    # only stop the server if we were the ones who launched it
    $server->stop() if ($server->explicitly_started());

    $self->{'solr_server'} = undef;
}
715
# Add solr-specific entries to the build.cfg data, on top of whatever the
# lucene implementation records: the facet shortnames actually used in
# the build, plus the fullname -> shortname mapping for each.
sub build_cfg_extra {
    my $self = shift (@_);
    my ($build_cfg) = @_;

    # let the lucene implementation fill in its standard entries first
    $self->lucenebuilder::build_cfg_extra($build_cfg);

    my @short_facet_names = ();
    my @facet_name_pairs = ();

    my $buildproc = $self->{'buildproc'};
    foreach my $facet (@{$buildproc->{'facetfields'}}) {
        # only record facets that actually occurred during the build
        next unless ($buildproc->{'actualfacetfields'}->{$facet});

        my $short = $buildproc->{'facetfieldnamemap'}->{$facet};
        push(@short_facet_names, $short);
        push(@facet_name_pairs, $facet . '->' . $short);
    }

    $build_cfg->{'indexfacetfields'} = \@short_facet_names;
    $build_cfg->{'indexfacetfieldmap'} = \@facet_name_pairs;
}
7371;
738
739
Note: See TracBrowser for help on using the repository browser.