Changeset 10163
- Timestamp: 2005-06-23T11:30:09+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/lucene_passes.pl
r9918 r10163
   37    37
   38    38    use util;
         39  + use ghtml;
   39    40
   40    41
   41    42    sub open_java_lucene
   42    43    {
   43        -     my ($doc_tag_level,$full_builddir,$indexdir) = @_;
         44  +     my ($doc_tag_level,$full_builddir,$indexdir,$create,$verbosity) = @_;
   44    45
   45    46        my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");
    …     …
   47    48
   48    49        my $java_lucene = "java -classpath \"$classpath\" GS2LuceneIndexer";
   49        -     my $java_cmd = "$java_lucene $doc_tag_level \"$full_builddir\" $indexdir";
         50  +     my $cmd_options = "$create -verbosity $verbosity";
         51  +     my $java_cmd = "$java_lucene $cmd_options $doc_tag_level \"$full_builddir\" $indexdir";
   50    52
   51    53        if (!open (PIPEOUT, "| $java_cmd")) {
    …     …
   74    76        print DOCOUT $doc_xml;
   75    77        close(DOCOUT);
         78  +
         79  +     my @secs = ($doc_xml =~ m/<Sec\s+gs2:id="\d+"\s*>.*?<\/Sec>/sg);
         80  +
         81  +     foreach my $sec (@secs) {
         82  +         my ($docnum,$sec_text) = ($sec =~ m/<Sec\s+gs2:id="(\d+)"\s*>(.*?)<\/Sec>/s);
         83  +         my $docnum_filename
         84  +             = &util::filename_cat($full_textdir,"$docnum.xml");
         85  +
         86  +         open(SECOUT,">$docnum_filename")
         87  +             || die "Unable to open $docnum_filename";
         88  +
         89  +         print SECOUT &ghtml::unescape_html($sec_text);
         90  +         close(SECOUT);
         91  +     }
         92  +
   76    93    }
   77    94
    …     …
  126   143        my $argc = scalar(@argv);
  127   144
  128        -     if ($argc<4) {
  129        -         print STDERR "Usage: $PROGNAME mode(text|index) doc-tag-level build-dir index-name\n";
        145  +     my $create = "";
        146  +     my $verbosity = 1;
        147  +
        148  +     my @filtered_argv = ();
        149  +
        150  +     my $i = 0;
        151  +     while ($i<$argc) {
        152  +         if ($argv[$i] =~ m/^-(.*)$/) {
        153  +
        154  +             my $option = $1;
        155  +
        156  +             # -create causes build to be incremental
        157  +             if ($option eq ("create")) {
        158  +                 $create = "-create";
        159  +             }
        160  +
        161  +             # -verbosity num
        162  +             elsif ($option eq "verbosity") {
        163  +                 $i++;
        164  +                 if ($i<$argc) {
        165  +                     $verbosity = $argv[$i];
        166  +                 }
        167  +             }
        168  +             else {
        169  +                 print STDERR "Unrecognised minus option: -$option\n";
        170  +             }
        171  +         }
        172  +         else {
        173  +             push(@filtered_argv,$argv[$i]);
        174  +         }
        175  +         $i++;
        176  +     }
        177  +
        178  +     my $filtered_argc = scalar(@filtered_argv);
        179  +
        180  +     if ($filtered_argc < 4) {
        181  +         print STDERR "Usage: $PROGNAME [-create|-verbosity num] \"text\"|\"index\" doc-tag-level build-dir index-name\n";
  130   182            exit 1;
  131   183        }
  132   184
  133        -     my $mode = $argv[0];
  134        -     my $doc_tag_level = $argv[1];
  135        -     my $full_builddir = $argv[2];
  136        -     my $indexdir = $argv[3];
  137        -     print STDERR "**** ARGS = ", join(" ", @argv), "\n";
        185  +     my $mode = $filtered_argv[0];
        186  +     my $doc_tag_level = $filtered_argv[1];
        187  +     my $full_builddir = $filtered_argv[2];
        188  +     my $indexdir = $filtered_argv[3];
        189  +     ### print STDERR "**** ARGS = ", join(" ", @argv), "\n";
  138   190
  139   191        my $full_textdir = &util::filename_cat($full_builddir,"text");
    …     …
  141   193        if ($mode eq "index") {
  142   194            # don't need the lucene stuff if we are just storing the docs
  143        -         open_java_lucene($doc_tag_level,$full_builddir,$indexdir);
        195  +         open_java_lucene($doc_tag_level,$full_builddir,$indexdir,$create,$verbosity);
  144   196        }
  145   197        monitor_xml_stream($mode, $full_textdir);
Note: See TracChangeset for help on using the changeset viewer.