Changeset 10163


Ignore:
Timestamp:
2005-06-23T11:30:09+12:00 (19 years ago)
Author:
davidb
Message:

lucene_passes.pl upgraded to support incremental building. Changes mostly
involve supporting minus options (e.g. -create to start a new index; omit it
for an incremental build) and passing these down into the indexer code.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/lucene_passes.pl

    r9918 r10163  
    3737
    3838use util;
     39use ghtml;
    3940
    4041
    4142sub open_java_lucene
    4243{
    43     my ($doc_tag_level,$full_builddir,$indexdir) = @_;
     44    my ($doc_tag_level,$full_builddir,$indexdir,$create,$verbosity) = @_;
    4445
    4546    my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");
     
    4748
    4849    my $java_lucene = "java -classpath \"$classpath\" GS2LuceneIndexer";
    49     my $java_cmd = "$java_lucene $doc_tag_level \"$full_builddir\" $indexdir";
     50    my $cmd_options = "$create -verbosity $verbosity";
     51    my $java_cmd = "$java_lucene $cmd_options $doc_tag_level \"$full_builddir\" $indexdir";
    5052
    5153    if (!open (PIPEOUT, "| $java_cmd")) {
     
    7476    print DOCOUT $doc_xml;
    7577    close(DOCOUT);
     78
     79    my @secs =  ($doc_xml =~ m/<Sec\s+gs2:id="\d+"\s*>.*?<\/Sec>/sg);
     80
     81    foreach my $sec (@secs) {
     82    my ($docnum,$sec_text) = ($sec =~ m/<Sec\s+gs2:id="(\d+)"\s*>(.*?)<\/Sec>/s);
     83    my $docnum_filename
     84        = &util::filename_cat($full_textdir,"$docnum.xml");
     85   
     86    open(SECOUT,">$docnum_filename")
     87        || die "Unable to open $docnum_filename";
     88
     89    print SECOUT &ghtml::unescape_html($sec_text);
     90    close(SECOUT);
     91    }
     92
    7693}
    7794
     
    126143    my $argc = scalar(@argv);
    127144
    128     if ($argc<4) {
    129     print STDERR "Usage: $PROGNAME mode(text|index) doc-tag-level build-dir index-name\n";
     145    my $create = "";
     146    my $verbosity = 1;
     147
     148    my @filtered_argv = ();
     149
     150    my $i = 0;
     151    while ($i<$argc) {
     152    if ($argv[$i] =~ m/^-(.*)$/) {
     153
     154        my $option = $1;
     155
     156        # -create starts a new index; without it the build is incremental
     157        if ($option eq ("create")) {
     158        $create = "-create";
     159        }
     160
     161        # -verbosity num
     162        elsif ($option eq "verbosity") {
     163        $i++;
     164        if ($i<$argc) {
     165            $verbosity = $argv[$i];
     166        }
     167        }
     168        else {
     169        print STDERR "Unrecognised minus option: -$option\n";
     170        }
     171    }
     172    else {
     173        push(@filtered_argv,$argv[$i]);
     174    }
     175    $i++;
     176    }
     177
     178    my $filtered_argc = scalar(@filtered_argv);
     179
     180    if ($filtered_argc < 4) {
     181    print STDERR "Usage: $PROGNAME [-create|-verbosity num] \"text\"|\"index\" doc-tag-level build-dir index-name\n";
    130182    exit 1;
    131183    }
    132184
    133     my $mode = $argv[0];
    134     my $doc_tag_level = $argv[1];
    135     my $full_builddir = $argv[2];
    136     my $indexdir      = $argv[3];
    137     print STDERR "**** ARGS = ", join(" ", @argv), "\n";
     185    my $mode = $filtered_argv[0];
     186    my $doc_tag_level = $filtered_argv[1];
     187    my $full_builddir = $filtered_argv[2];
     188    my $indexdir      = $filtered_argv[3];
     189###    print STDERR "**** ARGS = ", join(" ", @argv), "\n";
    138190
    139191    my $full_textdir = &util::filename_cat($full_builddir,"text");
     
    141193    if ($mode eq "index") {
    142194    # don't need the lucene stuff if we are just storing the docs
    143     open_java_lucene($doc_tag_level,$full_builddir,$indexdir);
     195    open_java_lucene($doc_tag_level,$full_builddir,$indexdir,$create,$verbosity);
    144196    }
    145197    monitor_xml_stream($mode, $full_textdir);
Note: See TracChangeset for help on using the changeset viewer.