Changeset 9177


Ignore:
Timestamp:
2005-02-24T16:55:13+13:00 (19 years ago)
Author:
kjdon
Message:

Separated document storage and Lucene indexing into two modes; the mode is now passed in as a parameter.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/lucene_passes.pl

    r8520 r9177  
    9191sub monitor_xml_stream
    9292{
    93     my ($full_textdir) = @_;
     93    my ($mode, $full_textdir) = @_;
    9494
    9595    my $doc_xml = "";
     
    104104       
    105105    }
     106   
    106107    if ($line =~ m/^<\/Doc>$/) {
    107108        # save as file
    108         save_xml_doc($full_textdir,$output_filename,$doc_xml);
    109        
    110         # notify lucene indexer
    111         # print STDERR $doc_xml;
     109        #save_xml_doc($full_textdir,$output_filename,$doc_xml);
     110        if ($mode eq "text") {
     111        save_xml_doc($full_textdir,$output_filename,$doc_xml);
     112        } elsif ($mode eq "index") {
     113        # notify lucene indexer
     114        # print STDERR $doc_xml;
    112115##      print PIPEOUT "$output_filename\n";
    113         print PIPEOUT "$doc_xml";
    114        
     116        print PIPEOUT "$doc_xml";
     117        }
    115118        # compress file
    116119###     compress_xml_doc($full_textdir,$output_filename);
     
    127130    my $argc = scalar(@argv);
    128131
    129     if ($argc<3) {
    130     print STDERR "Usage: $PROGNAME doc-tag-level build-dir index-name\n";
     132    if ($argc<4) {
     133    print STDERR "Usage: $PROGNAME mode(text|index) doc-tag-level build-dir index-name\n";
    131134    exit 1;
    132135    }
    133136
    134     my $doc_tag_level = $argv[0];
    135     my $full_builddir = $argv[1];
    136     my $indexdir      = $argv[2];
     137    my $mode = $argv[0];
     138    my $doc_tag_level = $argv[1];
     139    my $full_builddir = $argv[2];
     140    my $indexdir      = $argv[3];
     141    print STDERR "**** ARGS = ", join(" ", @argv), "\n";
    137142
    138143    my $full_textdir = &util::filename_cat($full_builddir,"text");
    139144
    140 
    141     print STDERR "**** ARGS = ", join(" ", @argv), "\n";
    142     open_java_lucene($doc_tag_level,$full_builddir,$indexdir);
    143     monitor_xml_stream($full_textdir);
    144     close_java_lucene();
     145    if ($mode eq "index") {
     146    # don't need the lucene stuff if we are just storing the docs
     147    open_java_lucene($doc_tag_level,$full_builddir,$indexdir);
     148    }
     149    monitor_xml_stream($mode, $full_textdir);
     150    if ($mode eq "index") {
     151    close_java_lucene();
     152    }
    145153}
    146154
Note: See TracChangeset for help on using the changeset viewer.