Ignore:
Timestamp:
2000-07-14T12:24:20+12:00 (24 years ago)
Author:
sjboddie
Message:

Implemented a -sortmeta option for import.pl to sort the archives.inf file
(generated at the end of the import process) alphabetically by the given
metadata element. This may be useful for some collections, as boolean
queries currently return matches in build (fairly random) order. Changing
the order of archives.inf changes the order in which documents are built.
This option has a couple of important limitations:

  1. Can't be used in conjunction with the groupsize option, as it would then only change the build order of groups of documents, which doesn't seem very useful.
  2. Is of limited use when building indexes at a section level, as the build order is only sorted by document, not by section.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/import.pl

    r1269 r1287  
    6060    print STDERR "   -maxdocs number        Maximum number of documents to import\n";
    6161    print STDERR "   -groupsize number      Number of GML documents to group into one file\n";
     62    print STDERR "   -sortmeta metadata     Sort documents alphabetically by metadata for\n";
     63    print STDERR "                          building. This will be disabled if groupsize > 1\n";
    6264    print STDERR "   -debug                 Print imported text to STDOUT\n\n";
    6365}
     
    6971    my ($verbosity, $importdir, $archivedir, $keepold,
    7072    $removeold, $gzip, $groupsize, $debug, $maxdocs, $collection,
    71     $configfilename, $collectcfg, $pluginfo,
     73    $configfilename, $collectcfg, $pluginfo, $sortmeta,
    7274    $archive_info_filename, $archive_info, $processor);
    7375    if (!parsargv::parse(\@ARGV,
     
    7981             'gzip', \$gzip,
    8082             'groupsize/\d+/1', \$groupsize,
     83             'sortmeta/.*/', \$sortmeta,
    8184             'debug', \$debug,
    8285             'maxdocs/^\-?\d+/-1', \$maxdocs)) {
     
    9295    &print_usage();
    9396    die "\n";
     97    }
     98
     99    # check sortmeta
     100    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
     101    if (defined $sortmeta && $groupsize > 1) {
     102    print STDERR "WARNING: import.pl cannot sort documents when groupsize > 1\n";
     103    print STDERR "         sortmeta option will be ignored\n\n";
     104    $sortmeta = undef;
    94105    }
    95106
     
    160171    $processor = new docsave ($collection, $archive_info, $verbosity, $gzip, $groupsize);
    161172    $processor->setarchivedir ($archivedir);
     173    $processor->set_sortmeta ($sortmeta) if defined $sortmeta;
    162174    } else {
    163175    $processor = new docprint ();
     
    173185    # write out the archive information file
    174186    if (!$debug) {
    175     $processor->close_file_output();
     187    $processor->close_file_output() if $groupsize > 1;
    176188    $archive_info->save_info($archive_info_filename);
    177189    }
Note: See TracChangeset for help on using the changeset viewer.