Changeset 26240


Ignore:
Timestamp:
09/25/12 12:40:56 (8 years ago)
Author:
jmt12
Message:

Modified the progress messages to make it easier to extract information from the logs automatically

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/opt/Terrier/FileIndexer.java

    r26237 r26240  
    2020import java.util.Iterator;
    2121
     22import org.apache.log4j.Level;
    2223import org.apache.log4j.Logger;
    2324
     
    8182  public FileIndexer(String index_prefix)
    8283  {
    83     logger.info("FileIndexer::FileIndexer(" + index_prefix + ")");
     84    ///ogger.info("FileIndexer::FileIndexer(" + index_prefix + ")");
    8485    this.index_prefix = index_prefix;
    8586    this.supported_extensions = new HashSet<String>();
     
    134135  public boolean canIndex(String file_path)
    135136  {
    136     logger.info("FileIndexer::canIndex(" + file_path.toString() + ")");
    137137    // we may have specified a default Document class for all files encountered
    138138    if (!ApplicationSetup.getProperty("indexing.simplefilecollection.defaultparser","").equals(""))
     
    145145    String file_extension = file_path.substring(file_path.lastIndexOf(".") + 1);
    146146    // - see if it exists in the array of supported extensions
    147     return this.supported_extensions.contains(file_extension);
     147    boolean result = this.supported_extensions.contains(file_extension);
     148    logger.info("[P1] Can index \"" + file_path.toString() + "\"? => " + result);
     149    return result;
    148150  }
    149151  /** canIndex(String) **/
     
    153155  public void close()
    154156  {
    155     logger.info("FileIndexer::close()");
     157    ///ogger.info("FileIndexer::close()");
    156158    this.file_list.clear();
    157159    this.file_list = null;
     
    163165  /** @function deleteIndex
    164166   */
    165   public void deleteIndex(String prefix)
    166   {
    167     logger.info("FileIndexer::deleteIndex(" + prefix + ")");
     167  public void deleteIndex(String prefix, String logging_name)
     168  {
     169    logger.info("[" + logging_name + "] Delete index " + prefix);
    168170    // and that all old files for this index are removed
    169171    File index_path = new File(ApplicationSetup.TERRIER_INDEX_PATH);
     
    178180    }
    179181  }
    180   /** deleteIndex(String) **/
     182  /** deleteIndex(String, String) **/
     183
     184  /** @function epochTime
     185   *  Returns the current time in whole seconds since the Unix epoch (1970-01-01 00:00:00 UTC)
     186   */
     187  public long epochTime()
     188  {
     189    return System.currentTimeMillis()/1000;
     190  }
     191  /** epochTime() **/
    181192
    182193  /** @function listIndexes
     
    184195  public ArrayDeque<String> listIndexes(boolean include_default)
    185196  {
    186     logger.info("FileIndexer::listIndexes(" + include_default + ")");
     197    ///ogger.info("FileIndexer::listIndexes(" + include_default + ")");
    187198    File index_path = new File(ApplicationSetup.TERRIER_INDEX_PATH);
    188199    String default_index_prefix = ApplicationSetup.getProperty("terrier.index.prefix", "data");
     
    191202    HashSet<String> index_parts_set = new HashSet<String>();
    192203    File files[] = index_path.listFiles();
    193     for (int i = 0; i < files.length; i++)
    194     {
    195       String file_name = files[i].getName();
    196       if (include_default || !file_name.startsWith(default_index_prefix))
    197       {
    198         String prefix = file_name.substring(0, file_name.indexOf("."));
    199         index_parts_set.add(prefix);
    200       }
     204    if (files != null)
     205    {
     206      for (int i = 0; i < files.length; i++)
     207      {
     208        String file_name = files[i].getName();
     209        if (include_default || !file_name.startsWith(default_index_prefix))
     210        {
     211          String prefix = file_name.substring(0, file_name.indexOf("."));
     212          index_parts_set.add(prefix);
     213        }
     214      }
     215    }
     216    else
     217    {
     218      logger.error("[P3] Error! No indexes found - did indexing fail?");
    201219    }
    202220    // we then turn the hashset into a nice list (in this case a deque)
     
    213231  /**
    214232   */
    215   public void loadManifest(Path manifest_path)
    216   {
    217     logger.info("FileIndexer::loadManifest(" + manifest_path.toString() + ")");
     233  public void loadManifest(String index_prefix, Path manifest_path)
     234  {
     235    logger.info("[B" + index_prefix + "] Load manifest " + manifest_path.toString());
    218236    try
    219237    {
     
    238256  public boolean mergeIndexes(String prefix_one, String prefix_two)
    239257  {
    240     logger.info("FileIndexer::mergeIndexes(" + prefix_one + ", " + prefix_two + ")");
     258    ///ogger.info("FileIndexer::mergeIndexes(" + prefix_one + ", " + prefix_two + ")");
    241259    String prefix_default = ApplicationSetup.getProperty("terrier.index.prefix", "data");
    242260    return this.mergeIndexes(prefix_one, prefix_two, prefix_default);
     
    248266  public boolean mergeIndexes(String prefix_one, String prefix_two, String prefix_out)
    249267  {
    250     logger.info("FileIndexer::mergeIndexes(" + prefix_one + ", " + prefix_two + ", " + prefix_out + ")");
     268    logger.info("[P3] Merge indexes " + prefix_one + " and " + prefix_two + " => " + prefix_out);
    251269    // init
    252270    String index_path = ApplicationSetup.TERRIER_INDEX_PATH;
     
    257275    Index index_out = Index.createNewIndex(index_path, prefix_out);
    258276    StructureMerger structure_merger = new StructureMerger(index_one, index_two, index_out);
     277
     278    // quiet logger
     279    Logger root_logger = Logger.getRootLogger();
     280    Level log_level = root_logger.getLevel();
     281    root_logger.setLevel((Level) Level.OFF);
    259282    structure_merger.mergeStructures();
    260283    structure_merger = null;
    261     CollectionStatistics collection_statistics = index_out.getCollectionStatistics();
    262     logger.info("Number of Documents: " + collection_statistics.getNumberOfDocuments());
    263     logger.info("Number of Tokens: " + collection_statistics.getNumberOfTokens());
    264     logger.info("Number of Unique Terms: " + collection_statistics.getNumberOfUniqueTerms());
    265     logger.info("Number of Pointers: " + collection_statistics.getNumberOfPointers());
    266     collection_statistics = null;
     284    root_logger.setLevel(log_level);
     285
     286    // Only print out statistics for 'data'
     287    if (prefix_out.equals("data"))
     288    {
     289      CollectionStatistics collection_statistics = index_out.getCollectionStatistics();
     290      logger.info("[P3] Number of Documents: " + collection_statistics.getNumberOfDocuments());
     291      logger.info("[P3] Number of Tokens: " + collection_statistics.getNumberOfTokens());
     292      logger.info("[P3] Number of Unique Terms: " + collection_statistics.getNumberOfUniqueTerms());
     293      logger.info("[P3] Number of Pointers: " + collection_statistics.getNumberOfPointers());
     294      collection_statistics = null;
     295    }
     296
     297    // Cleanup and closedown
    267298    try
    268299    {
     
    279310    index_two = null;
    280311    index_out = null;
    281     this.deleteIndex(prefix_one);
    282     this.deleteIndex(prefix_two);
     312    this.deleteIndex(prefix_one, "P3");
     313    this.deleteIndex(prefix_two, "P3");
    283314    prefix_one = null;
    284315    prefix_two = null;
     
    291322  public void renameIndex(String prefix_in)
    292323  {
    293     logger.info("FileIndexer::renameIndex(" + prefix_in + ")");
     324    ///ogger.info("FileIndexer::renameIndex(" + prefix_in + ")");
    294325    String default_prefix = ApplicationSetup.getProperty("terrier.index.prefix", "data");
    295326    this.renameIndex(prefix_in, default_prefix);
     
    301332  public void renameIndex(String prefix_in, String prefix_out)
    302333  {
    303     logger.info("FileIndexer::renameIndex(" + prefix_in + ", " + prefix_out + ")");
     334    logger.info("[P3] Rename index " + prefix_in + " => " + prefix_out);
    304335    prefix_in = prefix_in + ".";
    305336    File index_path = new File(ApplicationSetup.TERRIER_INDEX_PATH);
     
    334365  /** @function runIndex
    335366   */
    336   public void runIndex()
    337   {
    338     logger.info("FileIndexer::runIndex()");
     367  public void runIndex(String batch_number)
     368  {
     369    logger.info("[B" + batch_number + "] create index");
    339370    if (this.file_list == null || this.file_list.size() == 0)
    340371    {
     
    353384        return;
    354385      }
    355       this.deleteIndex(this.index_prefix);
     386      this.deleteIndex(this.index_prefix, "F");
    356387      // create the appropriate indexer
    357388      Indexer indexer;
     
    470501    FileIndexer file_indexer = new FileIndexer(index_prefix);
    471502
     503    long epoch_time;
     504
    472505    switch(index_mode)
    473506    {
    474507    case PREPARE:
     508      logger.info("[P1:" + file_indexer.epochTime() + "] Starting manifest preparation");
    475509      // 3a. If we are in preparation mode, this is where we go through the
    476510      //     files in the search path, adding those that are indexable by
     
    567601      search_paths.clear();
    568602      search_paths = null;
     603      logger.info("[P1:" + file_indexer.epochTime() + "] Complete");
    569604      break;
    570605
    571606    case INDEX:
    572607      // 3b. Load the manifest specified by the search path and index files
    573       System.out.println("Mode: Index");
    574       System.out.println("Manifest Path: " + a_path.toString());
    575       System.out.println("Prefix: " + index_prefix);
     608      String manifest_path = a_path.toString();
     609      System.out.println("Mode:     Index");
     610      System.out.println("Prefix:   " + index_prefix);
     611      System.out.println("Manifest: " + manifest_path);
    576612      System.out.println("Indexing: " + ApplicationSetup.getProperty("indexer.meta.forward.keys", "docno,filename"));
    577613      System.out.println(divider);
    578       file_indexer.loadManifest(a_path);
    579       file_indexer.runIndex();
     614
     615      logger.info("[B" + index_prefix + ":" + file_indexer.epochTime() + "] Starting");
     616      file_indexer.loadManifest(index_prefix, a_path);
     617      file_indexer.runIndex(index_prefix);
     618      logger.info("[B" + index_prefix + ":" + file_indexer.epochTime() + "] Complete");
    580619      break;
    581620
    582621    case MERGE:
     622      logger.info("[P3:" + file_indexer.epochTime() + "] Starting index merging");
    583623      System.out.println("Mode: Merge");
    584624      System.out.println(divider);
    585625      // 3c. User has requested a merging process.
    586       logger.info("[" + (System.currentTimeMillis()/1000) + "] Merging started");
    587626      // Look in the default index path and locate all of the indexes to merge
    588627      // (all those not prefixed 'data')
     
    597636        String index_part_two_prefix = index_parts.remove();
    598637        String intermediate_index_prefix = "temp" + String.format("%03d", intermediate_file_counter);
    599         logger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into index \"" + intermediate_index_prefix + "\"");
     638        ///ogger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into index \"" + intermediate_index_prefix + "\"");
    600639        file_indexer.mergeIndexes(index_part_one_prefix, index_part_two_prefix, intermediate_index_prefix);
    601640        index_part_one_prefix = null;
     
    611650        String index_part_one_prefix = index_parts.remove();
    612651        String index_part_two_prefix = index_parts.remove();
    613         logger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into final index");
     652        ///ogger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into final index");
    614653        file_indexer.mergeIndexes(index_part_one_prefix, index_part_two_prefix);
    615654        index_part_one_prefix = null;
     
    621660      {
    622661        String index_part_prefix = index_parts.remove();
    623         logger.info("Renaming index \"" + index_part_prefix + "\" into final index");
     662        ///ogger.info("Renaming index \"" + index_part_prefix + "\" into final index");
    624663        file_indexer.renameIndex(index_part_prefix);
    625664        index_part_prefix = null;
     
    628667      else
    629668      {
    630         logger.warn("No index parts detected - merge has no effect");
     669        logger.warn("[P3] Warning! No index parts detected - merge has no effect");
    631670      }
    632671      // clean up
    633672      index_parts.clear();
    634673      index_parts = null;
    635       logger.info("[" + (System.currentTimeMillis()/1000) + "] The merging is complete!");
     674      logger.info("[P3:" + file_indexer.epochTime() + "] Complete");
    636675      break;
    637676
Note: See TracChangeset for help on using the changeset viewer.