Changeset 26240

Show
Ignore:
Timestamp:
25.09.2012 12:40:56 (7 years ago)
Author:
jmt12
Message:

Modifications to progress messages to improve extracting information from the logs in an automated fashion

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/opt/Terrier/FileIndexer.java

    r26237 r26240  
    2020import java.util.Iterator; 
    2121 
     22import org.apache.log4j.Level; 
    2223import org.apache.log4j.Logger; 
    2324 
     
    8182  public FileIndexer(String index_prefix) 
    8283  { 
    83     logger.info("FileIndexer::FileIndexer(" + index_prefix + ")"); 
     84    ///ogger.info("FileIndexer::FileIndexer(" + index_prefix + ")"); 
    8485    this.index_prefix = index_prefix; 
    8586    this.supported_extensions = new HashSet<String>(); 
     
    134135  public boolean canIndex(String file_path) 
    135136  { 
    136     logger.info("FileIndexer::canIndex(" + file_path.toString() + ")"); 
    137137    // we may have specified a default Document class for all files encountered 
    138138    if (!ApplicationSetup.getProperty("indexing.simplefilecollection.defaultparser","").equals("")) 
     
    145145    String file_extension = file_path.substring(file_path.lastIndexOf(".") + 1); 
    146146    // - see if it exists in the array of supported extensions 
    147     return this.supported_extensions.contains(file_extension); 
     147    boolean result = this.supported_extensions.contains(file_extension); 
     148    logger.info("[P1] Can index \"" + file_path.toString() + "\"? => " + result); 
     149    return result; 
    148150  } 
    149151  /** canIndex(String) **/ 
     
    153155  public void close() 
    154156  { 
    155     logger.info("FileIndexer::close()"); 
     157    ///ogger.info("FileIndexer::close()"); 
    156158    this.file_list.clear(); 
    157159    this.file_list = null; 
     
    163165  /** @function deleteIndex 
    164166   */ 
    165   public void deleteIndex(String prefix) 
    166   { 
    167     logger.info("FileIndexer::deleteIndex(" + prefix + ")"); 
     167  public void deleteIndex(String prefix, String logging_name) 
     168  { 
     169    logger.info("[" + logging_name + "] Delete index " + prefix); 
    168170    // and that all old files for this index are removed 
    169171    File index_path = new File(ApplicationSetup.TERRIER_INDEX_PATH); 
     
    178180    } 
    179181  } 
    180   /** deleteIndex(String) **/ 
     182  /** deleteIndex(String, String) **/ 
     183 
     184  /** @function epochTime 
     185   *  Returns the current time in whole seconds since the Unix epoch (1970-01-01 00:00:00 UTC)
     186   */ 
     187  public long epochTime() 
     188  { 
     189    return System.currentTimeMillis()/1000; 
     190  } 
     191  /** epochTime() **/ 
    181192 
    182193  /** @function listIndexes 
     
    184195  public ArrayDeque<String> listIndexes(boolean include_default) 
    185196  { 
    186     logger.info("FileIndexer::listIndexes(" + include_default + ")"); 
     197    ///ogger.info("FileIndexer::listIndexes(" + include_default + ")"); 
    187198    File index_path = new File(ApplicationSetup.TERRIER_INDEX_PATH); 
    188199    String default_index_prefix = ApplicationSetup.getProperty("terrier.index.prefix", "data"); 
     
    191202    HashSet<String> index_parts_set = new HashSet<String>(); 
    192203    File files[] = index_path.listFiles(); 
    193     for (int i = 0; i < files.length; i++) 
    194     { 
    195       String file_name = files[i].getName(); 
    196       if (include_default || !file_name.startsWith(default_index_prefix)) 
    197       { 
    198         String prefix = file_name.substring(0, file_name.indexOf(".")); 
    199         index_parts_set.add(prefix); 
    200       } 
     204    if (files != null) 
     205    { 
     206      for (int i = 0; i < files.length; i++) 
     207      { 
     208        String file_name = files[i].getName(); 
     209        if (include_default || !file_name.startsWith(default_index_prefix)) 
     210        { 
     211          String prefix = file_name.substring(0, file_name.indexOf(".")); 
     212          index_parts_set.add(prefix); 
     213        } 
     214      } 
     215    } 
     216    else 
     217    { 
     218      logger.error("[P3] Error! No indexes found - did indexing fail?"); 
    201219    } 
    202220    // we then turn the hashset into a nice list (in this case a deque) 
     
    213231  /** 
    214232   */ 
    215   public void loadManifest(Path manifest_path) 
    216   { 
    217     logger.info("FileIndexer::loadManifest(" + manifest_path.toString() + ")"); 
     233  public void loadManifest(String index_prefix, Path manifest_path) 
     234  { 
     235    logger.info("[B" + index_prefix + "] Load manifest " + manifest_path.toString()); 
    218236    try 
    219237    { 
     
    238256  public boolean mergeIndexes(String prefix_one, String prefix_two) 
    239257  { 
    240     logger.info("FileIndexer::mergeIndexes(" + prefix_one + ", " + prefix_two + ")"); 
     258    ///ogger.info("FileIndexer::mergeIndexes(" + prefix_one + ", " + prefix_two + ")"); 
    241259    String prefix_default = ApplicationSetup.getProperty("terrier.index.prefix", "data"); 
    242260    return this.mergeIndexes(prefix_one, prefix_two, prefix_default); 
     
    248266  public boolean mergeIndexes(String prefix_one, String prefix_two, String prefix_out) 
    249267  { 
    250     logger.info("FileIndexer::mergeIndexes(" + prefix_one + ", " + prefix_two + ", " + prefix_out + ")"); 
     268    logger.info("[P3] Merge indexes " + prefix_one + " and " + prefix_two + " => " + prefix_out); 
    251269    // init 
    252270    String index_path = ApplicationSetup.TERRIER_INDEX_PATH; 
     
    257275    Index index_out = Index.createNewIndex(index_path, prefix_out); 
    258276    StructureMerger structure_merger = new StructureMerger(index_one, index_two, index_out); 
     277 
     278    // quiet logger 
     279    Logger root_logger = Logger.getRootLogger(); 
     280    Level log_level = root_logger.getLevel(); 
     281    root_logger.setLevel((Level) Level.OFF); 
    259282    structure_merger.mergeStructures(); 
    260283    structure_merger = null; 
    261     CollectionStatistics collection_statistics = index_out.getCollectionStatistics(); 
    262     logger.info("Number of Documents: " + collection_statistics.getNumberOfDocuments()); 
    263     logger.info("Number of Tokens: " + collection_statistics.getNumberOfTokens()); 
    264     logger.info("Number of Unique Terms: " + collection_statistics.getNumberOfUniqueTerms()); 
    265     logger.info("Number of Pointers: " + collection_statistics.getNumberOfPointers()); 
    266     collection_statistics = null; 
     284    root_logger.setLevel(log_level); 
     285 
     286    // Only print out statistics for 'data' 
     287    if (prefix_out.equals("data")) 
     288    { 
     289      CollectionStatistics collection_statistics = index_out.getCollectionStatistics(); 
     290      logger.info("[P3] Number of Documents: " + collection_statistics.getNumberOfDocuments()); 
     291      logger.info("[P3] Number of Tokens: " + collection_statistics.getNumberOfTokens()); 
     292      logger.info("[P3] Number of Unique Terms: " + collection_statistics.getNumberOfUniqueTerms()); 
     293      logger.info("[P3] Number of Pointers: " + collection_statistics.getNumberOfPointers()); 
     294      collection_statistics = null; 
     295    } 
     296 
     297    // Cleanup and closedown 
    267298    try 
    268299    { 
     
    279310    index_two = null; 
    280311    index_out = null; 
    281     this.deleteIndex(prefix_one); 
    282     this.deleteIndex(prefix_two); 
     312    this.deleteIndex(prefix_one, "P3"); 
     313    this.deleteIndex(prefix_two, "P3"); 
    283314    prefix_one = null; 
    284315    prefix_two = null; 
     
    291322  public void renameIndex(String prefix_in) 
    292323  { 
    293     logger.info("FileIndexer::renameIndex(" + prefix_in + ")"); 
     324    ///ogger.info("FileIndexer::renameIndex(" + prefix_in + ")"); 
    294325    String default_prefix = ApplicationSetup.getProperty("terrier.index.prefix", "data"); 
    295326    this.renameIndex(prefix_in, default_prefix); 
     
    301332  public void renameIndex(String prefix_in, String prefix_out) 
    302333  { 
    303     logger.info("FileIndexer::renameIndex(" + prefix_in + ", " + prefix_out + ")"); 
     334    logger.info("[P3] Rename index " + prefix_in + " => " + prefix_out); 
    304335    prefix_in = prefix_in + "."; 
    305336    File index_path = new File(ApplicationSetup.TERRIER_INDEX_PATH); 
     
    334365  /** @function runIndex 
    335366   */ 
    336   public void runIndex() 
    337   { 
    338     logger.info("FileIndexer::runIndex()"); 
     367  public void runIndex(String batch_number) 
     368  { 
     369    logger.info("[B" + batch_number + "] create index"); 
    339370    if (this.file_list == null || this.file_list.size() == 0) 
    340371    { 
     
    353384        return; 
    354385      } 
    355       this.deleteIndex(this.index_prefix); 
     386      this.deleteIndex(this.index_prefix, "F"); 
    356387      // create the appropriate indexer 
    357388      Indexer indexer; 
     
    470501    FileIndexer file_indexer = new FileIndexer(index_prefix); 
    471502 
     503    long epoch_time; 
     504 
    472505    switch(index_mode) 
    473506    { 
    474507    case PREPARE: 
     508      logger.info("[P1:" + file_indexer.epochTime() + "] Starting manifest preparation"); 
    475509      // 3a. If we are in preparation mode, this is where we go through the 
    476510      //     files in the search path, adding those that are indexable by 
     
    567601      search_paths.clear(); 
    568602      search_paths = null; 
     603      logger.info("[P1:" + file_indexer.epochTime() + "] Complete"); 
    569604      break; 
    570605 
    571606    case INDEX: 
    572607      // 3b. Load the manifest specified by the search path and index files 
    573       System.out.println("Mode: Index"); 
    574       System.out.println("Manifest Path: " + a_path.toString()); 
    575       System.out.println("Prefix: " + index_prefix); 
     608      String manifest_path = a_path.toString(); 
     609      System.out.println("Mode:     Index"); 
     610      System.out.println("Prefix:   " + index_prefix); 
     611      System.out.println("Manifest: " + manifest_path); 
    576612      System.out.println("Indexing: " + ApplicationSetup.getProperty("indexer.meta.forward.keys", "docno,filename")); 
    577613      System.out.println(divider); 
    578       file_indexer.loadManifest(a_path); 
    579       file_indexer.runIndex(); 
     614 
     615      logger.info("[B" + index_prefix + ":" + file_indexer.epochTime() + "] Starting"); 
     616      file_indexer.loadManifest(index_prefix, a_path); 
     617      file_indexer.runIndex(index_prefix); 
     618      logger.info("[B" + index_prefix + ":" + file_indexer.epochTime() + "] Complete"); 
    580619      break; 
    581620 
    582621    case MERGE: 
     622      logger.info("[P3:" + file_indexer.epochTime() + "] Starting index merging"); 
    583623      System.out.println("Mode: Merge"); 
    584624      System.out.println(divider); 
    585625      // 3c. User has requested a merging process. 
    586       logger.info("[" + (System.currentTimeMillis()/1000) + "] Merging started"); 
    587626      // Look in the default index path and locate all of the indexes to merge 
    588627      // (all those not prefixed 'data') 
     
    597636        String index_part_two_prefix = index_parts.remove(); 
    598637        String intermediate_index_prefix = "temp" + String.format("%03d", intermediate_file_counter); 
    599         logger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into index \"" + intermediate_index_prefix + "\""); 
     638        ///ogger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into index \"" + intermediate_index_prefix + "\""); 
    600639        file_indexer.mergeIndexes(index_part_one_prefix, index_part_two_prefix, intermediate_index_prefix); 
    601640        index_part_one_prefix = null; 
     
    611650        String index_part_one_prefix = index_parts.remove(); 
    612651        String index_part_two_prefix = index_parts.remove(); 
    613         logger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into final index"); 
     652        ///ogger.info("Merging index parts \"" + index_part_one_prefix + "\" and \"" + index_part_two_prefix + "\" into final index"); 
    614653        file_indexer.mergeIndexes(index_part_one_prefix, index_part_two_prefix); 
    615654        index_part_one_prefix = null; 
     
    621660      { 
    622661        String index_part_prefix = index_parts.remove(); 
    623         logger.info("Renaming index \"" + index_part_prefix + "\" into final index"); 
     662        ///ogger.info("Renaming index \"" + index_part_prefix + "\" into final index"); 
    624663        file_indexer.renameIndex(index_part_prefix); 
    625664        index_part_prefix = null; 
     
    628667      else 
    629668      { 
    630         logger.warn("No index parts detected - merge has no effect"); 
     669        logger.warn("[P3] Warning! No index parts detected - merge has no effect"); 
    631670      } 
    632671      // clean up 
    633672      index_parts.clear(); 
    634673      index_parts = null; 
    635       logger.info("[" + (System.currentTimeMillis()/1000) + "] The merging is complete!"); 
     674      logger.info("[P3:" + file_indexer.epochTime() + "] Complete"); 
    636675      break; 
    637676