Ignore:
Timestamp:
2012-09-19T12:06:28+12:00 (12 years ago)
Author:
jmt12
Message:

New hash based generation for associated files directory - so docno is no longer essential

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/video-and-audio/trunk/src/opt/Terrier/ImageDocument.java

    r26208 r26214  
    2020import java.io.StringReader;
    2121import java.io.Reader;
     22import java.nio.charset.Charset;
    2223import java.nio.file.Files;
    2324import java.nio.file.Path;
    2425import java.nio.file.Paths;
     26import java.security.MessageDigest;
     27import java.security.NoSuchAlgorithmException;
    2528import java.util.Collections;
    2629import java.util.Arrays;
     
    7982    this.properties.put("abstract", "This is an image so here is some dummy text to prevent indexer failing.");
    8083    // B. Properties derived from filename
    81     String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1);
     84    // - A simple title for the document
     85    String filepath = this.properties.get("filename");
     86    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1);
    8287    this.properties.put("title", title);
    83     String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1);
     88    String ext = filepath.substring(filepath.lastIndexOf(".") + 1);
     89    // - The name of the copy of the original document
    8490    String target_filename = "doc." + ext;
    8591    this.properties.put("source","doc." + ext);
    86     String assoc_filename = "D" + this.properties.get("docno");
    87     if (assoc_filename.equals("Dnull"))
    88     {
    89       System.err.println("Error! Bogus assoc dir: " + this.properties.get("docno"));
     92    // - A unique associated directory. This gets a little tricky as we need
     93    //   to create the directory at the same time in an effort to promote
     94    //   synchronous behaviour
     95    String unique_id = this.generateHash(filepath);
     96    //   - we start with the first 4 characters
     97    int offset = 0;
     98    String assoc_filename = "D" + unique_id.substring(offset, offset + 4);
     99    //   - we add ".dir" as a suffix to the directory that actually contains
     100    //     files (so the non-suffixed version contains nested directories)
     101    Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     102    //   - then we continue adding blocks of 4 characters until we get a
     103    //     directory that doesn't already exist
     104    while (assoc_path.toFile().exists() && offset < unique_id.length())
     105    {
     106      offset += 4;
     107      assoc_filename += System.getProperty("file.separator") + unique_id.substring(offset, offset + 4);
     108      assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     109    }
     110    //   - still not unique? but run out of unique_id... time to complain
     111    if (assoc_path.toFile().exists())
     112    {
     113      logger.error("ImageDoument - can't determine unique assocfilepath");
    90114      System.exit(0);
    91115    }
    92 
     116    //   - create the directories quick... hopefully before someone else does
     117    assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    93118    this.properties.put("assocfile", assoc_filename);
    94119
    95120    // Copy (symlink) the file into place in the shared directory
    96121    Path source_path = Paths.get(properties.get("filename"));
    97     Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename);
    98     assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    99122    Path target_path = assoc_path.resolve(target_filename);
    100123    if (target_path.toFile().exists())
     
    264287  }
    265288  /** getReader() **/
     289
     290  /**
     291   */
     292  private String generateHash(String string)
     293  {
     294    StringBuffer sb = new StringBuffer();
     295    try
     296    {
     297      final MessageDigest message_digest = MessageDigest.getInstance("MD5");
     298      message_digest.reset();
     299      message_digest.update(string.getBytes(Charset.forName("UTF8")));
     300      final byte[] result_bytes = message_digest.digest();
     301      for (int i = 0; i < result_bytes.length; ++i)
     302      {
     303        sb.append(Integer.toHexString((result_bytes[i] & 0xFF) | 0x100).substring(1,3));
     304      }
     305    }
     306    catch (NoSuchAlgorithmException e)
     307    {
     308      System.err.println("Exception: " + e);
     309      System.exit(0);
     310    }
     311    return sb.toString();
     312  }
     313  /** generateHash(String) **/
    266314}
    267315
Note: See TracChangeset for help on using the changeset viewer.