Changeset 26214


Ignore:
Timestamp:
2012-09-19T12:06:28+12:00 (12 years ago)
Author:
jmt12
Message:

New hash based generation for associated files directory - so docno is no longer essential

Location:
gs2-extensions/video-and-audio/trunk/src/opt/Terrier
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/video-and-audio/trunk/src/opt/Terrier/ImageDocument.java

    r26208 r26214  
    2020import java.io.StringReader;
    2121import java.io.Reader;
     22import java.nio.charset.Charset;
    2223import java.nio.file.Files;
    2324import java.nio.file.Path;
    2425import java.nio.file.Paths;
     26import java.security.MessageDigest;
     27import java.security.NoSuchAlgorithmException;
    2528import java.util.Collections;
    2629import java.util.Arrays;
     
    7982    this.properties.put("abstract", "This is an image so here is some dummy text to prevent indexer failing.");
    8083    // B. Properties derived from filename
    81     String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1);
     84    // - A simple title for the document
     85    String filepath = this.properties.get("filename");
     86    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1);
    8287    this.properties.put("title", title);
    83     String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1);
     88    String ext = filepath.substring(filepath.lastIndexOf(".") + 1);
     89    // - The name of the copy of the original document
    8490    String target_filename = "doc." + ext;
    8591    this.properties.put("source","doc." + ext);
    86     String assoc_filename = "D" + this.properties.get("docno");
    87     if (assoc_filename.equals("Dnull"))
    88     {
    89       System.err.println("Error! Bogus assoc dir: " + this.properties.get("docno"));
     92    // - A unique associated directory. This gets a little tricky as we need
     93    //   to create the directory at the same time in an effort to promote
     94    //   synchronous behaviour
     95    String unique_id = this.generateHash(filepath);
     96    //   - we start with the first 4 characters
     97    int offset = 0;
     98    String assoc_filename = "D" + unique_id.substring(offset, offset + 4);
     99    //   - we add ".dir" as a suffix to the directory that actually contains
     100    //     files (so the non-suffixed version contains nested directories)
     101    Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     102    //   - then we continue adding blocks of 4 characters until we get a
     103    //     directory that doesn't already exist
     104    while (assoc_path.toFile().exists() && offset < unique_id.length())
     105    {
     106      offset += 4;
     107      assoc_filename += System.getProperty("file.separator") + unique_id.substring(offset, offset + 4);
     108      assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     109    }
     110    //   - still not unique? but run out of unique_id... time to complain
     111    if (assoc_path.toFile().exists())
     112    {
     113      logger.error("ImageDoument - can't determine unique assocfilepath");
    90114      System.exit(0);
    91115    }
    92 
     116    //   - create the directories quick... hopefully before someone else does
     117    assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    93118    this.properties.put("assocfile", assoc_filename);
    94119
    95120    // Copy (symlink) the file into place in the shared directory
    96121    Path source_path = Paths.get(properties.get("filename"));
    97     Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename);
    98     assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    99122    Path target_path = assoc_path.resolve(target_filename);
    100123    if (target_path.toFile().exists())
     
    264287  }
    265288  /** getReader() **/
     289
     290  /** Builds a hexadecimal MD5 fingerprint of the given string.
           *
           *  The string is encoded as UTF-8 bytes, digested with MD5, and each
           *  byte of the digest is appended as two zero-padded hex digits.
           *
           *  If the MD5 algorithm is not available, the exception is printed to
           *  stderr and the JVM is terminated via System.exit(0).
           *
           *  @param  string  the text to hash (the caller in this file passes the
           *                  document's file path)
           *  @return the hex-encoded digest as a String
     291   */
     292  private String generateHash(String string)
     293  {
     294    StringBuffer sb = new StringBuffer();
     295    try
     296    {
     297      final MessageDigest message_digest = MessageDigest.getInstance("MD5");
     298      message_digest.reset();
     299      message_digest.update(string.getBytes(Charset.forName("UTF8")));
     300      final byte[] result_bytes = message_digest.digest();
     301      for (int i = 0; i < result_bytes.length; ++i)
     302      {
                // & 0xFF treats the byte as unsigned; | 0x100 forces a three digit
                // hex string ("1xx") so substring(1,3) always yields two digits,
                // including a leading zero for values below 0x10
     303        sb.append(Integer.toHexString((result_bytes[i] & 0xFF) | 0x100).substring(1,3));
     304      }
     305    }
     306    catch (NoSuchAlgorithmException e)
     307    {
     308      System.err.println("Exception: " + e);
              // NOTE(review): exit status 0 conventionally signals success - confirm
              // that is intended for this fatal error
     309      System.exit(0);
     310    }
     311    return sb.toString();
     312  }
     313  /** generateHash(String) **/
    266314}
    267315
  • gs2-extensions/video-and-audio/trunk/src/opt/Terrier/VideoDocument.java

    r26190 r26214  
    2424import java.io.StringReader;
    2525import java.io.Reader;
    26 import java.lang.Thread;
     26import java.nio.charset.Charset;
    2727import java.nio.file.Files;
    28 import java.nio.file.FileVisitResult;
    29 import static java.nio.file.FileVisitResult.*;
    3028import java.nio.file.Path;
    3129import java.nio.file.Paths;
    3230import java.nio.file.SimpleFileVisitor;
    3331import java.nio.file.attribute.BasicFileAttributes;
     32import java.security.MessageDigest;
     33import java.security.NoSuchAlgorithmException;
    3434import java.util.Collections;
    3535import java.util.Arrays;
     
    8888    this.properties.put("abstract", "This is a video so here is some dummy text to prevent indexer failing.");
    8989    // B. Properties derived from filename
    90     String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1);
     90    String filepath = this.properties.get("filename");
     91    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1);
    9192    this.properties.put("title", title);
    92     String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1);
     93    String ext = filepath.substring(filepath.lastIndexOf(".") + 1);
    9394    String target_filename = "doc." + ext;
    9495    this.properties.put("source","doc." + ext);
    95     String assoc_filename = "D" + properties.get("docno");
     96    // - A unique associated directory. This gets a little tricky as we need
     97    //   to create the directory at the same time in an effort to promote
     98    //   synchronous behaviour
     99    String unique_id = this.generateHash(filepath);
     100    //   - we start with the first 4 characters
     101    int offset = 0;
     102    String assoc_filename = "D" + unique_id.substring(offset, offset + 4);
     103    //   - we add ".dir" as a suffix to the directory that actually contains
     104    //     files (so the non-suffixed version contains nested directories)
     105    Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     106    //   - then we continue adding blocks of 4 characters until we get a
     107    //     directory that doesn't already exist
     108    while (assoc_path.toFile().exists() && offset < unique_id.length())
     109    {
     110      offset += 4;
     111      assoc_filename += System.getProperty("file.separator") + unique_id.substring(offset, offset + 4);
     112      assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     113    }
     114    //   - still not unique? but run out of unique_id... time to complain
     115    if (assoc_path.toFile().exists())
     116    {
     117      logger.error("ImageDoument - can't determine unique assocfilepath");
     118      System.exit(0);
     119    }
     120    //   - create the directories quick... hopefully before someone else does
     121    assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    96122    this.properties.put("assocfile", assoc_filename);
    97123
    98124    // Copy (symlink) the file into place in the shared directory
    99125    Path raw_video_path = Paths.get(properties.get("filename"));
    100     Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename);
    101     // - if the assoc path already exists, we need to recursively delete it and
    102     //   its contents
    103     if (Files.exists(assoc_path))
    104     {
    105       logger.info("VideoDocument - removing existing (old) associated files");
    106       try
    107       {
    108         Files.walkFileTree(assoc_path, new SimpleFileVisitor<Path>()
    109         {
    110           @Override
    111           public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
    112             throws IOException
    113           {
    114             ///ystem.out.println("Deleting file: " + file);
    115             Files.delete(file);
    116             return CONTINUE;
    117           }
    118           @Override
    119           public FileVisitResult postVisitDirectory(Path dir, IOException exc)
    120             throws IOException
    121           {
    122             ///ystem.out.println("Deleting dir: " + dir);
    123             if (exc == null)
    124             {
    125               Files.delete(dir);
    126               return CONTINUE;
    127             }
    128             else
    129             {
    130               throw exc;
    131             }
    132           }
    133         });
    134       }
    135       catch (Exception e)
    136       {
    137         logger.error("Exception while recursively deleting assoc folder:", e);
    138       }
    139     }
    140     assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    141126    Path target_path = assoc_path.resolve(target_filename);
    142127    logger.info("VideoDocument - symlinking original video into assoc directory");
     
    389374  }
    390375  /** getReader() **/
     376
     377  /** Builds a hexadecimal MD5 fingerprint of the given string.
           *
           *  The string is encoded as UTF-8 bytes, digested with MD5, and each
           *  byte of the digest is appended as two zero-padded hex digits.
           *
           *  If the MD5 algorithm is not available, the exception is printed to
           *  stderr and the JVM is terminated via System.exit(0).
           *
           *  @param  string  the text to hash (the caller in this file passes the
           *                  document's file path)
           *  @return the hex-encoded digest as a String
     378   */
     379  private String generateHash(String string)
     380  {
     381    StringBuffer sb = new StringBuffer();
     382    try
     383    {
     384      final MessageDigest message_digest = MessageDigest.getInstance("MD5");
     385      message_digest.reset();
     386      message_digest.update(string.getBytes(Charset.forName("UTF8")));
     387      final byte[] result_bytes = message_digest.digest();
     388      for (int i = 0; i < result_bytes.length; ++i)
     389      {
                // & 0xFF treats the byte as unsigned; | 0x100 forces a three digit
                // hex string ("1xx") so substring(1,3) always yields two digits,
                // including a leading zero for values below 0x10
     390        sb.append(Integer.toHexString((result_bytes[i] & 0xFF) | 0x100).substring(1,3));
     391      }
     392    }
     393    catch (NoSuchAlgorithmException e)
     394    {
     395      System.err.println("Exception: " + e);
              // NOTE(review): exit status 0 conventionally signals success - confirm
              // that is intended for this fatal error
     396      System.exit(0);
     397    }
     398    return sb.toString();
     399  }
     400  /** generateHash(String) **/
    391401}
Note: See TracChangeset for help on using the changeset viewer.