Ignore:
Timestamp:
2012-09-19T12:06:28+12:00 (12 years ago)
Author:
jmt12
Message:

New hash based generation for associated files directory - so docno is no longer essential

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/video-and-audio/trunk/src/opt/Terrier/VideoDocument.java

    r26190 r26214  
    2424import java.io.StringReader;
    2525import java.io.Reader;
    26 import java.lang.Thread;
     26import java.nio.charset.Charset;
    2727import java.nio.file.Files;
    28 import java.nio.file.FileVisitResult;
    29 import static java.nio.file.FileVisitResult.*;
    3028import java.nio.file.Path;
    3129import java.nio.file.Paths;
    3230import java.nio.file.SimpleFileVisitor;
    3331import java.nio.file.attribute.BasicFileAttributes;
     32import java.security.MessageDigest;
     33import java.security.NoSuchAlgorithmException;
    3434import java.util.Collections;
    3535import java.util.Arrays;
     
    8888    this.properties.put("abstract", "This is a video so here is some dummy text to prevent indexer failing.");
    8989    // B. Properties derived from filename
    90     String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1);
     90    String filepath = this.properties.get("filename");
     91    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1);
    9192    this.properties.put("title", title);
    92     String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1);
     93    String ext = filepath.substring(filepath.lastIndexOf(".") + 1);
    9394    String target_filename = "doc." + ext;
    9495    this.properties.put("source","doc." + ext);
    95     String assoc_filename = "D" + properties.get("docno");
     96    // - A unique associated directory. This gets a little tricky as we need
     97    //   to create the directory at the same time in an effort to promote
     98    //   synchronous behaviour
     99    String unique_id = this.generateHash(filepath);
     100    //   - we start with the first 4 characters
     101    int offset = 0;
     102    String assoc_filename = "D" + unique_id.substring(offset, offset + 4);
     103    //   - we add ".dir" as a suffix to the directory that actually contains
     104    //     files (so the non-suffixed version contains nested directories)
     105    Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     106    //   - then we continue adding blocks of 4 characters until we get a
     107    //     directory that doesn't already exist
     108    while (assoc_path.toFile().exists() && offset < unique_id.length())
     109    {
     110      offset += 4;
     111      assoc_filename += System.getProperty("file.separator") + unique_id.substring(offset, offset + 4);
     112      assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
     113    }
     114    //   - still not unique? but run out of unique_id... time to complain
     115    if (assoc_path.toFile().exists())
     116    {
     117      logger.error("ImageDoument - can't determine unique assocfilepath");
     118      System.exit(0);
     119    }
     120    //   - create the directories quick... hopefully before someone else does
     121    assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    96122    this.properties.put("assocfile", assoc_filename);
    97123
    98124    // Copy (symlink) the file into place in the shared directory
    99125    Path raw_video_path = Paths.get(properties.get("filename"));
    100     Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename);
    101     // - if the assoc path already exists, we need to recursively delete it and
    102     //   its contents
    103     if (Files.exists(assoc_path))
    104     {
    105       logger.info("VideoDocument - removing existing (old) associated files");
    106       try
    107       {
    108         Files.walkFileTree(assoc_path, new SimpleFileVisitor<Path>()
    109         {
    110           @Override
    111           public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
    112             throws IOException
    113           {
    114             ///ystem.out.println("Deleting file: " + file);
    115             Files.delete(file);
    116             return CONTINUE;
    117           }
    118           @Override
    119           public FileVisitResult postVisitDirectory(Path dir, IOException exc)
    120             throws IOException
    121           {
    122             ///ystem.out.println("Deleting dir: " + dir);
    123             if (exc == null)
    124             {
    125               Files.delete(dir);
    126               return CONTINUE;
    127             }
    128             else
    129             {
    130               throw exc;
    131             }
    132           }
    133         });
    134       }
    135       catch (Exception e)
    136       {
    137         logger.error("Exception while recursively deleting assoc folder:", e);
    138       }
    139     }
    140     assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
    141126    Path target_path = assoc_path.resolve(target_filename);
    142127    logger.info("VideoDocument - symlinking original video into assoc directory");
     
    389374  }
    390375  /** getReader() **/
     376
     377  /**
     378   */
     379  private String generateHash(String string)
     380  {
     381    StringBuffer sb = new StringBuffer();
     382    try
     383    {
     384      final MessageDigest message_digest = MessageDigest.getInstance("MD5");
     385      message_digest.reset();
     386      message_digest.update(string.getBytes(Charset.forName("UTF8")));
     387      final byte[] result_bytes = message_digest.digest();
     388      for (int i = 0; i < result_bytes.length; ++i)
     389      {
     390        sb.append(Integer.toHexString((result_bytes[i] & 0xFF) | 0x100).substring(1,3));
     391      }
     392    }
     393    catch (NoSuchAlgorithmException e)
     394    {
     395      System.err.println("Exception: " + e);
     396      System.exit(0);
     397    }
     398    return sb.toString();
     399  }
     400  /** generateHash(String) **/
    391401}
Note: See TracChangeset for help on using the changeset viewer.