Changeset 26214

Show
Ignore:
Timestamp:
19.09.2012 12:06:28 (7 years ago)
Author:
jmt12
Message:

New hash-based generation for associated files directory - so docno is no longer essential

Location:
gs2-extensions/video-and-audio/trunk/src/opt/Terrier
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/video-and-audio/trunk/src/opt/Terrier/ImageDocument.java

    r26208 r26214  
    2020import java.io.StringReader; 
    2121import java.io.Reader; 
     22import java.nio.charset.Charset; 
    2223import java.nio.file.Files; 
    2324import java.nio.file.Path; 
    2425import java.nio.file.Paths; 
     26import java.security.MessageDigest; 
     27import java.security.NoSuchAlgorithmException; 
    2528import java.util.Collections; 
    2629import java.util.Arrays; 
     
    7982    this.properties.put("abstract", "This is an image so here is some dummy text to prevent indexer failing."); 
    8083    // B. Properties derived from filename 
    81     String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1); 
     84    // - A simple title for the document 
     85    String filepath = this.properties.get("filename"); 
     86    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1); 
    8287    this.properties.put("title", title); 
    83     String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1); 
     88    String ext = filepath.substring(filepath.lastIndexOf(".") + 1); 
     89    // - The name of the copy of the original document 
    8490    String target_filename = "doc." + ext; 
    8591    this.properties.put("source","doc." + ext); 
    86     String assoc_filename = "D" + this.properties.get("docno"); 
    87     if (assoc_filename.equals("Dnull")) 
    88     { 
    89       System.err.println("Error! Bogus assoc dir: " + this.properties.get("docno")); 
     92    // - A unique associated directory. This gets a little tricky as we need 
     93    //   to create the directory at the same time in an effort to promote 
     94    //   synchronous behaviour 
     95    String unique_id = this.generateHash(filepath); 
     96    //   - we start with the first 4 characters 
     97    int offset = 0; 
     98    String assoc_filename = "D" + unique_id.substring(offset, offset + 4); 
     99    //   - we add ".dir" as a suffix to the directory that actually contains 
     100    //     files (so the non-suffixed version contains nested directories) 
     101    Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir"); 
     102    //   - then we continue adding blocks of 4 characters until we get a 
     103    //     directory that doesn't already exist 
     104    while (assoc_path.toFile().exists() && offset < unique_id.length()) 
     105    { 
     106      offset += 4; 
     107      assoc_filename += System.getProperty("file.separator") + unique_id.substring(offset, offset + 4); 
     108      assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir"); 
     109    } 
     110    //   - still not unique? but run out of unique_id... time to complain 
     111    if (assoc_path.toFile().exists()) 
     112    { 
     113      logger.error("ImageDoument - can't determine unique assocfilepath"); 
    90114      System.exit(0); 
    91115    } 
    92  
     116    //   - create the directories quick... hopefully before someone else does 
     117    assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this 
    93118    this.properties.put("assocfile", assoc_filename); 
    94119 
    95120    // Copy (symlink) the file into place in the shared directory 
    96121    Path source_path = Paths.get(properties.get("filename")); 
    97     Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename); 
    98     assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this 
    99122    Path target_path = assoc_path.resolve(target_filename); 
    100123    if (target_path.toFile().exists()) 
     
    264287  } 
    265288  /** getReader() **/ 
     289 
     290  /** 
     291   */ 
     292  private String generateHash(String string) 
     293  { 
     294    StringBuffer sb = new StringBuffer(); 
     295    try 
     296    { 
     297      final MessageDigest message_digest = MessageDigest.getInstance("MD5"); 
     298      message_digest.reset(); 
     299      message_digest.update(string.getBytes(Charset.forName("UTF8"))); 
     300      final byte[] result_bytes = message_digest.digest(); 
     301      for (int i = 0; i < result_bytes.length; ++i) 
     302      { 
     303        sb.append(Integer.toHexString((result_bytes[i] & 0xFF) | 0x100).substring(1,3)); 
     304      } 
     305    } 
     306    catch (NoSuchAlgorithmException e) 
     307    { 
     308      System.err.println("Exception: " + e); 
     309      System.exit(0); 
     310    } 
     311    return sb.toString(); 
     312  } 
     313  /** generateHash(String) **/ 
    266314} 
    267315 
  • gs2-extensions/video-and-audio/trunk/src/opt/Terrier/VideoDocument.java

    r26190 r26214  
    2424import java.io.StringReader; 
    2525import java.io.Reader; 
    26 import java.lang.Thread; 
     26import java.nio.charset.Charset; 
    2727import java.nio.file.Files; 
    28 import java.nio.file.FileVisitResult; 
    29 import static java.nio.file.FileVisitResult.*; 
    3028import java.nio.file.Path; 
    3129import java.nio.file.Paths; 
    3230import java.nio.file.SimpleFileVisitor; 
    3331import java.nio.file.attribute.BasicFileAttributes; 
     32import java.security.MessageDigest; 
     33import java.security.NoSuchAlgorithmException; 
    3434import java.util.Collections; 
    3535import java.util.Arrays; 
     
    8888    this.properties.put("abstract", "This is a video so here is some dummy text to prevent indexer failing."); 
    8989    // B. Properties derived from filename 
    90     String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1); 
     90    String filepath = this.properties.get("filename"); 
     91    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1); 
    9192    this.properties.put("title", title); 
    92     String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1); 
     93    String ext = filepath.substring(filepath.lastIndexOf(".") + 1); 
    9394    String target_filename = "doc." + ext; 
    9495    this.properties.put("source","doc." + ext); 
    95     String assoc_filename = "D" + properties.get("docno"); 
     96    // - A unique associated directory. This gets a little tricky as we need 
     97    //   to create the directory at the same time in an effort to promote 
     98    //   synchronous behaviour 
     99    String unique_id = this.generateHash(filepath); 
     100    //   - we start with the first 4 characters 
     101    int offset = 0; 
     102    String assoc_filename = "D" + unique_id.substring(offset, offset + 4); 
     103    //   - we add ".dir" as a suffix to the directory that actually contains 
     104    //     files (so the non-suffixed version contains nested directories) 
     105    Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir"); 
     106    //   - then we continue adding blocks of 4 characters until we get a 
     107    //     directory that doesn't already exist 
     108    while (assoc_path.toFile().exists() && offset < unique_id.length()) 
     109    { 
     110      offset += 4; 
     111      assoc_filename += System.getProperty("file.separator") + unique_id.substring(offset, offset + 4); 
     112      assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir"); 
     113    } 
     114    //   - still not unique? but run out of unique_id... time to complain 
     115    if (assoc_path.toFile().exists()) 
     116    { 
     117      logger.error("ImageDoument - can't determine unique assocfilepath"); 
     118      System.exit(0); 
     119    } 
     120    //   - create the directories quick... hopefully before someone else does 
     121    assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this 
    96122    this.properties.put("assocfile", assoc_filename); 
    97123 
    98124    // Copy (symlink) the file into place in the shared directory 
    99125    Path raw_video_path = Paths.get(properties.get("filename")); 
    100     Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename); 
    101     // - if the assoc path already exists, we need to recursively delete it and 
    102     //   its contents 
    103     if (Files.exists(assoc_path)) 
    104     { 
    105       logger.info("VideoDocument - removing existing (old) associated files"); 
    106       try 
    107       { 
    108         Files.walkFileTree(assoc_path, new SimpleFileVisitor<Path>() 
    109         { 
    110           @Override 
    111           public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) 
    112             throws IOException 
    113           { 
    114             ///ystem.out.println("Deleting file: " + file); 
    115             Files.delete(file); 
    116             return CONTINUE; 
    117           } 
    118           @Override 
    119           public FileVisitResult postVisitDirectory(Path dir, IOException exc) 
    120             throws IOException 
    121           { 
    122             ///ystem.out.println("Deleting dir: " + dir); 
    123             if (exc == null) 
    124             { 
    125               Files.delete(dir); 
    126               return CONTINUE; 
    127             } 
    128             else 
    129             { 
    130               throw exc; 
    131             } 
    132           } 
    133         }); 
    134       } 
    135       catch (Exception e) 
    136       { 
    137         logger.error("Exception while recursively deleting assoc folder:", e); 
    138       } 
    139     } 
    140     assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this 
    141126    Path target_path = assoc_path.resolve(target_filename); 
    142127    logger.info("VideoDocument - symlinking original video into assoc directory"); 
     
    389374  } 
    390375  /** getReader() **/ 
     376 
     377  /** 
     378   */ 
     379  private String generateHash(String string) 
     380  { 
     381    StringBuffer sb = new StringBuffer(); 
     382    try 
     383    { 
     384      final MessageDigest message_digest = MessageDigest.getInstance("MD5"); 
     385      message_digest.reset(); 
     386      message_digest.update(string.getBytes(Charset.forName("UTF8"))); 
     387      final byte[] result_bytes = message_digest.digest(); 
     388      for (int i = 0; i < result_bytes.length; ++i) 
     389      { 
     390        sb.append(Integer.toHexString((result_bytes[i] & 0xFF) | 0x100).substring(1,3)); 
     391      } 
     392    } 
     393    catch (NoSuchAlgorithmException e) 
     394    { 
     395      System.err.println("Exception: " + e); 
     396      System.exit(0); 
     397    } 
     398    return sb.toString(); 
     399  } 
     400  /** generateHash(String) **/ 
    391401}