Changeset 33469

Show
Ignore:
Timestamp:
13.09.2019 21:46:09 (5 weeks ago)
Author:
ak19
Message:

We don't want URLs containing the word "product(s)" (although "production" should be acceptable), as product URLs tend to belong to sites that have been auto-translated, and poorly so, into Maori.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/org/greenstone/atea/WETProcessor.java

    r33468 r33469  
    200200     
    201201    File parentFolder = null; 
    202      
    203     if(lineCount >= MIN_LINE_COUNT && contentLength >= MIN_CONTENT_LENGTH) { 
     202 
     203    // want to match "product(s)" but not "production" 
     204     
     205    //if(recordURI.matches(".*/?product[^a-rt-z].*")) {//if(recordURI.matches(".*/?products?/?.*")) { 
     206    if(recordURI.contains("product") && !recordURI.contains("production")) { 
     207 
     208        // don't want a "translated" product site/online store 
     209        // These curiously often tend to have "product(s)" in the URL 
     210        parentFolder = WETProcessor.discardFolder; 
     211    }    
     212    else if(lineCount >= MIN_LINE_COUNT && contentLength >= MIN_CONTENT_LENGTH) { 
    204213        parentFolder = WETProcessor.keepFolder; 
    205214        System.err.println("@@@KEEPING");