Changeset 33652

Show
Ignore:
Timestamp:
12.11.2019 20:41:13 (3 weeks ago)
Author:
ak19
Message:

Introducing morphia subpackage

Location:
other-projects/maori-lang-detection/src/org/greenstone/atea
Files:
1 added
4 modified

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java

    r33651 r33652  
    2424import org.apache.log4j.Logger; 
    2525 
     26import org.greenstone.atea.morphia.*; 
     27import dev.morphia.*; 
    2628 
    2729/**  
     
    6870    private MongoClient mongo = null; 
    6971    private MongoDatabase database = null; 
    70      
    71      
     72 
     73    /**  
     74     * MongoDB client handle via Morphia, which handles the ODM (object document mapper) 
     75     * for MongoDB 
     76    */ 
     77    public Datastore datastore = null; 
     78     
    7279    public MongoDBAccess() throws Exception { 
    7380    boolean success = false; 
     
    141148    this.database = mongo.getDatabase(DB_NAME);  
    142149    */ 
     150 
     151    Morphia morphia = new Morphia(); 
     152    morphia.mapPackage("com.greenstone.atea.morphia"); 
     153    datastore = morphia.createDatastore(mongo, DB_NAME); 
     154    datastore.ensureIndexes(); 
    143155     
    144156    } 
  • other-projects/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpToMongoDB.java

    r33634 r33652  
    99import org.apache.commons.csv.*; 
    1010import org.apache.log4j.Logger; 
     11 
     12//import org.bson.types.ObjectId; 
     13     
     14import org.greenstone.atea.morphia.*; 
    1115 
    1216 
     
    223227        String[] sentences = maoriTxtDetector.getAllSentences(text); 
    224228        int totalSentences = sentences.length; 
     229        int numSentencesInMRI = 0; 
    225230        ArrayList<SentenceInfo> singleSentences = maoriTxtDetector.getAllSentencesInfo(sentences); 
    226231        ArrayList<SentenceInfo> overlappingSentences = maoriTxtDetector.getAllOverlappingSentencesInfo(sentences); 
    227          
    228         WebpageInfo webpage = page.convertStoredDataToWebpageInfo(WEBPAGE_COUNTER, 
    229                                      SITE_COUNTER, 
     232 
     233        WebpageInfo webpage = page.convertStoredDataToWebpageInfo(WEBPAGE_COUNTER/*new ObjectId()*/, 
     234                                     this.siteID/*SITE_COUNTER*/, 
    230235                                     isMRI, 
    231236                                     totalSentences, 
     
    233238                                     overlappingSentences); 
    234239 
    235          
    236         mongodbAccess.insertWebpageInfo(webpage); 
     240        for(SentenceInfo si : singleSentences) { 
     241            if(si.langCode.equals(MaoriTextDetector.MAORI_3LETTER_CODE)) { 
     242            numSentencesInMRI++; 
     243            } 
     244        } 
     245        webpage.setMRISentenceCount(numSentencesInMRI); 
     246        webpage.setContainsMRI((numSentencesInMRI > 0)); 
     247         
     248        //mongodbAccess.insertWebpageInfo(webpage); 
     249        mongodbAccess.datastore.save(webpage); 
    237250        } 
    238251    } 
     
    291304    } 
    292305     
    293     //File geoLiteCityDatFile = new File(this.getClass().getResource("GeoLiteCity.dat").getFile()); 
    294     //this.geoLocationCountryCode = getCountryCodeOfDomain(this.domainOfSite, geoLiteCityDatFile); 
     306    File geoLiteCityDatFile = new File(this.getClass().getClassLoader().getResource("GeoLiteCity.dat").getFile()); 
     307    try { 
     308        this.geoLocationCountryCode = Utility.getCountryCodeOfDomain(this.domainOfSite, geoLiteCityDatFile); 
     309    } catch(Exception e) { 
     310        e.printStackTrace(); 
     311        this.geoLocationCountryCode = null; 
     312    } 
    295313 
    296314    int totalPages = pages.size();   
    297315 
    298     WebsiteInfo website = new WebsiteInfo(SITE_COUNTER, this.siteID, this.domainOfSite, 
     316    WebsiteInfo website = new WebsiteInfo(/*SITE_COUNTER,*/ this.siteID, this.domainOfSite, 
    299317          totalPages, this.countOfWebPagesWithBodyText, this.numPagesInMRI,  
    300318          this.siteCrawledTimestamp, this.siteCrawlUnfinished, redoCrawl, 
    301319          this.geoLocationCountryCode, this.urlContainsLangCodeInPath); 
    302320 
    303     mongodbAccess.insertWebsiteInfo(website); 
    304      
     321    //mongodbAccess.insertWebsiteInfo(website); 
     322    mongodbAccess.datastore.save(website); 
    305323    } 
    306324 
  • other-projects/maori-lang-detection/src/org/greenstone/atea/TextDumpPage.java

    r33634 r33652  
    88import org.apache.log4j.Logger; 
    99 
     10import org.greenstone.atea.morphia.*; 
    1011 
    1112public class TextDumpPage { 
     
    178179     */ 
    179180    public WebpageInfo convertStoredDataToWebpageInfo( 
    180       long webpageID, int websiteID, boolean isMRI, int totalSentences, 
     181      long webpageID, String siteID /*int websiteID*/, boolean isMRI, int totalSentences, 
    181182      ArrayList<SentenceInfo> singleSentences, ArrayList<SentenceInfo> overlappingSentences) 
    182183    { 
     
    188189    String fetchTime = getFetchTime(); 
    189190 
    190     WebpageInfo webpage = new WebpageInfo(webpageID, websiteID, 
     191    WebpageInfo webpage = new WebpageInfo(webpageID, siteID/*websiteID,*/, 
    191192                          pageText, pageURL, isMRI, totalSentences, 
    192193                          charEncoding, modifiedTime, fetchTime, 
  • other-projects/maori-lang-detection/src/org/greenstone/atea/TextLanguageDetector.java

    r33651 r33652  
    2929import java.util.ArrayList; 
    3030 
     31import org.greenstone.atea.morphia.*; 
     32 
    3133/** 
    3234 * EXPORT OPENNLP_HOME environment variable to be your apache OpenNLP installation.