Changeset 33653 for other-projects
- Timestamp:
- 2019-11-12T20:51:48+13:00 (4 years ago)
- Location:
- other-projects/maori-lang-detection
- Files:
-
- 4 added
- 2 edited
- 3 moved
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/MoreReading/mongodb.txt
r33646 r33653 316 316 db.getCollection('webpages').find({"isMRI": true, "singleSentences.langCode": "mri"}) 317 317 db.getCollection('webpages').find({"singleSentences": { $elemMatch: {"langCode":"mri"} } }, {"singleSentences.$": "mri"}) 318 319 318 db.getCollection('Webpages').find({"isMRI": true, "singleSentences": { $elemMatch: {"langCode":"eng"} } }, {"singleSentences.$": "eng"}) [single English lang sentence] 319 db.getCollection('Webpages').find({"containsMRI": true, "singleSentences": { $elemMatch: {"langCode":"mri"} } }, {"singleSentences.$": "mri"}) [gets 1st sentence of docs which have sentences containing MRI] 320 320 321 321 -
other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java
r33652 r33653 165 165 } 166 166 167 167 /* 168 168 public void insertWebsiteInfo(WebsiteInfo website) 169 169 { … … 188 188 + " inserted successfully into " + WEBSITES_COLLECTION); 189 189 } 190 191 /* TODO:192 https://stackoverflow.com/questions/39433775/mongodb-java-inserting-throws-org-bson-codecs-configuration-codecconfigurationex193 190 */ 191 194 192 /** 195 193 * Inserts a web page into the mongodb. Besides page related metadata and full body text 196 194 * the language information per sentence and per 2 adjacent sentences also get stored 197 195 * into the mongodb. 198 */ 196 */ 197 /* 199 198 public void insertWebpageInfo(WebpageInfo webpage) 200 199 { … … 255 254 logger.debug("\nwebpage info for " + webpage.webpageID + " inserted successfully into " + WEBPAGES_COLLECTION); 256 255 } 256 */ 257 257 258 258 /** https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection */ -
other-projects/maori-lang-detection/src/org/greenstone/atea/morphia/SentenceInfo.java
r33652 r33653
 old  new
   1       - package org.greenstone.atea;
        1  + package org.greenstone.atea.morphia;
   2    2
        3  + import dev.morphia.annotations.*;
        4  +
        5  + @Entity("Sentences")
   3    6    public class SentenceInfo {
   4    7        public final double confidenceLevel;
-
other-projects/maori-lang-detection/src/org/greenstone/atea/morphia/WebpageInfo.java
r33652 r33653 1 package org.greenstone.atea ;1 package org.greenstone.atea.morphia; 2 2 3 import dev.morphia.annotations.*; 3 4 import java.util.ArrayList; 5 import java.util.List; 4 6 7 /** 8 * Morphia provides the Object Document Mapper for MongoDB 9 * https://www.baeldung.com/mongodb-morphia 10 * 11 */ 12 @Entity("Webpages") 5 13 public class WebpageInfo { 6 14 7 private int mriSentenceCount;8 9 15 /** db table ids */ 16 @Id 10 17 public final long webpageID; 11 public final int websiteID; 18 // TODO: should this be a "Reference" to the WebsiteInfo object instead? 19 // See section 5.2 of https://www.baeldung.com/mongodb-morphia 20 public final String websiteID; //int websiteID; 12 21 13 22 public final int totalSentences; … … 20 29 public final String modifiedTime; 21 30 public final String fetchTime; 22 public final ArrayList<SentenceInfo> singleSentences; 23 public final ArrayList<SentenceInfo> overlappingSentences; 24 25 public WebpageInfo (long webpageID, int websiteID, 31 32 @Embedded 33 public final List<SentenceInfo> singleSentences; 34 @Embedded 35 public final List<SentenceInfo> overlappingSentences; 36 37 private int mriSentenceCount; 38 private boolean containsMRI; 39 40 public WebpageInfo (long webpageID, String siteID/*int websiteID,*/, 26 41 String pageText, String pageURL, boolean isMRI, int totalSentences, 27 42 String charEncoding, String modifiedTime, String fetchTime, 28 ArrayList<SentenceInfo> singleSentences,29 ArrayList<SentenceInfo> overlappingSentences)43 List<SentenceInfo> singleSentences, 44 List<SentenceInfo> overlappingSentences) 30 45 { 31 46 32 47 this.webpageID = webpageID; 33 this.websiteID = websiteID; 48 //this.websiteID = websiteID; 49 this.websiteID = siteID; 34 50 35 51 this.totalSentences = totalSentences; … … 51 67 this.mriSentenceCount = count; 52 68 } 69 public void setContainsMRI(boolean containsMRI) { 70 this.containsMRI = containsMRI; 71 } 53 72 54 73 public int getMRISentenceCount() { return this.mriSentenceCount; } -
other-projects/maori-lang-detection/src/org/greenstone/atea/morphia/WebsiteInfo.java
r33652 r33653
 old  new
   1       - package org.greenstone.atea;
        1  + package org.greenstone.atea.morphia;
   2    2
        3  + import dev.morphia.annotations.*;
        4  +
        5  + @Entity("Websites")
   3    6    public class WebsiteInfo {
   4       -
   5       -     public final int id;
        7  +     //public final int id;
        8  +     @Id
   6    9        public final String siteFolderName;
   7   10        public final String domain;
   …    …
  18   21        public final boolean urlContainsLangCodeInpath;
  19   22
  20       -     public WebsiteInfo(int siteCount, String siteFolderName, String domainOfSite,
       23  +     public WebsiteInfo(/*int siteCount,*/ String siteFolderName, String domainOfSite,
  21   24                int totalPages, int countOfWebPagesWithBodyText, int numPagesInMRI,
  22   25                long siteCrawledTimestamp, boolean siteCrawlUnfinished, boolean redoCrawl,
  23   26                String geoLocationCountryCode, boolean urlContainsLangCodeInpath)
  24   27        {
  25       -         this.id = siteCount;
       28  +         //this.id = siteCount;
  26   29            this.siteFolderName = siteFolderName;
  27   30            this.domain = domainOfSite;
Note: See TracChangeset for help on using the changeset viewer.