Changeset 33909 for other-projects/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpToMongoDB.java
- Timestamp: 2020-02-12T19:02:44+13:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/NutchTextDumpToMongoDB.java
r33906 r33909
    77     77
    78     78   private String domainOfSite;
    79        - private String baseSiteDomain; // domainOfSite stripped of any http(s)://www.
           79 + //private String baseSiteDomain; // domainOfSite stripped of any http(s)://www.
    80     80   private int numPagesInMRI = 0;
    81     81   private int numPagesContainingMRI = 0;
     …      …
   203    203   String url = firstPage.getPageURL();
   204    204   this.domainOfSite = Utility.getDomainForURL(url, true);
   205        - this.baseSiteDomain = Utility.stripProtocolAndWWWFromURL(this.domainOfSite);
          205 + //this.baseSiteDomain = Utility.stripProtocolAndWWWFromURL(this.domainOfSite);
   206    206   }
   207    207   else {
   208    208   this.domainOfSite = "UNKNOWN";
   209        - this.baseSiteDomain = "UNKNOWN";
          209 + //this.baseSiteDomain = "UNKNOWN";
   210    210   }
   211    211
     …      …
   343    343
   344    344   WebsiteInfo website = new WebsiteInfo(/*SITE_COUNTER,*/ this.siteID,
   345        - this.domainOfSite, this.baseSiteDomain,
          345 + this.domainOfSite, //this.baseSiteDomain,
   346    346   totalPages, this.countOfWebPagesWithBodyText,
   347    347   this.numPagesInMRI, this.numPagesContainingMRI,
Note: See TracChangeset for help on using the changeset viewer.