source: other-projects/maori-lang-detection/src/org/greenstone/atea/morphia/WebsiteInfo.java@ 33801

Last change on this file since 33801 was 33801, checked in by ak19, 4 years ago
  1. NutchTextDumpToMongoDB Added an extra field to each document in Websites mongodb collection: numPagesContainingMRI. 2. Bugfix to yesterday's commit: performing a substring() was off by one.
File size: 1.5 KB
Line 
1package org.greenstone.atea.morphia;
2
3import dev.morphia.annotations.*;
4
5@Entity("Websites")
6public class WebsiteInfo {
7 //public final int id;
8 @Id
9 public final String siteFolderName;
10 public final String domain;
11
12 public final int totalPages;
13 public final int countOfWebPagesWithBodyText;
14
15 public final int numPagesInMRI;
16 public final int numPagesContainingMRI;
17
18 public final long siteCrawledTimestamp;
19 public final boolean siteCrawlUnfinished;
20 public final boolean redoCrawl;
21
22 public final String geoLocationCountryCode;
23 public final boolean urlContainsLangCodeInPath;
24
25 public WebsiteInfo(/*int siteCount,*/ String siteFolderName, String domainOfSite,
26 int totalPages, int countOfWebPagesWithBodyText,
27 int numPagesInMRI, int numPagesContainingMRI,
28 long siteCrawledTimestamp, boolean siteCrawlUnfinished, boolean redoCrawl,
29 String geoLocationCountryCode, boolean urlContainsLangCodeInPath)
30 {
31 //this.id = siteCount;
32 this.siteFolderName = siteFolderName;
33 this.domain = domainOfSite;
34
35 this.totalPages = totalPages;
36 this.countOfWebPagesWithBodyText = countOfWebPagesWithBodyText;
37
38 this.numPagesInMRI = numPagesInMRI;
39 this.numPagesContainingMRI = numPagesContainingMRI;
40
41 this.siteCrawledTimestamp = siteCrawledTimestamp;
42 this.siteCrawlUnfinished = siteCrawlUnfinished;
43 this.redoCrawl = redoCrawl;
44
45 this.geoLocationCountryCode = geoLocationCountryCode;
46 this.urlContainsLangCodeInPath = urlContainsLangCodeInPath;
47 }
48}
Note: See TracBrowser for help on using the repository browser.