source: other-projects/maori-lang-detection/src/org/greenstone/atea/morphia/WebsiteInfo.java@ 33808

Last change on this file since 33808 was 33808, checked in by ak19, 4 years ago

Now storing not just whether the /mi(/) suffix is in the path, but also whether the URL starts with the http(s)://mi. prefix, since storing both can similarly help reduce the number of auto-translated sites.

File size: 1.7 KB
Line 
1package org.greenstone.atea.morphia;
2
3import dev.morphia.annotations.*;
4
5@Entity("Websites")
6public class WebsiteInfo {
7 //public final int id;
8 @Id
9 public final String siteFolderName;
10 public final String domain;
11
12 public final int totalPages;
13 public final int countOfWebPagesWithBodyText;
14
15 public final int numPagesInMRI;
16 public final int numPagesContainingMRI;
17
18 public final long siteCrawledTimestamp;
19 public final boolean siteCrawlUnfinished;
20 public final boolean redoCrawl;
21
22 public final String geoLocationCountryCode;
23 public final boolean urlContainsLangCodeInPathSuffix;
24 public final boolean urlContainsLangCodeInPathPrefix;
25
26 public WebsiteInfo(/*int siteCount,*/ String siteFolderName, String domainOfSite,
27 int totalPages, int countOfWebPagesWithBodyText,
28 int numPagesInMRI, int numPagesContainingMRI,
29 long siteCrawledTimestamp, boolean siteCrawlUnfinished, boolean redoCrawl,
30 String geoLocationCountryCode, boolean urlContainsLangCodeInPathSuffix, boolean urlContainsLangCodeInPathPrefix)
31 {
32 //this.id = siteCount;
33 this.siteFolderName = siteFolderName;
34 this.domain = domainOfSite;
35
36 this.totalPages = totalPages;
37 this.countOfWebPagesWithBodyText = countOfWebPagesWithBodyText;
38
39 this.numPagesInMRI = numPagesInMRI;
40 this.numPagesContainingMRI = numPagesContainingMRI;
41
42 this.siteCrawledTimestamp = siteCrawledTimestamp;
43 this.siteCrawlUnfinished = siteCrawlUnfinished;
44 this.redoCrawl = redoCrawl;
45
46 this.geoLocationCountryCode = geoLocationCountryCode;
47 this.urlContainsLangCodeInPathSuffix = urlContainsLangCodeInPathSuffix;
48 this.urlContainsLangCodeInPathPrefix = urlContainsLangCodeInPathPrefix;
49 }
50}
Note: See TracBrowser for help on using the repository browser.