- Timestamp: 2019-11-08T19:43:39+13:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java
r33626 r33633 47 47 private static Logger logger = Logger.getLogger(org.greenstone.atea.MongoDBAccess.class.getName()); 48 48 49 String HOST = "localhost";50 int PORT = 27017; // mongodb port51 String PROPS_FILENAME = "config.properties";52 String DB_NAME = "ateacrawldata";49 static final String PROPS_FILENAME = "config.properties"; 50 public static final String DB_NAME = "anupama"; //"ateacrawldata"; 51 public static final String WEBPAGES_COLLECTION = "webpages"; 52 public static final String WEBSITES_COLLECTION = "websites"; 53 53 54 private String HOST = "localhost"; 55 private int PORT = 27017; // mongodb port 54 56 private String USERNAME; 55 private String PASSWORD; 56 57 private String PASSWORD; 57 58 58 59 private MongoClient mongo = null; 59 60 private MongoDatabase database = null; 61 60 62 61 63 public MongoDBAccess() throws Exception { … … 118 120 logger.info("Credentials: "+ credential); 119 121 } 122 120 123 124 public void insertWebSiteInfo(int SITE_COUNTER, int siteID, String domainOfSite, 125 int numPages, int numPagesInMRI, int numPagesContainingMRI, 126 /* TODO: String geoLocationCountryCode, boolean miURL */ 127 String siteCrawledTimestamp, String siteCrawlUnfinished, boolean redoCrawl) 128 { 129 MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION); 130 Document document = new Document("id", SITE_COUNTER) 131 .append("siteFolderName", siteID) 132 .append("domain", domainOfSite) 133 .append("totalPages", numPages) 134 .append("numPagesInMRI", numPagesInMRI) 135 .append("numPagesContainingMRI", numPagesContainingMRI) 136 .append("siteCrawledTimestamp", siteCrawledTimestamp) 137 .append("siteCrawlUnfinished", siteCrawlUnfinished) 138 .append("redoCrawl", redoCrawl); 139 collection.insertOne(document); 140 System.out.println("website info inserted successfully into " + WEBSITES_COLLECTION); 141 } 121 142 122 /* 123 public void insertDocument() { 124 MongoCollection<Document> collection = 
this.database.getCollection("sampleCollection"); 143 144 public void insertWebPage(int WEBPAGE_COUNTER, int site_id, /* id of websites_collection*/ 145 String url, String charEncoding, String modTime, String fetchTime, 146 boolean isMRI, int totalSentences, int numSentencesInMRI, 147 ArrayList<SentenceInfo> singleSentences, 148 ArrayList<SentenceInfo> overlappingSentences) 149 { 150 // load the webpages db 'table' 151 // in mongodb, the equivalent of db tables are called 'collections' 152 MongoCollection<Document> collection = this.database.getCollection(WEBPAGES_COLLECTION); 153 154 Document document = new Document("id", WEBPAGE_COUNTER) 155 .append("siteid", site_id) 156 .append("url", url) 157 .append("charEncoding", charEncoding) 158 .append("modTime", modTime) 159 .append("fetchTime", fetchTime) 160 .append("isMRI", isMRI) 161 .append("totalSentences", totalSentences) 162 .append("numSentencesInMRI", numSentencesInMRI); 163 164 document.put("singleSentences", singleSentences); 165 document.put("overlappingSentences", overlappingSentences); 166 167 collection.insertOne(document); 168 System.out.println("website info inserted successfully into " + WEBPAGES_COLLECTION); 125 169 } 126 */170 127 171 128 172 // TODO:
Note: See TracChangeset for help on using the changeset viewer.