source: other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java@ 33883

Last change on this file since 33883 was 33883, checked in by ak19, 4 years ago

Clarifications

File size: 20.9 KB
RevLine 
[33616]1package org.greenstone.atea;
2
[33645]3//import org.bson.BSONObject;
[33869]4
[33876]5import com.mongodb.client.AggregateIterable;
[33623]6import com.mongodb.client.MongoCollection;
[33616]7import com.mongodb.client.MongoDatabase;
[33634]8//import com.mongodb.client.MongoIterable;
[33869]9
10// to use collection.find() filters like eq(), regex() etc
11import static com.mongodb.client.model.Filters.*;
12// to use collection.find().projection() filters like include() etc
13import static com.mongodb.client.model.Projections.*;
[33876]14// to use aggregation functions like unwind(), match(), sort() etc
15import static com.mongodb.client.model.Aggregates.*;
16// to use functions like sum() and addToSet() within aggregation functions
17import static com.mongodb.client.model.Accumulators.*;
[33869]18
[33880]19
[33869]20//import org.bson.conversions.Bson;
[33634]21import com.mongodb.BasicDBObject;
[33616]22import com.mongodb.MongoClient;
[33634]23import com.mongodb.MongoCredential;
24import com.mongodb.ServerAddress;
25import com.mongodb.MongoClientOptions;
[33616]26
[33869]27import com.mongodb.Block;
28
[33623]29import org.bson.Document;
[33876]30import org.bson.conversions.Bson;
[33880]31import org.bson.json.JsonMode;
32import org.bson.json.JsonWriterSettings;
[33876]33
34import com.mongodb.util.JSON;
35//import com.mongodb.DBObject;
[33623]36
[33880]37
38import com.google.gson.*; // for pretty printing
39
[33616]40import java.io.BufferedReader;
41import java.io.File;
42import java.io.FileReader;
[33880]43import java.io.IOException;
[33881]44import java.io.UncheckedIOException;
[33880]45import java.io.Writer;
46
[33876]47import java.util.Arrays;
[33634]48import java.util.ArrayList;
49import java.util.List;
[33616]50import java.util.Properties;
[33869]51import java.util.regex.Pattern;
[33616]52
53import org.apache.log4j.Logger;
54
[33652]55import org.greenstone.atea.morphia.*;
56import dev.morphia.*;
[33616]57
58/**
59 * https://www.tutorialspoint.com/mongodb/mongodb_java.htm
60 *
61 * TO COMPILE:
62 * maori-lang-detection/src$
[33622]63 * javac -cp ".:../conf:../lib/*" org/greenstone/atea/MongoDBAccess.java
[33616]64 *
65 * TO RUN:
[33622]66 * java -cp ".:../conf:../lib/*" org.greenstone.atea.MongoDBAccess
[33623]67 *
68 * Manually connecting to mongodb from client:
69 * mongo 'mongodb://mongodb.cms.waikato.ac.nz:27017' -u USERNAME -p
70 * Then after connecting with pwd, type:
71 * use DBNAME
72 *
73 * Or connect to mongodb and specify db in one statement:
74 * mongo 'mongodb://mongodb.cms.waikato.ac.nz:27017/DBNAME?authSource=admin' -u USERNAME -p
75 *
76 * Some links:
77 * - https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection
78 * - https://docs.mongodb.com/manual/reference/glossary/ (particularly "collection")
79 * - https://tecadmin.net/tutorial/mongodb/drop-collection/
80 * IMPORTANT LINK:
81 * - https://www.mongodb.com/blog/post/6-rules-of-thumb-for-mongodb-schema-design-part-1
82 *
[33616]83 */
[33634]84public class MongoDBAccess implements AutoCloseable {
[33616]85
/** log4j logger shared by all instances of this class. */
private static Logger logger = Logger.getLogger(org.greenstone.atea.MongoDBAccess.class.getName());

/** Name of the properties file that the constructor loads from the classpath. */
static final String PROPS_FILENAME = "config.properties";
/** Name of the collection queried for page URLs by queryAllMatchingURLsFilteredBy(). */
public static final String WEBPAGES_COLLECTION = "Webpages";
/** Name of the collection that the aggregateContainsMRIFor...() methods run against. */
public static final String WEBSITES_COLLECTION = "Websites";

/** Platform line separator, appended after each JSON document written by writeDoc(). */
public static final String NEWLINE = System.getProperty("line.separator");

/** mongodb filter types to execute */
// IS_MRI selects on the isMRI / numPagesInMRI fields
public static final int IS_MRI = 0;
// CONTAINS_MRI selects on the containsMRI / numPagesContainingMRI fields
public static final int CONTAINS_MRI = 1;

/** Some reused fieldnames in the Websites collection */
// NOTE(review): neither constant is referenced by the methods of this class;
// the aggregate queries spell the field names out in their query strings.
private static final String FILTER_NUMPAGES_IN_MRI = "numPagesInMRI";
private static final String FILTER_NUMPAGES_CONTAINING_MRI = "numPagesContainingMRI";

// configuration details, some with fallback values
// (each is overwritten by the constructor from the mongodb.* properties when present)
private String HOST = "localhost";
private int PORT = 27017; // mongodb port
private String USERNAME;
private String PASSWORD;
private String DB_NAME ="ateacrawldata";

// both are assigned by connectToDB() and remain null until it has been called
private MongoClient mongo = null;
private MongoDatabase database = null;

/**
 * Mongodb Client handle via morphia, which handles the ODM (object document mapper)
 * for MongoDB. Assigned by connectToDB(); null until then.
 */
public Datastore datastore = null;
117
[33622]118 public MongoDBAccess() throws Exception {
[33616]119 boolean success = false;
120
121 // Read in the username and password from our props file
122 Properties props = new Properties();
123
124 //File propsFile = new File(PROPS_FILENAME);
125 //logger.debug("*** Conf props filename: " + propsFile.getAbsolutePath());
126 try {
127 props.load(getClass().getClassLoader().getResourceAsStream(PROPS_FILENAME));
128 } catch(Exception e) {
129 logger.error(e);
130 }
131
132
133 USERNAME = props.getProperty("mongodb.user", "");
134 if(USERNAME.equals("")) {
135 USERNAME = "root";
[33623]136 logger.warn("WARNING: No sensible value for mongodb.user specified in " + PROPS_FILENAME + ". Attempting to use: " + USERNAME);
[33616]137 }
138 PASSWORD = props.getProperty("mongodb.pwd");
139
140 logger.debug("Got pwd: " + PASSWORD);
141
142 if(PASSWORD != null && PASSWORD.equals("CHANGEME")) {
143
144 success = false;
145 throw new Exception("************ FATAL ERROR: Change DB password in properties file " + PROPS_FILENAME);
146 }
[33623]147
148 HOST = props.getProperty("mongodb.host", HOST);
149 String port = props.getProperty("mongodb.port", Integer.toString(PORT));
150 PORT = Integer.parseInt(port);
151 DB_NAME = props.getProperty("mongodb.dbname", DB_NAME);
152
153 logger.info("Connecting to mongodb with:");
154 logger.info(" - host: " + HOST);
155 logger.info(" - port: " + PORT);
156 logger.info(" - user: " + USERNAME);
157 logger.info(" - db name: " + DB_NAME);
[33616]158 }
159
[33623]160 /**
161 * Since we have only a single MongoClient, don't need to call close/disconnect on it as per
162 * https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection
163 */
[33616]164 public void connectToDB() throws Exception {
[33634]165
[33616]166 // Creating a Mongo client
[33623]167 mongo = new MongoClient( HOST, PORT );
[33616]168
169 // Creating Credentials
170 MongoCredential credential;
171 credential = MongoCredential.createCredential(USERNAME, DB_NAME, PASSWORD.toCharArray());
172 System.out.println("Connected to the database successfully");
173
174 // Accessing the database
[33634]175 this.database = mongo.getDatabase(DB_NAME);
[33623]176 logger.info("Credentials: "+ credential);
[33634]177
178 /*
179 MongoCredential credential;
180 credential = MongoCredential.createCredential(USERNAME, DB_NAME, PASSWORD.toCharArray());
181 logger.info("Credentials: "+ credential);
182
183 // Create our Mongo client
184 mongo = new MongoClient( new ServerAddress(HOST, PORT), credential, new MongoClientOptions.Builder().build());
185 System.out.println("Connected to the database successfully");
186
187 this.database = mongo.getDatabase(DB_NAME);
188 */
[33652]189
190 Morphia morphia = new Morphia();
191 morphia.mapPackage("com.greenstone.atea.morphia");
192 datastore = morphia.createDatastore(mongo, DB_NAME);
193 datastore.ensureIndexes();
[33634]194
[33616]195 }
[33633]196
[33634]197 // TODO: which fields should be indexed?
198
199 public void showCollections() {
200 //MongoIterable<String> colls = this.database.listCollectionNames();
201 for(String coll : this.database.listCollectionNames()) {
202 System.err.println("coll: " + coll);
203 }
204 }
[33616]205
[33653]206 /*
[33634]207 public void insertWebsiteInfo(WebsiteInfo website)
[33633]208 {
209 MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION);
[33634]210 Document document = new Document("_id", website.id)
211 .append("siteFolderName", website.siteFolderName)
212 .append("domain", website.domain)
213 .append("totalPages", website.totalPages)
214 .append("numPagesWithBodyText", website.countOfWebPagesWithBodyText)
215 .append("numPagesInMRI", website.numPagesInMRI)
216 .append("siteCrawledTimestamp", website.siteCrawledTimestamp)
217 .append("siteCrawlUnfinished", website.siteCrawlUnfinished)
218 .append("redoCrawl", website.redoCrawl);
219
220 document.put("urlContainsLangCodeInpath", website.urlContainsLangCodeInpath);
221 if(website.geoLocationCountryCode != null && !website.geoLocationCountryCode.equals("")) {
222 document.put("countryCode", website.geoLocationCountryCode);
223 }
224
[33633]225 collection.insertOne(document);
[33634]226 logger.debug("Website info for " + website.id + "(" + website.siteFolderName + ")"
227 + " inserted successfully into " + WEBSITES_COLLECTION);
[33633]228 }
[33653]229 */
[33616]230
[33651]231 /**
232 * Inserts a web page into the mongodb. Besides page related metadata and full body text
233 * the language information per sentence and per 2 adjacent sentences also get stored
234 * into the mongodb.
[33653]235 */
236 /*
[33634]237 public void insertWebpageInfo(WebpageInfo webpage)
[33633]238 {
[33651]239 int mri_sentence_count = 0;
240
[33633]241 // load the webpages db 'table'
242 // in mongodb, the equivalent of db tables are called 'collections'
243 MongoCollection<Document> collection = this.database.getCollection(WEBPAGES_COLLECTION);
244
[33634]245 Document document = new Document("_id", webpage.webpageID)
246 .append("siteid", webpage.websiteID)
247 .append("url", webpage.URL)
248 .append("isMRI", webpage.isMRI)
249 .append("totalSentences", webpage.totalSentences)
250 .append("charEncoding", webpage.charEncoding)
251 .append("modTime", webpage.modifiedTime)
252 .append("fetchTime", webpage.fetchTime);
[33633]253
[33634]254 // INSTEAD, ARRAY OF OBJECTS TO BE INSERTED AS PER:
255 // https://stackoverflow.com/questions/15371839/how-to-add-an-array-to-a-mongodb-document-using-java
256 List<BasicDBObject> sentencesList = new ArrayList<>();
[33645]257 for(SentenceInfo sentenceInfo : webpage.singleSentences) {
[33651]258
[33645]259 BasicDBObject bsonRecord = new BasicDBObject("langCode", sentenceInfo.langCode);
[33651]260
[33645]261 bsonRecord.put("confidence", sentenceInfo.confidenceLevel);
262 bsonRecord.put("sentence", sentenceInfo.sentence);
263
264 sentencesList.add(bsonRecord);
[33651]265
266 if(sentenceInfo.langCode.equals(MaoriTextDetector.MAORI_3LETTER_CODE)) {
267 mri_sentence_count++;
268 }
269
[33634]270 }
271 document.put("singleSentences", sentencesList);
[33633]272
[33634]273 List<BasicDBObject> overlappingSentencesList = new ArrayList<>();
[33645]274 for(SentenceInfo sentenceInfo : webpage.overlappingSentences) {
[33651]275
[33645]276 BasicDBObject bsonRecord = new BasicDBObject("langCode", sentenceInfo.langCode);
277 bsonRecord.put("confidence", sentenceInfo.confidenceLevel);
278 bsonRecord.put("sentence", sentenceInfo.sentence);
[33651]279
280 overlappingSentencesList.add(bsonRecord);
[33634]281 }
[33645]282 document.put("overlappingSentences", overlappingSentencesList);
283
[33634]284 // also put the full text in there
285 document.put("text", webpage.text);
[33651]286
287 // also store the count of sentences in MRI
288 webpage.setMRISentenceCount(mri_sentence_count);
289 document.put("mriSentenceCount", mri_sentence_count);
290
[33634]291
[33633]292 collection.insertOne(document);
[33634]293 logger.debug("\nwebpage info for " + webpage.webpageID + " inserted successfully into " + WEBPAGES_COLLECTION);
[33623]294 }
[33653]295 */
[33876]296
297 public ArrayList<String> queryAllMatchingIsMRIURLs(String domain) {
298 return queryAllMatchingURLsFilteredBy(domain, IS_MRI);
299 }
300 public ArrayList<String> queryAllMatchingcontainsMRIURLs(String domain) {
301 return queryAllMatchingURLsFilteredBy(domain, CONTAINS_MRI);
302 }
303
[33869]304 /**
305 * Java mongodb find: https://mongodb.github.io/mongo-java-driver/3.4/driver/getting-started/quick-start/
306 * Java mongodb find filters: https://mongodb.github.io/mongo-java-driver/3.4/javadoc/?com/mongodb/client/model/Filters.html
307 * Java mongodb projection: https://stackoverflow.com/questions/44894497/retrieving-data-with-mongodb-java-driver-3-4-using-find-method-with-projection
308 * mongodb projection: https://docs.mongodb.com/v3.2/reference/method/db.collection.find/#db.collection.find
[33870]309 *
310 * Parse MongoDB query into Java: https://stackoverflow.com/questions/17326747/parsing-strings-to-mongodb-query-documents-with-operators-in-java
311 * Maybe also https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria
312 * https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java
313 * http://pingax.com/trick-convert-mongo-shell-query-equivalent-java-objects/
[33876]314 */
315 public ArrayList<String> queryAllMatchingURLsFilteredBy(String domain, int filterType) {
[33869]316
317 final ArrayList<String> urlsList = new ArrayList<String>();
[33870]318
319 // remove any http(s)://(www.) from the start of URL first
320 // since it goes into a regex
321 domain = Utility.stripProtocolAndWWWFromURL(domain);
[33869]322
323 // load the "webpages" db table
[33871]324 // in mongodb, the equivalent of db tables are called 'collections'
[33869]325 MongoCollection<Document> collection = this.database.getCollection(WEBPAGES_COLLECTION);
326
[33871]327 // code we'll execute in Iterable.forEach() below
[33881]328 // see also https://www.baeldung.com/foreach-java
[33869]329 Block<Document> storeURL = new Block<Document>() {
330 @Override
331 public void apply(final Document document) {
332 //System.out.println(document.toJson());
333 String url = document.getString("URL");
334 // add to our urlsList
[33871]335 //System.out.println(url);
[33869]336 urlsList.add(url);
337 }
338 };
339
340
[33871]341 // Run the following mongodb query:
342 // db.getCollection('Webpages').find({URL: /domain/, isMRI: true}, {URL: 1, _id: 0})
[33869]343
[33871]344 // 1. One way that works:
345 //collection.find(and(eq("isMRI", true), regex("URL", pattern))).projection(fields(include("URL"), excludeId())).forEach(storeURL);
[33870]346
[33871]347 // 2. Another way:
[33876]348 //String query = "{URL: /DOMAIN/, isMRI: true}";
349 String query = "{URL: /DOMAIN/, ";
350 if(filterType == IS_MRI) {
351 query += "isMRI: true}";
352 } else if(filterType == CONTAINS_MRI) {
353 query += "containsMRI: true}";
354 }
355
[33871]356 domain = domain.replace(".", "\\."); // escape dots in domain for regex
[33870]357 query = query.replace("DOMAIN", domain);
[33871]358
359 //System.err.println("Executing find query: " + query);
[33870]360
361 BasicDBObject findObj = BasicDBObject.parse(query);
362 BasicDBObject projectionObj = BasicDBObject.parse("{URL: 1, _id: 0}");
363
[33871]364
[33870]365 collection.find(findObj).projection(projectionObj).forEach(storeURL);
366
367 return urlsList;
368 }
369
[33881]370 /**
371 * RUNNING A MONGODB COLLECTION.AGGREGATE() in JAVA:
372 *
373 * https://stackoverflow.com/questions/31643109/mongodb-aggregation-with-java-driver
374 * https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria
375 * Not Java: https://stackoverflow.com/questions/39060221/a-pipeline-stage-specification-object-must-contain-exactly-one-field-with-php-mo
376 *
377 * (https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java)
378 * https://www.programcreek.com/java-api-examples/?api=com.mongodb.client.model.Aggregates
379 * On using group(TExpression) inside collection.aggregate().
380 *
381 * For forEach lamba expressions, see also https://www.baeldung.com/foreach-java
382 * and https://www.javatpoint.com/java-8-foreach
383 * and https://stackoverflow.com/questions/47979978/ambiguous-reference-to-foreach-when-listing-mongodbs-database-in-java
384 *
385 *
386 * The mongodb aggregate() we want to run this time:
387 *
[33880]388 db.Websites.aggregate([
389 {
390 $match: {
391 $and: [
392 {numPagesContainingMRI: {$gt: 0}},
393 {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]}
394 ]
395 }
396 },
397 { $unwind: "$geoLocationCountryCode" },
398 {
399 $group: {
400 _id: "nz",
401 count: { $sum: 1 },
402 domain: { $addToSet: '$domain' }
403 }
404 },
405 { $sort : { count : -1} }
406 ]);
407 */
[33882]408 public void aggregateContainsMRIForNZ(Writer writer, int filterType) throws IOException {
[33880]409 // working with the WebSites collection, not WebPages collection!
410 MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION);
411
[33882]412 String mriFilterString = (filterType == CONTAINS_MRI) ? "{numPagesContainingMRI: {$gt: 0}}" : "{numPagesInMRI: {$gt: 0}}";
[33880]413
414 Bson orQuery = or(
415 BasicDBObject.parse("{geoLocationCountryCode: \"NZ\"}"),
416 BasicDBObject.parse("{domain: /\\.nz/}")
417 );
418 Bson andQuery = and(
[33882]419 BasicDBObject.parse(mriFilterString),
[33880]420 orQuery);
421
[33881]422 // Hopefully the lambda expression (forEach()) at end means
423 // we write out each result Document as we get it
424 collection.aggregate(Arrays.asList(
[33880]425 match(andQuery),
426 unwind("$geoLocationCountryCode"),
427 group("NZ", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))),
428 sort(BasicDBObject.parse("{count : -1}"))
[33881]429 )).forEach((Block<Document>)doc -> writeDoc(doc, writer));
[33880]430
[33881]431 // should only have one doc for NZ since it's a count by geolocation.
[33880]432
433 return;
434 }
435
[33876]436 /**
[33881]437 * The aggregate() we want to run this time:
438 *
[33876]439 db.Websites.aggregate([
[33879]440 {
441 $match: {
[33876]442 $and: [
443 {geoLocationCountryCode: {$ne: "NZ"}},
444 {domain: {$not: /\.nz/}},
445 {numPagesContainingMRI: {$gt: 0}},
446 {$or: [{geoLocationCountryCode: "AU"}, {urlContainsLangCodeInPath: false}]}
447 ]
[33879]448 }
449 },
450 { $unwind: "$geoLocationCountryCode" },
451 {
452 $group: {
[33876]453 _id: {$toLower: '$geoLocationCountryCode'},
454 count: { $sum: 1 },
455 domain: { $addToSet: '$domain' }
[33879]456 }
457 },
458 { $sort : { count : -1} }
459 ]);
[33876]460 */
[33882]461 public void aggregateContainsMRIForOverseas(Writer writer, int filterType,
462 boolean isMiInURLPath) throws UncheckedIOException
463 {
[33876]464 // working with the WebSites collection, not WebPages collection!
465 MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION);
[33882]466
467 String mriFilterString = (filterType == CONTAINS_MRI) ? "{numPagesContainingMRI: {$gt: 0}}" : "{numPagesInMRI: {$gt: 0}}";
[33869]468
[33876]469 Bson orQuery = or(
470 BasicDBObject.parse("{geoLocationCountryCode: \"AU\"}"),
[33882]471 BasicDBObject.parse("{urlContainsLangCodeInPath: "+ isMiInURLPath +"}")
472 // e.g. "{urlContainsLangCodeInPath: false}"
[33876]473 );
474 Bson andQuery = and(
475 BasicDBObject.parse("{geoLocationCountryCode: {$ne: \"NZ\"}}"),
476 BasicDBObject.parse("{domain: {$not: /\\.nz/}}"),
[33882]477 BasicDBObject.parse(mriFilterString),
[33876]478 orQuery);
[33881]479
480
481 collection.aggregate(Arrays.asList(
482 match(andQuery), //match(BasicDBObject.parse(matchQuery))
483 // match((List<DBObject>)JSON.parse(matchQuery)),
484 unwind("$geoLocationCountryCode"),
485 group("$geoLocationCountryCode", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))),
486 sort(BasicDBObject.parse("{count : -1}"))
487 )).forEach((Block<Document>)doc -> writeDoc(doc, writer));
488
489 // casting to Block<Document> necessary because otherwise we see the error at
490 // https://stackoverflow.com/questions/47979978/ambiguous-reference-to-foreach-when-listing-mongodbs-database-in-java
491
492 // Less efficient way is to keep all the results in memory and then
493 // write them out one at a time
494 /*
[33876]495 AggregateIterable<Document> output
496 = collection.aggregate(Arrays.asList(
497 match(andQuery), //match(BasicDBObject.parse(matchQuery))
498 // match((List<DBObject>)JSON.parse(matchQuery)),
499 unwind("$geoLocationCountryCode"),
500 group("$geoLocationCountryCode", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))),
501 sort(BasicDBObject.parse("{count : -1}"))
[33879]502 ));
[33881]503
[33876]504
505 for (Document doc : output) {
506 //System.out.println(doc);
507 System.out.println(doc.toJson());
[33881]508
[33876]509 }
[33881]510 */
[33880]511 return;
[33876]512 }
[33879]513
[33881]514 /**
515 * called by lambda forEach() call on Document objects to write them out to a file.
516 * Have to deal with unreported exceptions here that can't be dealt with when doing
517 * the actual forEach(). See
518 * https://stackoverflow.com/questions/39090292/how-to-cleanly-deal-with-unreported-exception-ioexception-in-stream-foreach
519 */
520
521 public void writeDoc(Document doc, Writer writer) throws UncheckedIOException {
522 //OLD WAY: writer.write(doc.toJson(new JsonWriterSettings(JsonMode.STRICT, true)) + NEWLINE);
523 // Can't control json output to add newlines after each array element,
524 // no matter which JsonMode is used.
525
526 // https://mongodb.github.io/mongo-java-driver/3.9/javadoc/index.html?org/bson/json/JsonWriterSettings.html
527 // Still can't control array element output,
528 // but this way uses newer mongo java driver 3.9(.1). Tried its various JsonModes too:
529 //JsonWriterSettings writeSettings = new JsonWriterSettings();
530 //writeSettings.builder().outputMode(JsonMode.SHELL).indent(true).build();
531 //writer.write(doc.toJson(writeSettings) + NEWLINE);
[33879]532
[33881]533 // Not the JsonWriter of mongodb java driver:
534 // https://stackoverflow.com/questions/54746814/jsonwriter-add-a-new-line
535
536 // Have to use gson's pretty print to produce a json string that contains
537 // newlines after every array element in the json:
538 String jsonStr = prettyPrintJson(doc.toJson());
[33883]539 //System.err.println(jsonStr);
[33881]540 try {
541 writer.write(jsonStr + NEWLINE);
542 } catch (IOException ex) {
543 //throw ex;
544 throw new UncheckedIOException(ex);
545 }
546 }
[33880]547 public String prettyPrintJson(String jsonStr) {
548 Gson gson = new GsonBuilder().setPrettyPrinting().create();
549 JsonParser jp = new JsonParser();
550 JsonElement je = jp.parse(jsonStr);
551 String prettyJsonString = gson.toJson(je);
552 return prettyJsonString;
553 }
[33879]554
555
[33634]556 /** https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection */
557 public void close() {}
[33633]558
[33623]559
// TODO:
// In the database, need to ensure that the websites and webpages collections exist,
// else create them (a collection is the equivalent of a table in an RDBMS).
// The webpages collection will have sentences embedded, based on my decisions from
// reading the series
// https://www.mongodb.com/blog/post/6-rules-of-thumb-for-mongodb-schema-design-part-1
// Then need functions:
// insertWebsiteDocument()
// insertWebpageDocument()
[33623]569
[33616]570 public static void main(String args[]) {
571 try {
[33622]572 MongoDBAccess mongodbCon = new MongoDBAccess();
[33623]573 mongodbCon.connectToDB();
[33634]574 mongodbCon.showCollections();
575
576 } catch(Exception e) {
[33616]577 e.printStackTrace();
578 }
579 }
580}
Note: See TracBrowser for help on using the repository browser.