source: gs3-extensions/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java@ 33623

Last change on this file since 33623 was 33623, checked in by ak19, 4 years ago
  1. Incorporated Dr Nichols' earlier suggestion of storing page modified time and char-encoding metadata if present in the crawl dump output. Have done so, but neither the modifiedTime nor the fetchTime metadata of the dump file appears to be a webpage's actual modified time, as they're from 2019 and set around the period we've been crawling. 2. Moved getDomainFromURL() function from CCWETProcessor.java to Utility.java since it's been reused. 3. MongoDBAccess class successfully connects (at least, no exceptions) and uses the newly added properties in config.properties to make the connection.
File size: 4.3 KB
Line 
1package org.greenstone.atea;
2
3
4import com.mongodb.client.MongoCollection;
5import com.mongodb.client.MongoDatabase;
6import com.mongodb.MongoClient;
7import com.mongodb.MongoCredential;
8
9import org.bson.Document;
10
11import java.io.BufferedReader;
12import java.io.File;
13import java.io.FileReader;
14import java.util.Properties;
15
16import org.apache.log4j.Logger;
17
18
19/**
20 * https://www.tutorialspoint.com/mongodb/mongodb_java.htm
21 *
22 * TO COMPILE:
23 * maori-lang-detection/src$
24 * javac -cp ".:../conf:../lib/*" org/greenstone/atea/MongoDBAccess.java
25 *
26 * TO RUN:
27 * java -cp ".:../conf:../lib/*" org.greenstone.atea.MongoDBAccess
28 *
29 * Manually connecting to mongodb from client:
30 * mongo 'mongodb://mongodb.cms.waikato.ac.nz:27017' -u USERNAME -p
31 * Then after connecting with pwd, type:
32 * use DBNAME
33 *
34 * Or connect to mongodb and specify db in one statement:
35 * mongo 'mongodb://mongodb.cms.waikato.ac.nz:27017/DBNAME?authSource=admin' -u USERNAME -p
36 *
37 * Some links:
38 * - https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection
39 * - https://docs.mongodb.com/manual/reference/glossary/ (particularly "collection")
40 * - https://tecadmin.net/tutorial/mongodb/drop-collection/
41 * IMPORTANT LINK:
42 * - https://www.mongodb.com/blog/post/6-rules-of-thumb-for-mongodb-schema-design-part-1
43 *
44 */
45public class MongoDBAccess {
46
47 private static Logger logger = Logger.getLogger(org.greenstone.atea.MongoDBAccess.class.getName());
48
49 String HOST = "localhost";
50 int PORT = 27017; // mongodb port
51 String PROPS_FILENAME = "config.properties";
52 String DB_NAME = "ateacrawldata";
53
54 private String USERNAME;
55 private String PASSWORD;
56
57
58 private MongoClient mongo = null;
59 private MongoDatabase database = null;
60
61 public MongoDBAccess() throws Exception {
62 boolean success = false;
63
64 // Read in the username and password from our props file
65 Properties props = new Properties();
66
67 //File propsFile = new File(PROPS_FILENAME);
68 //logger.debug("*** Conf props filename: " + propsFile.getAbsolutePath());
69 try {
70 props.load(getClass().getClassLoader().getResourceAsStream(PROPS_FILENAME));
71 } catch(Exception e) {
72 logger.error(e);
73 }
74
75
76 USERNAME = props.getProperty("mongodb.user", "");
77 if(USERNAME.equals("")) {
78 USERNAME = "root";
79 logger.warn("WARNING: No sensible value for mongodb.user specified in " + PROPS_FILENAME + ". Attempting to use: " + USERNAME);
80 }
81 PASSWORD = props.getProperty("mongodb.pwd");
82
83 logger.debug("Got pwd: " + PASSWORD);
84
85 if(PASSWORD != null && PASSWORD.equals("CHANGEME")) {
86
87 success = false;
88 throw new Exception("************ FATAL ERROR: Change DB password in properties file " + PROPS_FILENAME);
89 }
90
91 HOST = props.getProperty("mongodb.host", HOST);
92 String port = props.getProperty("mongodb.port", Integer.toString(PORT));
93 PORT = Integer.parseInt(port);
94 DB_NAME = props.getProperty("mongodb.dbname", DB_NAME);
95
96 logger.info("Connecting to mongodb with:");
97 logger.info(" - host: " + HOST);
98 logger.info(" - port: " + PORT);
99 logger.info(" - user: " + USERNAME);
100 logger.info(" - db name: " + DB_NAME);
101 }
102
103 /**
104 * Since we have only a single MongoClient, don't need to call close/disconnect on it as per
105 * https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection
106 */
107 public void connectToDB() throws Exception {
108 // Creating a Mongo client
109 mongo = new MongoClient( HOST, PORT );
110
111 // Creating Credentials
112 MongoCredential credential;
113 credential = MongoCredential.createCredential(USERNAME, DB_NAME, PASSWORD.toCharArray());
114 System.out.println("Connected to the database successfully");
115
116 // Accessing the database
117 database = mongo.getDatabase(DB_NAME);
118 logger.info("Credentials: "+ credential);
119 }
120
121
122 /*
123 public void insertDocument() {
124 MongoCollection<Document> collection = this.database.getCollection("sampleCollection");
125 }
126 */
127
128 // create collection (table in RDBMS) websites, create collection webpages
129 // webpages collection will have sentences embedded
130
131 public static void main(String args[]) {
132 try {
133 MongoDBAccess mongodbCon = new MongoDBAccess();
134 mongodbCon.connectToDB();
135 //mongodbCon.insertDocument();
136 }catch(Exception e) {
137 e.printStackTrace();
138 }
139 }
140}
Note: See TracBrowser for help on using the repository browser.