Ignore:
Timestamp:
2019-11-05T21:04:09+13:00 (4 years ago)
Author:
ak19
Message:
  1. Incorporated Dr Nichols' earlier suggestion of storing page modified time and char-encoding metadata if present in the crawl dump output. Have done so, but neither the modifiedTime nor the fetchTime metadata of the dump file appears to be a webpage's actual modified time, as both are from 2019 and set around the period we've been crawling. 2. Moved the getDomainFromURL() function from CCWETProcessor.java to Utility.java since it's been reused. 3. The MongoDBAccess class successfully connects (at least, no exceptions) and uses the newly added properties in config.properties to make the connection.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java

    r33622 r33623  
    22
    33
     4import com.mongodb.client.MongoCollection;
    45import com.mongodb.client.MongoDatabase;
    56import com.mongodb.MongoClient;
    67import com.mongodb.MongoCredential; 
     8
     9import org.bson.Document;
    710
    811import java.io.BufferedReader;
     
    2326 * TO RUN:
    2427 *       java -cp ".:../conf:../lib/*" org.greenstone.atea.MongoDBAccess
     28 *
     29 * Manually connecting to mongodb from client:
     30 *    mongo 'mongodb://mongodb.cms.waikato.ac.nz:27017' -u USERNAME -p
     31 * Then after connecting with pwd, type:
     32 *    use DBNAME
     33 *
     34 * Or connect to mongodb and specify db in one statement:
     35 *    mongo 'mongodb://mongodb.cms.waikato.ac.nz:27017/DBNAME?authSource=admin' -u USERNAME -p
     36 *
     37 * Some links:
     38 *   - https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection
     39 *   - https://docs.mongodb.com/manual/reference/glossary/ (particularly "collection")
     40 *   - https://tecadmin.net/tutorial/mongodb/drop-collection/
     41 * IMPORTANT LINK:
     42 *   - https://www.mongodb.com/blog/post/6-rules-of-thumb-for-mongodb-schema-design-part-1
     43 *
    2544 */
    2645public class MongoDBAccess {
     
    2847    private static Logger logger = Logger.getLogger(org.greenstone.atea.MongoDBAccess.class.getName());
    2948   
    30     final static String HOST = "localhost";
    31     final static int PORT = 27017; // mongodb port
    32     final static String PROPS_FILENAME = "config.properties";
    33     final static String DB_NAME = "ateacrawldata";
     49    String HOST = "localhost";
     50    int PORT = 27017; // mongodb port
     51    String PROPS_FILENAME = "config.properties";
     52    String DB_NAME = "ateacrawldata";
    3453   
    3554    private String USERNAME;
     
    3756
    3857
     58    private MongoClient mongo = null;
     59    private MongoDatabase database = null;
     60   
    3961    public MongoDBAccess() throws Exception {
    4062    boolean success = false;
     
    5577    if(USERNAME.equals("")) {
    5678        USERNAME = "root";
    57         logger.warn("WARNING: No sensible value for mongodb.user specified in " + PROPS_FILENAME + " defaulting to: " + USERNAME);
     79        logger.warn("WARNING: No sensible value for mongodb.user specified in " + PROPS_FILENAME + ". Attempting to use: " + USERNAME);
    5880            }
    5981    PASSWORD = props.getProperty("mongodb.pwd");
     
    6688        throw new Exception("************ FATAL ERROR: Change DB password in properties file " + PROPS_FILENAME);       
    6789    }
     90
     91    HOST = props.getProperty("mongodb.host", HOST);
     92    String port = props.getProperty("mongodb.port", Integer.toString(PORT));
     93    PORT = Integer.parseInt(port);
     94    DB_NAME = props.getProperty("mongodb.dbname", DB_NAME);
     95
     96    logger.info("Connecting to mongodb with:");
     97    logger.info(" - host:    " + HOST);
     98    logger.info(" - port:    " + PORT);
     99    logger.info(" - user:    " + USERNAME);
     100    logger.info(" - db name: " + DB_NAME); 
    68101    }
    69102
    70 
     103    /**
     104     * Since we have only a single MongoClient, don't need to call close/disconnect on it as per
     105     * https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection
     106     */
    71107    public void connectToDB() throws Exception {
    72108    // Creating a Mongo client
    73     MongoClient mongo = new MongoClient( HOST, PORT );
     109    mongo = new MongoClient( HOST, PORT );
    74110   
    75111    // Creating Credentials
     
    79115   
    80116    // Accessing the database
    81     MongoDatabase database = mongo.getDatabase(DB_NAME);
    82     //System.out.println("Credentials: "+ credential);
     117    database = mongo.getDatabase(DB_NAME);
     118    logger.info("Credentials: "+ credential);
    83119    }
    84120   
    85121
     122    /*
     123    public void insertDocument() {
     124    MongoCollection<Document> collection = this.database.getCollection("sampleCollection");
     125    }
     126    */
     127
     128    // create collection (table in RDBMS) websites, create collection webpages
     129    // webpages collection will have sentences embedded
     130   
    86131    public static void main(String args[]) {
    87132    try {
    88133        MongoDBAccess mongodbCon = new MongoDBAccess();
    89         //mongodbCon.connectToDB();
     134        mongodbCon.connectToDB();
     135        //mongodbCon.insertDocument();
    90136    }catch(Exception e) {
    91137        e.printStackTrace();
Note: See TracChangeset for help on using the changeset viewer.