package org.greenstone.gsdl3.gs3build; import java.util.Date; import java.util.Calendar; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.util.HashMap; import java.util.Iterator; import java.util.GregorianCalendar; import java.io.File; import java.io.IOException; import java.io.FileOutputStream; import java.io.BufferedWriter; import java.io.OutputStreamWriter; import java.net.URL; import java.net.URLEncoder; import java.sql.*; import javax.xml.parsers.*; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.greenstone.gsdl3.gs3build.database.*; import org.greenstone.gsdl3.gs3build.collection.*; import org.greenstone.gsdl3.gs3build.classifier.*; import org.greenstone.gsdl3.gs3build.indexers.*; import org.greenstone.gsdl3.gs3build.doctypes.RecogniserInterface; import org.greenstone.gsdl3.gs3build.util.DOMUtils; import org.greenstone.gsdl3.gs3build.util.URLTools; import org.greenstone.gsdl3.util.GSFile; import org.greenstone.gsdl3.util.GSXML; import org.greenstone.gsdl3.util.XMLConverter; /** * Store and hold collection-level configuration information for a collection. * This should be used by BuildManager to work out which classes, etc. to load * at build time, and as a repository for the collection-level metadata, and * a means of loading and saving the same to a file or database, as is seen * fit in the final development of gs3. */ public class CollectionManager { GregorianCalendar lastBuildDate; // pretty obvious String adminEmail; // the email address of the administrator of the // collection int buildDocNo; // used to generate document identifiers CollectionMetadata metadata; // collection-level metadata GS3SQLConnection database; // the database to store everything in String collectionHome; String siteHome; String collectionName; String qualifiedCollectionName; // used as the database name String notifyHost; BuildManager buildManager; class CollectionClassifier { URL file; String type; List fields; String sort; String className; public CollectionClassifier(URL parentURL, String type, String className, Node node) { this.type = type; this.className = className; this.fields = new ArrayList(); NodeList children = node.getChildNodes(); for (int c = 0; c < children.getLength(); c ++) { Node child = children.item(c); if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { String name = child.getNodeName(); if (name.equals("file")) { NamedNodeMap atts = children.item(c).getAttributes(); Node attribute = atts.getNamedItem("URL"); String urlString = attribute.getNodeValue(); if (urlString == null) continue; System.out.println("Path is" + parentURL.getPath()); System.out.println("Host is" + parentURL.getHost()); System.out.println(urlString); try { URL url = new URL(parentURL, urlString); this.file = url; System.out.println(url); } catch (java.net.MalformedURLException malEx) { System.out.println(malEx); } } else if (name.equals("field")) { String fieldName = DOMUtils.getNodeChildText(children.item(c)); this.fields.add(fieldName.toString()); } else if (name.equals("sort")) { String sortName = DOMUtils.getNodeChildText(children.item(c)); this.sort = sortName; } } } } public ClassifierInterface getClassifier() { ClassifierInterface classifier = null; if (this.type == null) { return null; } System.out.println("Creating a classifier of type " + this.type.toLowerCase()); if (this.type.toLowerCase().equals("hierarchy")) { System.out.println(" hierarchy file is " + this.file); classifier = new HierarchyClassifier(this.className, this.file, this.fields, this.sort); } else if (this.type.toLowerCase().equals("azlist")) { classifier = new AZListClassifier(this.className, this.fields); } return classifier; } } /** * Create the collection manager for a given collection * * @param site the name of the site * @param collection String the name of the collection */ public CollectionManager(String site, String collection) { String gsdl3Root = System.getProperty("GSDL3HOME"); if (gsdl3Root == null) { System.out.println("Error: Unable to locate GSDL3HOME"); System.exit(1); //return; } this.siteHome = GSFile.siteHome(gsdl3Root, site); File site_dir = new File(this.siteHome); System.out.println(site_dir); if (!site_dir.exists()) { System.out.println("Error: Non-existant site ("+site+") specified"); System.exit(1); } site_dir = null; this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection); File collect_dir = new File(this.collectionHome); if (!collect_dir.exists()) { System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site); System.exit(1); } collect_dir = null; this.collectionName = collection; this.qualifiedCollectionName = site+"_"+collection; this.database = GS3SQLConnectionFactory.getGS3SQLConnection(this.qualifiedCollectionName); /* if (this.database != null) { this.database.clearCollection(collection); this.database = null; } */ if (this.database == null) { this.database = GS3SQLConnectionFactory.getGS3SQLConnection("test"); boolean success = this.database.initCollection(this.qualifiedCollectionName); if (!success) { System.err.println("couldn't init collection " + this.qualifiedCollectionName); System.exit(1); } } this.metadata = new CollectionMetadata(); File buildDirectory = new File(getBuildDirectory()); if (!buildDirectory.exists()) { buildDirectory.mkdir(); } if (!buildDirectory.isDirectory()) { System.err.println("Unable to open directory " + buildDirectory + " for writing"); System.exit(1); } File archiveDirectory = new File(getArchiveDirectory()); if (!archiveDirectory.exists()) { archiveDirectory.mkdir(); } if (!archiveDirectory.isDirectory()) { System.err.println("Unable to open directory " + archiveDirectory + " for writing"); System.exit(1); } this.buildDocNo = 1; try { GS3SQLSelect select = new GS3SQLSelect("build"); select.addField("*"); this.database.execute(select.toString()); ResultSet results = this.database.getResultSet(); if (results != null && results.first()) { System.out.println("Reading all keys"); do { String key = results.getString("buildKey"); String value = results.getString("buildValue"); if (key.equals("NextSeqNo")) { this.buildDocNo = Integer.parseInt(value); } else if (key.equals("lastBuildDate")) { int year, month, day; year = Integer.parseInt(value.substring(0, 4)); month = Integer.parseInt(value.substring(4, 6)) - 1; // -1 because Gregorian Calendar perversely treats January as 0, etc. day = Integer.parseInt(value.substring(6, 8)); this.lastBuildDate = new GregorianCalendar(year, month, day); } } while (results.next()); } } catch (SQLException ex) { System.out.println(ex); } } public void setBuildManager(BuildManager build_man) { this.buildManager = build_man; } private void configureBrowsers(Node node, File etcFile) { CollectionClassifier classifier = null; URL etcURL = null; etcURL = URLTools.getFileURL(etcFile); NodeList children = node.getChildNodes(); for (int c = 0; c < children.getLength(); c ++) { // assume that non-element children are irrelevant if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) { continue; } String name = children.item(c).getNodeName(); System.out.println(name); if (name.equals(GSXML.CLASSIFIER_ELEM)) { NamedNodeMap atts = children.item(c).getAttributes(); // get the type attribute Node attribute = atts.getNamedItem(GSXML.TYPE_ATT); if (attribute == null) { continue; } String type = attribute.getNodeValue(); // get the type attribute attribute = atts.getNamedItem(GSXML.NAME_ATT); if (attribute == null) { continue; } String className = attribute.getNodeValue(); classifier = new CollectionClassifier(etcURL, type, className, children.item(c)); System.out.println("Found classifier " + type); // attach the classifier ClassifierInterface classify = classifier.getClassifier(); this.buildManager.getClassifierManager().addClassifier(classify); } } } public void configureCollection() { File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome)); // get the File and read it in try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(collectionConfig); // TODO: report an error if (document == null) { } // now parse the manager file... Element rootElement = document.getDocumentElement(); if (rootElement.getTagName() != GSXML.COLLECTION_CONFIG_ELEM) { // TODO: throw exception } System.out.println("Configuring collection"); NodeList children = rootElement.getChildNodes(); for (int c = 0; c < children.getLength(); c ++) { // assume that non-element children are irrelevant if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) { continue; } String name = children.item(c).getNodeName(); // the name is a plugin element if (name.equals(GSXML.RECOGNISE_ELEM)) { System.out.println("recognise elem"); NodeList doc_types = ((Element)children.item(c)).getElementsByTagName(GSXML.DOC_TYPE_ELEM); for(int i=0; i tag now... NamedNodeMap searchAttributes = children.item(c).getAttributes(); Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT); String searchType = null; if (searchAttribute != null) { searchType = searchAttribute.getNodeValue(); } else { System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg"); searchType = MGIndexer.MG_INDEX_TYPE; } searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT); String searchName = null; if (searchAttribute != null) { searchName = searchAttribute.getNodeValue(); } if (searchName == null) { searchName = "idx"; // need to modify this if we have two search elements with no names } // create the pertinent indexer... IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName); if (indexer == null) { continue; } // configure the indexer indexer.configure(children.item(c)); // install it into the build manager this.buildManager.addIndexer(indexer); } else if (name.equals(GSXML.BROWSE_ELEM)) { this.configureBrowsers(children.item(c), collectionConfig); } else if (name.equals(GSXML.NOTIFY_ELEM)) { this.notifyHost = ((Element) children.item(c)).getAttribute(GSXML.NOTIFY_HOST_ATT); } // TODO: other elements - make a factory-method approach here... else { } } } catch (FactoryConfigurationError e) { System.out.println(e); } catch (ParserConfigurationException ex) { System.out.println(ex); } catch (SAXException ex) { System.out.println(ex); } catch (IOException ex) { System.out.println(ex); } System.out.println("<<>>>"); } public String getEtcDirectory() { return GSFile.collectionEtcDir(this.collectionHome); } public String getImportDirectory() { return GSFile.collectionImportDir(this.collectionHome); } public String getBuildDirectory() { return GSFile.collectionBuildDir(this.collectionHome); } public String getArchiveDirectory() { return GSFile.collectionArchiveDir(this.collectionHome); } public GS3SQLConnection getDatabase() { return this.database; } public long getBuildTimestamp() { return this.lastBuildDate.getTime().getTime(); } public Date getBuildDate() { return this.lastBuildDate.getTime(); } public void startBuild() { GregorianCalendar today = new GregorianCalendar(); if (this.lastBuildDate != null) { // if the build date is different to the last build date, then reset the build // document number if (today.get(Calendar.YEAR) != this.lastBuildDate.get(Calendar.YEAR) || today.get(Calendar.MONTH) != this.lastBuildDate.get(Calendar.MONTH) || today.get(Calendar.DAY_OF_MONTH) != this.lastBuildDate.get(Calendar.DAY_OF_MONTH)) { this.buildDocNo = 1; } else { System.out.println("Continuing build sequence from " + this.buildDocNo); } } this.lastBuildDate = today; } public void endBuild() { // here we write out the build config file // create the xml for the buildConfig XMLConverter converter = new XMLConverter(); Document doc = converter.newDOM(); Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM); Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER); build_config.appendChild(meta_list); Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER); build_config.appendChild(service_list); // ask the indexers and classifiers to add stuff into the service rack list this.buildManager.getIndexerManager().addServiceDescriptions(service_list); this.buildManager.getClassifierManager().addServiceDescriptions(service_list); // get the String String build_config_string = converter.getString(build_config); // write it to the file try { File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome)); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8")); writer.write(build_config_string, 0, build_config_string.length()); writer.close(); } catch (Exception e) { System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file."); System.err.println(e.getMessage()); } // Update build date information GS3SQLDelete remove = new GS3SQLDelete("build"); // GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("buildKey", "=", "NextSeqNo")); // rem this.database.execute(remove.toString()); GS3SQLInsert insert = new GS3SQLInsert("build"); insert.addValue("buildKey", "NextSeqNo"); insert.addValue("buildValue", Integer.toString(this.buildDocNo)); this.database.execute(insert.toString()); insert = new GS3SQLInsert("build"); insert.addValue("buildKey", "lastBuildDate"); insert.addValue("buildValue", getDateString(this.lastBuildDate)); this.database.execute(insert.toString()); // Do tail of build output Date startDate = this.lastBuildDate.getTime(); Date date = new Date(); long startTime = startDate.getTime(); long endTime = date.getTime(); long difference = ((endTime - startTime) + 500) / 1000; System.out.println("Build completed"); System.out.println("---------------"); System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount")); System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs."); } private static String getDateString(GregorianCalendar date) { StringBuffer dateString = new StringBuffer(); int value; dateString.append(date.get(Calendar.YEAR)); // the use of month is a little odd, hence the following // code. Calendar.MONTH yields 0 = January, 1 = February, // etc. hence there is a '+1' added to the month to make // it into January = 1, etc., and the padding is altered // correspondingly. value = date.get(Calendar.MONTH); if (value < 9) { dateString.append("0"); } dateString.append(value + 1); value = date.get(Calendar.DAY_OF_MONTH); if (value < 10) dateString.append("0"); dateString.append(value); return dateString.toString(); } public String getNextDocumentID() { StringBuffer ID = new StringBuffer(getDateString(this.lastBuildDate)); int value = this.buildDocNo; this.buildDocNo ++; ID.append(":"); ID.append(Integer.toString(value)); return ID.toString(); } public int getDocumentNumber() { this.buildDocNo ++; return this.buildDocNo - 1; } /** * Get the collection metadata item in the given namespace * * @param String the namespace * @param String the label of the metadata */ public String getCollectionMetadata(String namespace, String label) { return this.metadata.getCollectionMetadata(namespace, label).get(0).toString(); } /** * Set the collection metadata item in the given namespace * * @param String the namespace * @param String the label * @param String the value */ public void setCollectionMetadata(String namespace, String label, String value) { this.metadata.setCollectionMetadata(namespace, label, value); } public String getCollectionName() { return collectionName; } /** * @return */ public String getNotifyHost() { return notifyHost; } }