package org.greenstone.gsdl3.gs3build;

import java.util.Date;
import java.util.Calendar;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
import java.util.GregorianCalendar;

import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import java.io.BufferedWriter;
import java.io.OutputStreamWriter;

import java.net.URL;
import java.net.URLEncoder;

import javax.xml.parsers.*;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import org.greenstone.gsdl3.gs3build.collection.*;
import org.greenstone.gsdl3.gs3build.classifier.*;
import org.greenstone.gsdl3.gs3build.indexers.*;
import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
import org.greenstone.gsdl3.gs3build.util.GS3SQLConnectionFactory;
import org.greenstone.gsdl3.gs3build.util.DOMUtils;
import org.greenstone.gsdl3.gs3build.util.URLTools;

import org.greenstone.gsdl3.util.GSFile;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.XMLConverter;

/**
 * Store and hold collection-level configuration information for a collection.
 * This should be used by BuildManager to work out which classes, etc. to load
 * at build time, and as a repository for the collection-level metadata, and
 * a means of loading and saving the same to a file or database, as is seen
 * fit in the final development of gs3.
 *
 * <p>Typical use, as far as this class shows: construct the manager, attach a
 * {@link BuildManager} with {@link #setBuildManager(BuildManager)}, call
 * {@link #configureCollection()}, then bracket the build with
 * {@link #startBuild()} and {@link #endBuild()}.
 */
public class CollectionManager
{
    GregorianCalendar lastBuildDate;   // when the current (or most recent) build was started; set by startBuild()
    String adminEmail;                 // the email address of the administrator of the collection
    int buildDocNo;                    // used to generate document identifiers
    CollectionMetadata metadata;       // collection-level metadata
    GS3SQLConnection database;         // the database to store everything in
    String collectionHome;
    String siteHome;
    String collectionName;
    String qualifiedCollectionName;    // used as the database name
    BuildManager buildManager;

    /**
     * The description of one classifier element of the collection
     * configuration: its type, name, optional source file, fields and sort key.
     */
    class CollectionClassifier
    {
        URL file;
        String type;
        List fields;
        String sort;
        String className;

        /**
         * Read the classifier settings from the child elements of a classifier node.
         * Recognised children are <code>file</code> (with a <code>URL</code> attribute,
         * resolved relative to <code>parentURL</code>), <code>field</code> and
         * <code>sort</code>; anything else is ignored.
         *
         * @param parentURL the URL against which relative file URLs are resolved
         * @param type      the classifier type
         * @param className the name given to the classifier
         * @param node      the classifier element to read
         */
        public CollectionClassifier(URL parentURL, String type, String className, Node node)
        {
            this.type = type;
            this.className = className;
            this.fields = new ArrayList();

            NodeList children = node.getChildNodes();
            for (int c = 0; c < children.getLength(); c++) {
                Node child = children.item(c);
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }

                String name = child.getNodeName();
                if (name.equals("file")) {
                    // A <file> element without a URL attribute carries nothing usable;
                    // skip it rather than dereferencing a missing attribute.
                    Node attribute = child.getAttributes().getNamedItem("URL");
                    if (attribute == null) {
                        continue;
                    }
                    String urlString = attribute.getNodeValue();
                    if (urlString == null) {
                        continue;
                    }

                    System.out.println("Path is" + parentURL.getPath());
                    System.out.println("Host is" + parentURL.getHost());
                    System.out.println(urlString);
                    try {
                        URL url = new URL(parentURL, urlString);
                        this.file = url;
                        System.out.println(url);
                    }
                    catch (java.net.MalformedURLException malEx) {
                        System.out.println(malEx);
                    }
                }
                else if (name.equals("field")) {
                    String fieldName = DOMUtils.getNodeChildText(child);
                    if (fieldName != null) {
                        this.fields.add(fieldName);
                    }
                }
                else if (name.equals("sort")) {
                    this.sort = DOMUtils.getNodeChildText(child);
                }
            }
        }

        /**
         * Build the classifier that corresponds to this description.
         * The type is matched case-insensitively and independently of the
         * default locale.
         *
         * @return a hierarchy or A-Z list classifier, or <code>null</code> if the
         *         type is missing or not one of the supported types
         */
        public ClassifierInterface getClassifier()
        {
            if (this.type == null) {
                return null;
            }
            System.out.println(this.type.toLowerCase());

            if (this.type.equalsIgnoreCase("hierarchy")) {
                System.out.println(this.file);
                return new HierarchyClassifier(this.className, this.file, this.fields, this.sort);
            }
            if (this.type.equalsIgnoreCase("azlist")) {
                return new AZListClassifier(this.className, this.fields);
            }
            return null;
        }
    }

    /**
     * Create the collection manager for a given collection.
     *
     * <p>The GSDL3HOME system property must be set, and both the site and the
     * collection directories must exist; otherwise an error is printed and the
     * JVM is terminated with exit status 1. The build and archive directories
     * of the collection are created if they are missing.
     *
     * @param site       the name of the site
     * @param collection the name of the collection
     */
    public CollectionManager(String site, String collection)
    {
        String gsdl3Root = System.getProperty("GSDL3HOME");
        if (gsdl3Root == null) {
            System.out.println("Error: Unable to locate GSDL3HOME");
            System.exit(1);
        }

        this.siteHome = GSFile.siteHome(gsdl3Root, site);
        File siteDir = new File(this.siteHome);
        System.out.println(siteDir);
        if (!siteDir.exists()) {
            System.out.println("Error: Non-existant site ("+site+") specified");
            System.exit(1);
        }

        this.collectionHome = GSFile.collectionBaseDir(this.siteHome, collection);
        File collectDir = new File(this.collectionHome);
        if (!collectDir.exists()) {
            System.out.println("Error: Non-existant collection ("+collection+") specified in site "+site);
            System.exit(1);
        }

        this.collectionName = collection;
        this.qualifiedCollectionName = site+"_"+collection;

        this.database = GS3SQLConnectionFactory.createConnection(this.qualifiedCollectionName);
        if (this.database == null) {
            // No database for this collection yet: connect through the "test"
            // database and initialise the collection from there.
            this.database = GS3SQLConnectionFactory.createConnection("test");
            if (this.database == null) {
                System.out.println("Error: Unable to connect to the database for collection "+this.qualifiedCollectionName);
                System.exit(1);
            }
            this.database.initCollection(this.qualifiedCollectionName);
        }

        this.metadata = new CollectionMetadata();

        File buildDirectory = new File(GSFile.collectionBuildDir(this.collectionHome));
        if (!buildDirectory.exists()) {
            buildDirectory.mkdir();
        }

        File archiveDirectory = new File(GSFile.collectionArchiveDir(this.collectionHome));
        if (!archiveDirectory.exists()) {
            archiveDirectory.mkdir();
        }

        this.buildDocNo = 1;
    }

    /**
     * Attach the build manager that receives the indexers and classifiers
     * found while configuring the collection.
     *
     * @param build_man the build manager to use
     */
    public void setBuildManager(BuildManager build_man)
    {
        this.buildManager = build_man;
    }

    /**
     * Read the classifier elements below a browse element and register a
     * classifier with the build manager for each supported one.
     *
     * @param node    the browse element
     * @param etcFile the configuration file; relative classifier file URLs are
     *                resolved against its URL
     */
    private void configureBrowsers(Node node, File etcFile)
    {
        URL etcURL = URLTools.getFileURL(etcFile);

        NodeList children = node.getChildNodes();
        for (int c = 0; c < children.getLength(); c++) {
            Node child = children.item(c);

            // assume that non-element children are irrelevant
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            String name = child.getNodeName();
            System.out.println(name);
            if (!name.equals(GSXML.CLASSIFIER_ELEM)) {
                continue;
            }

            NamedNodeMap atts = child.getAttributes();

            // get the type attribute
            Node attribute = atts.getNamedItem(GSXML.TYPE_ATT);
            if (attribute == null) {
                continue;
            }
            String type = attribute.getNodeValue();

            // get the name attribute
            attribute = atts.getNamedItem(GSXML.NAME_ATT);
            if (attribute == null) {
                continue;
            }
            String className = attribute.getNodeValue();

            CollectionClassifier classifier = new CollectionClassifier(etcURL, type, className, child);
            System.out.println("Found classifier " + type);

            // attach the classifier, unless the type is not one we can build
            ClassifierInterface classify = classifier.getClassifier();
            if (classify == null) {
                System.out.println("Unsupported classifier type " + type + ", ignoring");
                continue;
            }
            this.buildManager.getClassifierManager().addClassifier(classify);
        }
    }

    /**
     * Read the collection configuration file and set up the build manager
     * accordingly: search elements produce indexers and browse elements
     * produce classifiers. Parser and I/O problems are printed, not thrown.
     * A build manager must have been attached beforehand.
     */
    public void configureCollection()
    {
        File collectionConfig = new File(GSFile.collectionConfigFile(this.collectionHome));

        // get the File and read it in
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(collectionConfig);

            if (document == null) {
                System.out.println("Error: Unable to read collection configuration " + collectionConfig);
            }
            else {
                this.configureFromDocument(document, collectionConfig);
            }
        }
        catch (FactoryConfigurationError e) {
            System.out.println(e);
        }
        catch (ParserConfigurationException ex) {
            System.out.println(ex);
        }
        catch (SAXException ex) {
            System.out.println(ex);
        }
        catch (IOException ex) {
            System.out.println(ex);
        }
        System.out.println("<<>>>");
    }

    /** Walk the top-level elements of the parsed configuration and dispatch on their names. */
    private void configureFromDocument(Document document, File collectionConfig)
    {
        Element rootElement = document.getDocumentElement();
        // Strings must be compared by value; an unexpected root is reported but
        // the children are still processed, as before.
        if (!GSXML.COLLECTION_CONFIG_ELEM.equals(rootElement.getTagName())) {
            System.out.println("Warning: unexpected root element " + rootElement.getTagName()
                               + " in " + collectionConfig);
        }

        System.out.println("Configuring collection");

        NodeList children = rootElement.getChildNodes();
        for (int c = 0; c < children.getLength(); c++) {
            Node child = children.item(c);

            // assume that non-element children are irrelevant
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            String name = child.getNodeName();
            if (name.equals("doctype")) {
                this.configureDocumentType(child);
            }
            else if (name.equals(GSXML.SEARCH_ELEM)) {
                this.configureSearch(child);
            }
            else if (name.equals(GSXML.BROWSE_ELEM)) {
                this.configureBrowsers(child, collectionConfig);
            }
            // TODO: other elements - make a factory-method approach here...
        }
    }

    /** Report each non-blank text label found directly inside a doctype element (not stored anywhere yet). */
    private void configureDocumentType(Node node)
    {
        System.out.println("document type");

        NodeList childNodes = node.getChildNodes();
        for (int n = 0; n < childNodes.getLength(); n++) {
            Node child = childNodes.item(n);
            if (child.getNodeType() != Node.TEXT_NODE) {
                continue;
            }

            String label = child.getNodeValue();
            if (label == null) {
                continue;
            }
            // String.trim() returns a new string; the result has to be kept
            label = label.trim();
            if (label.length() > 0) {
                System.out.println("Document type " + label);
            }
        }
    }

    /** Create, configure and install the indexer described by a search element. */
    private void configureSearch(Node node)
    {
        // pick up attributes from the tag now...
        NamedNodeMap searchAttributes = node.getAttributes();

        String searchType;
        Node searchAttribute = searchAttributes.getNamedItem(GSXML.TYPE_ATT);
        if (searchAttribute != null) {
            searchType = searchAttribute.getNodeValue();
        }
        else {
            System.out.println("no "+GSXML.TYPE_ATT+" attribute found for the "+GSXML.SEARCH_ELEM+" element, assuming mg");
            searchType = MGIndexer.MG_INDEX_TYPE;
        }

        String searchName = null;
        searchAttribute = searchAttributes.getNamedItem(GSXML.NAME_ATT);
        if (searchAttribute != null) {
            searchName = searchAttribute.getNodeValue();
        }
        if (searchName == null) {
            searchName = "idx"; // need to modify this if we have two search elements with no names
        }

        // create the pertinent indexer...
        IndexerInterface indexer = IndexerFactory.makeIndexer(searchType, searchName);
        if (indexer == null) {
            return;
        }

        // configure the indexer
        indexer.configure(node);

        // install it into the build manager
        this.buildManager.addIndexer(indexer);
    }

    /** @return the etc directory of the collection, as given by GSFile */
    public String getEtcDirectory()
    {
        return GSFile.collectionEtcDir(this.collectionHome);
    }

    /** @return the import directory of the collection, as given by GSFile */
    public String getImportDirectory()
    {
        return GSFile.collectionImportDir(this.collectionHome);
    }

    /** @return the build directory of the collection, as given by GSFile */
    public String getBuildDirectory()
    {
        return GSFile.collectionBuildDir(this.collectionHome);
    }

    /** @return the archive directory of the collection, as given by GSFile */
    public String getArchiveDirectory()
    {
        return GSFile.collectionArchiveDir(this.collectionHome);
    }

    /** @return the database connection used for this collection */
    public GS3SQLConnection getDatabase()
    {
        return this.database;
    }

    /**
     * Get the time at which the last build was started.
     * {@link #startBuild()} must have been called first; there is no build
     * date before that and this method fails with a NullPointerException.
     *
     * @return the start time of the last build
     */
    public Date getBuildDate()
    {
        return this.lastBuildDate.getTime();
    }

    /**
     * Mark the start of a build. The build document number is reset to 1 when
     * the previous build was started on a different calendar day, and the
     * current time is recorded as the build date.
     */
    public void startBuild()
    {
        GregorianCalendar today = new GregorianCalendar();

        if (this.lastBuildDate != null) {
            // if the build date is different to the last build date, then reset the build
            // document number
            boolean sameDay =
                today.get(Calendar.YEAR) == this.lastBuildDate.get(Calendar.YEAR) &&
                today.get(Calendar.MONTH) == this.lastBuildDate.get(Calendar.MONTH) &&
                today.get(Calendar.DAY_OF_MONTH) == this.lastBuildDate.get(Calendar.DAY_OF_MONTH);
            if (!sameDay) {
                this.buildDocNo = 1;
            }
        }
        this.lastBuildDate = today;
    }

    /**
     * Finish a build: write the build configuration file, listing the service
     * classes contributed by the indexers and classifiers, and print a summary
     * with the document count and the elapsed time since {@link #startBuild()}.
     * Failures while writing the file are reported on standard error and do
     * not stop the summary from being printed.
     */
    public void endBuild()
    {
        // here we write out the build config file
        // create the xml for the buildConfig
        XMLConverter converter = new XMLConverter();
        Document doc = converter.newDOM();
        Element build_config = doc.createElement(GSXML.COLLECTION_BUILD_ELEM);
        Element meta_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
        build_config.appendChild(meta_list);
        Element service_list = doc.createElement(GSXML.SERVICE_CLASS_ELEM+GSXML.LIST_MODIFIER);
        build_config.appendChild(service_list);

        // the document structure and metadata retrieval will use GS3Retrieve service, so add it in here
        Element base_retrieve_service = doc.createElement(GSXML.SERVICE_CLASS_ELEM);
        base_retrieve_service.setAttribute(GSXML.NAME_ATT, "GS3Retrieve");
        service_list.appendChild(base_retrieve_service);

        // ask the indexers and classifiers to add stuff into the service rack list
        this.buildManager.getIndexerManager().addServiceDescriptions(service_list);
        this.buildManager.getClassifierManager().addServiceDescriptions(service_list);

        // get the String
        String build_config_string = converter.getString(build_config);

        // write it to the file
        BufferedWriter writer = null;
        try {
            File build_config_file = new File(GSFile.collectionBuildConfigFileBuilding(this.collectionHome));
            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(build_config_file), "UTF-8"));
            writer.write(build_config_string, 0, build_config_string.length());
            // close here so that a failing flush is reported like any other write error
            writer.close();
            writer = null;
        }
        catch (Exception e) {
            System.err.println("CollectionManager.endBuild() Error while trying to output the buildConfig.xml file.");
            System.err.println(e.getMessage());
        }
        finally {
            // only reached with an open writer when something above failed
            if (writer != null) {
                try {
                    writer.close();
                }
                catch (IOException ignored) {
                    // the original failure has already been reported
                }
            }
        }

        long startTime = this.lastBuildDate.getTime().getTime();
        long endTime = new Date().getTime();
        // elapsed time, rounded to the nearest second
        long difference = ((endTime - startTime) + 500) / 1000;

        System.out.println("Build completed");
        System.out.println("---------------");
        System.out.println("Total Documents: " + this.getCollectionMetadata("gsdl3", "documentCount"));
        System.out.println("Total Time : " + (difference / 60) + " min. " + (difference % 60) + " secs.");
    }

    /**
     * Generate the next document identifier, of the form
     * <code>YYYYMMDD:n</code>, from the build date and the running build
     * document number, which is advanced by one.
     * {@link #startBuild()} must have been called first.
     *
     * @return the new document identifier
     */
    public String getNextDocumentID()
    {
        StringBuffer ID = new StringBuffer();

        ID.append(lastBuildDate.get(Calendar.YEAR));

        // the use of month is a little odd, hence the following
        // code. Calendar.MONTH yields 0 = January, 1 = February,
        // etc. hence there is a '+1' added to the month to make
        // it into January = 1, etc., and the padding is altered
        // correspondingly.
        int value = lastBuildDate.get(Calendar.MONTH);
        if (value < 9) {
            ID.append("0");
        }
        ID.append(value + 1);

        value = lastBuildDate.get(Calendar.DAY_OF_MONTH);
        if (value < 10) {
            ID.append("0");
        }
        ID.append(value);

        value = this.buildDocNo;
        this.buildDocNo++;

        ID.append(":");
        ID.append(Integer.toString(value));
        return ID.toString();
    }

    /**
     * Get the current build document number and advance the counter by one.
     * This shares its counter with {@link #getNextDocumentID()}.
     *
     * @return the document number before it was advanced
     */
    public int getDocumentNumber()
    {
        int current = this.buildDocNo;
        this.buildDocNo++;
        return current;
    }

    /**
     * Get the collection metadata item in the given namespace.
     * Only the first stored value is returned.
     * NOTE(review): this presumably fails when no value has been stored for
     * the label, since the first element is read unconditionally - confirm
     * against CollectionMetadata.
     *
     * @param namespace the namespace
     * @param label     the label of the metadata
     * @return the first value of the item, as a string
     */
    public String getCollectionMetadata(String namespace, String label)
    {
        return this.metadata.getCollectionMetadata(namespace, label).get(0).toString();
    }

    /**
     * Set the collection metadata item in the given namespace.
     *
     * @param namespace the namespace
     * @param label     the label
     * @param value     the value
     */
    public void setCollectionMetadata(String namespace, String label, String value)
    {
        this.metadata.setCollectionMetadata(namespace, label, value);
    }
}