package org.greenstone.gsdl3.gs3build.classifier;
import java.io.*;
import java.net.URL;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.xerces.parsers.SAXParser;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;
import org.w3c.dom.Element;
import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
import org.greenstone.gsdl3.gs3build.util.XMLTools;
import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
import org.greenstone.gsdl3.util.GSXML;
public class HierarchyClassifier extends AbstractClassifier
{
/**
 * Observer handed to the hierarchy while one document is being classified.
 * Every label reported to it is written back onto the document as
 * "gsdl3"/"classified" metadata and is also remembered, so that
 * getClassifications() can return the labels seen so far.
 */
class HierarchyClassifierObserver implements ClassifierObserverInterface
{
  /** Labels passed to recordClassification, in the order they were reported. */
  List classifications;

  /** The document that receives the "classified" metadata. */
  DocumentInterface document;

  /**
   * @param document the document being classified
   */
  public HierarchyClassifierObserver(DocumentInterface document)
  {
    this.document = document;
    // Previously this list was never created, so getClassifications()
    // could only ever return null.
    this.classifications = new ArrayList();
  }

  /**
   * Notes that the document has been placed under the given classification.
   *
   * @param label the label reported by the matching node
   */
  public void recordClassification(String label)
  {
    this.document.addDocumentMetadata("gsdl3", "classified", label);
    this.classifications.add(label);
  }

  /**
   * @return the labels recorded so far; empty (never null) when nothing has
   *         been recorded
   */
  public List getClassifications()
  {
    return this.classifications;
  }
}
/**
 * Simple node of the classification hierarchy. All state and matching logic
 * come from AbstractHierarchyNode; this class only adds the recursive walk
 * that offers a document's metadata values to the node and its children.
 */
class HierarchyNode extends AbstractHierarchyNode
{
  /** Creates a node using the superclass's no-argument constructor. */
  public HierarchyNode()
  {
    super();
  }

  /** Creates a node, passing all four values straight to the superclass. */
  public HierarchyNode(String prefix, String name, String id, String descriptor)
  {
    super(prefix, name, id, descriptor);
  }

  /**
   * Tests every value against this node and then against each child node.
   * For each value that isMatch() accepts, the node's name is reported to
   * the observer and the document is added to this node with the sort key.
   *
   * @param documentID the document being classified
   * @param values     the metadata values to test
   * @param sortKey    the key passed to addDocument for each match
   * @param observer   receives this node's name for each match
   */
  public void getClassifications(DocumentID documentID, List values, String sortKey,
                                 ClassifierObserverInterface observer)
  {
    for (Iterator candidates = values.iterator(); candidates.hasNext(); ) {
      String candidate = candidates.next().toString();
      if (!this.isMatch(candidate)) {
        continue;
      }
      observer.recordClassification(this.name);
      this.addDocument(documentID, sortKey);
    }

    // Descend: every child repeats the same test on the same values.
    for (Iterator children = this.childNodes.iterator(); children.hasNext(); ) {
      HierarchyNode child = (HierarchyNode) children.next();
      child.getClassifications(documentID, values, sortKey, observer);
    }
  }
}
/**
 * SAX handler that builds the node tree from a classifier hierarchy file.
 *
 * A "Hierarchy" element creates the root node. Each "Classification" element
 * creates a node that is attached to the node current at that point (or
 * becomes the root when there is none) and then becomes the current node.
 * The text of "Name", "Path", "Description" and "Matches" elements is
 * collected and applied to the current node when the element closes.
 */
class HierarchyHandler extends DefaultHandler
{
  // Text buffers; each is non-null only while its element is open.
  StringBuffer description;
  StringBuffer name;
  StringBuffer path;
  StringBuffer match;

  /** Prefix given to every node created by this handler. */
  String prefix;

  // Not read or written anywhere in this handler.
  boolean inElement;

  AbstractHierarchyNode rootNode = null;
  AbstractHierarchyNode currentNode = null;

  /**
   * @param classifierName used as the prefix of every node created
   */
  HierarchyHandler(String classifierName)
  {
    super();
    this.name = null;
    this.description = null;
    this.path = null;
    this.match = null;
    this.prefix = classifierName;
  }

  /**
   * Creates nodes for structural elements and opens a text buffer for the
   * text-bearing ones. "Children" and unrecognised elements need no action.
   */
  public void startElement(String URI, String localName, String qName, Attributes attributes)
  {
    if (localName.equals("Hierarchy")) {
      // The hierarchy element itself is the root of the tree.
      HierarchyNode node = new HierarchyNode(prefix, "", "", "Classifier");
      this.rootNode = node;
      this.currentNode = node;
    }
    else if (localName.equals("Classification")) {
      HierarchyNode node = new HierarchyNode(this.prefix, "", "", "");
      if (this.currentNode == null) {
        // No enclosing node: this classification becomes the root.
        this.rootNode = node;
      }
      else {
        this.currentNode.addChild(node);
      }
      this.currentNode = node;
    }
    else if (localName.equals("Name")) {
      this.name = new StringBuffer();
    }
    else if (localName.equals("Path")) {
      this.path = new StringBuffer();
    }
    else if (localName.equals("Description")) {
      this.description = new StringBuffer();
    }
    else if (localName.equals("Matches")) {
      this.match = new StringBuffer();
    }
  }

  /**
   * Applies collected text to the current node, or steps back to the parent
   * node when a "Classification" closes. Text-bearing elements that close
   * while there is no current node are reported and skipped rather than
   * failing with a NullPointerException.
   */
  public void endElement(String URI, String localName, String qName)
  {
    if (localName.equals("Classification")) {
      // getParent() may be null here; the original code marked that case as
      // an unhandled error state (TODO) and it leaves no current node.
      if (this.currentNode != null) {
        this.currentNode = this.currentNode.getParent();
      }
    }
    else if (localName.equals("Name")) {
      if (hasCurrentNode(localName)) {
        this.currentNode.setName(XMLTools.cleanString(this.name.toString()));
      }
      System.out.println("Name is " + this.name);
      this.name = null;
    }
    else if (localName.equals("Path")) {
      if (hasCurrentNode(localName)) {
        this.currentNode.setID(XMLTools.cleanString(this.path.toString()));
      }
      this.path = null;
    }
    else if (localName.equals("Description")) {
      if (hasCurrentNode(localName)) {
        this.currentNode.setDescriptor(XMLTools.cleanString(this.description.toString()));
      }
      this.description = null;
    }
    else if (localName.equals("Matches")) {
      if (hasCurrentNode(localName)) {
        this.currentNode.addMatch(XMLTools.cleanString(this.match.toString()));
      }
      this.match = null;
    }
  }

  /**
   * Appends character data to whichever buffer is open, checked in the order
   * path, name, description, match. Text arriving while no buffer is open is
   * dropped.
   */
  public void characters(char c[], int start, int length)
  {
    StringBuffer target = null;
    if (this.path != null) {
      target = this.path;
    }
    else if (this.name != null) {
      target = this.name;
    }
    else if (this.description != null) {
      target = this.description;
    }
    else if (this.match != null) {
      target = this.match;
    }

    if (target != null) {
      // Append the range directly; no temporary String is needed.
      target.append(c, start, length);
    }
  }

  /**
   * @return the root of the tree built so far, or null if no node was created
   */
  public AbstractHierarchyNode getHierarchy()
  {
    return this.rootNode;
  }

  /**
   * Checks that there is a node to receive an element's text, reporting the
   * element on standard output when there is not.
   */
  private boolean hasCurrentNode(String element)
  {
    if (this.currentNode != null) {
      return true;
    }
    System.out.println("ignoring " + element + " element found outside any classification node");
    return false;
  }
}
// Connection passed to the hierarchy's writeSQL() in completeClassification(); set through setDatabase().
private GS3SQLConnection database;
// Root node obtained from the handler after the hierarchy file has been parsed in the constructor.
private AbstractHierarchyNode hierarchy;
// Names of the metadata fields whose values are offered to the hierarchy in classifyDocument().
private List fields;
// Name of the metadata field whose first value is used as the sort key; when null no sort key is looked up.
private String sortBy;
// Classifier name given to the constructor; written as the name attribute in addClassifierDescription().
private String name;
/**
 * Creates a classifier from a hierarchy description file.
 *
 * The name, field list and sort field are stored before the file is parsed,
 * so they remain set even when parsing fails. A parse or I/O failure is
 * reported on standard output and leaves the hierarchy null.
 *
 * @param name     the classifier name; also given to the handler as the node prefix
 * @param basefile location of the XML hierarchy file
 * @param fields   names of the metadata fields to classify on; null is
 *                 treated as an empty list
 * @param sortBy   name of the metadata field that supplies the sort key, or null
 */
public HierarchyClassifier(String name, URL basefile, List fields, String sortBy)
{
  // Set the plain state first: it does not depend on the parse succeeding,
  // and configure() needs a usable field list afterwards.
  this.name = name;
  this.sortBy = sortBy;
  this.fields = (fields != null) ? fields : new ArrayList();

  try {
    SAXParser parser = new SAXParser();
    HierarchyHandler handler = new HierarchyHandler(name);
    parser.setContentHandler(handler);

    // A Classifier consists of one file only - get it.
    parser.parse(basefile.toString());
    this.hierarchy = handler.getHierarchy();

    System.out.println("Counting " + this.fields.size() + " fields");
  }
  catch (SAXException saxException) {
    // TODO: log error
    System.out.println("unable to process hierarchy " + saxException.toString());
  }
  catch (java.io.IOException ioException) {
    // FileNotFoundException is an IOException and is reported the same way.
    System.out.println("unable to process hierarchy " + ioException.toString());
  }
}
/**
 * Applies a list of option-style parameters to this classifier.
 *
 * Only "-metadata" is recognised: the parameter that follows it is added to
 * the list of fields to classify on, unless it is null or empty. A trailing
 * "-metadata" with nothing after it, and any other parameter, is ignored.
 *
 * @param parameters the parameters to scan; null is treated as an empty list
 */
public void configure(List parameters)
{
  if (parameters == null) {
    return;
  }

  Iterator iterator = parameters.iterator();
  while (iterator.hasNext()) {
    Object param = iterator.next();
    if (param == null || !"-metadata".equals(param.toString())) {
      continue;
    }
    if (!iterator.hasNext()) {
      continue;
    }

    // The token after -metadata is always consumed, even if it is unusable.
    Object field = iterator.next();
    if (field == null || field.toString().length() == 0) {
      continue;
    }

    // The field list may be missing if construction did not complete normally.
    if (this.fields == null) {
      this.fields = new ArrayList();
    }
    this.fields.add(field.toString());
  }
}
/**
 * Sets the database connection that the hierarchy is written to when
 * classification is completed.
 *
 * @param database the connection to use
 */
public void setDatabase(GS3SQLConnection database)
{
  this.database = database;
}
/**
 * Classifies one document. For every configured field that has values on the
 * document, those values are offered to the hierarchy; matching nodes record
 * the document id, and the document itself is told about each match through
 * an observer.
 *
 * Nothing is done when there are no fields or when no hierarchy was loaded.
 *
 * @param documentID the document identifier recorded against matching nodes
 * @param document   the document whose metadata is read and which receives
 *                   the classification metadata
 * @return currently always true
 */
public boolean classifyDocument(DocumentID documentID, DocumentInterface document)
{
  // Without fields or a parsed hierarchy there is nothing to classify against.
  if (this.fields == null || this.hierarchy == null) {
    return true;
  }

  // The observer records the assignment of the document to classifications.
  HierarchyClassifierObserver classifyObserver = new HierarchyClassifierObserver(document);

  // For every field listed as being a scannable field...
  Iterator thisField = this.fields.iterator();
  while (thisField.hasNext()) {
    String fieldName = thisField.next().toString();

    // ...get the values for that field...
    List values = document.getDocumentMetadataItem(fieldName);
    if (values == null) {
      continue;
    }

    if (documentID == null) {
      System.out.println("Bad documentID");
      continue;
    }

    // The sort key is the first value of the sortBy field, when there is one.
    String sortKey = null;
    if (this.sortBy != null) {
      List sortKeys = document.getDocumentMetadataItem(this.sortBy);
      if (sortKeys != null && sortKeys.size() > 0) {
        sortKey = sortKeys.get(0).toString();
      }
    }

    // ...and send them to the classifier.
    this.hierarchy.getClassifications(documentID, values, sortKey, classifyObserver);
  }
  return true;
}
/**
 * Reports how many passes this classifier makes; it always reports one.
 *
 * @see ClassifierInterface#getClassifierPasses
 */
public int getClassifierPasses()
{
  return 1;
}
/**
 * Called at the start of a classifier pass. This classifier does nothing here.
 *
 * @param pass the pass number (unused)
 */
public void startClassifierPass(int pass)
{
  // intentionally empty
}
/**
 * Called at the end of a classifier pass. This classifier does nothing here.
 *
 * @param pass the pass number (unused)
 */
public void endClassifierPass(int pass)
{
  // intentionally empty
}
/**
 * Finishes classification by writing the hierarchy to the configured
 * database. Does nothing when no hierarchy is present.
 */
public void completeClassification()
{
  // TODO: store the classifications to file...
  if (this.hierarchy == null) {
    return;
  }
  this.hierarchy.writeSQL(this.database);
}
/**
 * Appends an element describing this classifier to the given list element.
 * The new element is created in the list's owner document and carries this
 * classifier's name as its name attribute.
 *
 * @param classifier_list the element that receives the description
 * @return always true
 */
public boolean addClassifierDescription(Element classifier_list)
{
  // TODO check that there are some docs in the classification
  org.w3c.dom.Document owner = classifier_list.getOwnerDocument();
  Element description = owner.createElement(GSXML.CLASSIFIER_ELEM);
  description.setAttribute(GSXML.NAME_ATT, this.name);
  classifier_list.appendChild(description);
  return true;
}
}