package org.greenstone.gsdl3.gs3build.extractor;

import java.io.FileReader;

import java.net.URL;

import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import org.greenstone.gsdl3.gs3build.util.GS2TextFileHandler;

import org.apache.xerces.parsers.SAXParser;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;

import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
import org.greenstone.gsdl3.gs3build.doctypes.MetadataDocument;
import org.greenstone.gsdl3.gs3build.doctypes.DocumentLoader;
import org.greenstone.gsdl3.gs3build.doctypes.DocumentList;

import org.greenstone.gsdl3.gs3build.metadata.MetadataLabel;

/**
 *  Extractor that reads metadata documents and applies the metadata they
 *  contain to other documents held in a {@link DocumentList}.
 *
 *  Only documents whose type equals
 *  {@code MetadataDocument.METADATA_DOCUMENT_TYPE} are processed; every other
 *  document is ignored by {@link #extractDocument}.
 */
public class MetaXMLExtractor implements ExtractorInterface
{
  /**
   *  SAX handler for a metadata file.
   *
   *  It reacts to four element names:
   *  <ul>
   *   <li><code>FileSet</code>  - starts a fresh list of file names;</li>
   *   <li><code>FileName</code> - its text is appended to the current list;</li>
   *   <li><code>Description</code> - looks up the documents that match the
   *       current file list and, when the element closes, stores them back
   *       into the document list;</li>
   *   <li><code>Metadata</code> - its text is added to (or set on) each
   *       matched document under the label given by the <code>name</code>
   *       attribute.</li>
   *  </ul>
   *  {@link #setUrl} must be called before parsing starts, because the URL is
   *  dereferenced when a <code>Description</code> element opens.
   */
  class MetadataHandler extends DefaultHandler
  {
    List         files;        // file names gathered from FileName elements
    String       label;        // name of the Metadata element being read
    StringBuffer value;        // text being accumulated; null when not collecting
    URL          url;          // base URL handed to the document lookup
    boolean      inElement;    // not used by this handler
    boolean      accumulate;   // true: add metadata value, false: set it
    DocumentList documentList;
    List         documentIds;  // ids matched by the current Description
    List         documents;    // documents matched by the current Description

    MetadataHandler(DocumentList documentList)
    {
      super();

      // Start with an empty list so that a FileName seen before any FileSet
      // cannot fail on a null list.
      this.files = new ArrayList();
      this.label = null;
      this.value = null;
      this.documentList = documentList;
    }

    /**
     *  Reacts to the opening of the elements listed in the class comment.
     */
    public void startElement(String URI, String localName, String qName, Attributes attributes)
    {
      if (localName.equals("FileName")) {
        this.value = new StringBuffer();
      }
      else if (localName.equals("FileSet")) {
        this.files = new ArrayList();
      }
      else if (localName.equals("Description")) {
        // Forget the documents matched by any earlier Description so that
        // they are neither modified nor stored again by this one.
        this.documents = null;
        this.documentIds = this.documentList.findDocumentIdsUsingFiles(this.files, this.url.toString());

        if (this.documentIds != null && this.documentIds.size() > 0) {
          this.documents = new ArrayList();

          Iterator idIterator = this.documentIds.iterator();
          while (idIterator.hasNext()) {
            String docIdString = idIterator.next().toString();
            DocumentID docId   = new DocumentID(docIdString);
            DocumentInterface document = this.documentList.getDocument(docId);
            if (document != null) {
              this.documents.add(document);
            }
          }
        }
      }
      else if (localName.equals("Metadata")) {
        this.label = attributes.getValue("name");
        this.value = new StringBuffer();

        // getValue returns null when the attribute is absent; treat a missing
        // mode as "do not accumulate" instead of failing.
        String mode = attributes.getValue("mode");
        this.accumulate = mode != null && mode.equals(ExtractorManager.ACCUMULATE_MODE);
      }
    }

    /**
     *  Reacts to the closing of the elements listed in the class comment.
     */
    public void endElement(String URI, String localName, String qName)
    {
      if (localName.equals("FileName")) {
        String file = this.value.toString();
        this.value = null;
        this.files.add(file);
      }
      else if (localName.equals("FileSet")) {
        // Nothing to do: the file list is consumed when Description opens.
      }
      else if (localName.equals("Description")) {
        if (this.documents != null) {
          // write out the modified documents
          // TODO: nicer/more generalised interface for this and related activity in
          //       extractor manager (actually, enricher manager);
          Iterator docIterator = this.documents.iterator();
          while (docIterator.hasNext()) {
            DocumentInterface document = (DocumentInterface) docIterator.next();
            this.documentList.storeChangedDocument(document);
          }
        }

        // These documents have been stored; do not hand them to a later
        // Description.
        this.documents = null;
      }
      else if (localName.equals("Metadata")) {
        String text = this.value.toString();

        MetaXMLExtractor.postMetadata(this.url, this.files, this.label, text, this.accumulate);

        // 'documents' is only non-null when the enclosing Description matched
        // at least one document id.
        if (this.documents != null) {
          Iterator docIterator = this.documents.iterator();
          while (docIterator.hasNext()) {
            DocumentInterface document = (DocumentInterface) docIterator.next();

            // Post to document
            // TODO: tailor this to posting documents to *sections* as required...
            if (this.accumulate) {
              document.addDocumentMetadata(new MetadataLabel(this.label), text);
            }
            else {
              document.setDocumentMetadata(new MetadataLabel(this.label), text);
            }
          }
        }

        // flatten the metadata items again...
        this.value = null;
        this.label = null;
      }
    }

    /**
     *  Collects character data while a FileName or Metadata element is open
     *  (that is, while <code>value</code> is non-null); other text is ignored.
     */
    public void characters(char c[], int start, int length)
    {
      if (this.value != null) {
        this.value.append(c, start, length);
      }
    }

    /**
     *  Sets the base URL passed to the document lookup.
     */
    public void setUrl(URL url)
    {
      this.url = url;
    }
  }

  private DocumentList documentList;

  /**
   *  Constructs the extractor.  The document list must be supplied
   *  afterwards through {@link #configure(DocumentList)}.
   */
  public MetaXMLExtractor()
  { // Intentionally left blank
  }

  /**
   *  The output directory is not used by this extractor, so this member
   *  function is empty.
   */
  public void configure(String outputDir)
  { // Intentionally left blank
  }

  /**
   *  Records the document list whose documents receive the metadata.
   */
  public void configure(DocumentList list)
  {
    this.documentList = list;
  }

  /**
   *  This extractor doesn't need to do any preparation/completion work,
   *  so this member function is empty.
   */
  public void startPass(int passNo)
  { // Intentionally left blank
  }

  /**
   *  Process the document - for a metadata document, this results in the
   *  decoration of other files, for other documents, it does nothing.
   *
   *  Parse and I/O failures are reported on standard error and otherwise
   *  ignored.
   */
  public void extractDocument(DocumentID docID, DocumentInterface document)
  {
    if (!document.getDocumentType().equals(MetadataDocument.METADATA_DOCUMENT_TYPE)) {
      return;
    }

    try {
      SAXParser parser = new SAXParser();
      MetadataHandler handler = new MetadataHandler(this.documentList);
      parser.setContentHandler(handler);

      // Get path of file; we cheat here by assuming that the url is a file - this
      // really ought to be done better [TODO: fix to handle full paths & URLs]
      // Only the first file of the document is read.
      URL url = document.getDocumentFiles().getFile(0).getURL();
      String filePath = url.getPath();

      // Resolving "." against the file's URL gives the handler the URL of the
      // containing location rather than of the metadata file itself.
      handler.setUrl(new URL(url, "."));

      parser.parse(filePath);
    }
    catch (SAXException saxException) {
      // TODO: log error
      System.err.println(saxException);
    }
    catch (java.io.IOException ioException) {
      // Also covers FileNotFoundException and MalformedURLException.
      System.err.println(ioException);
    }
  }

  /**
   *  Reports one metadata item for each of the given files.  At present this
   *  only prints a line per file to standard output.
   *
   *  @param url        base URL the file names belong to
   *  @param files      file names the metadata applies to; a null list is
   *                    treated as empty
   *  @param label      metadata label
   *  @param value      metadata value
   *  @param accumulate not used by the current implementation
   */
  protected static void postMetadata(URL url, List files, String label, String value, boolean accumulate)
  {
    if (files == null) {
      return;
    }

    Iterator fileIter = files.iterator();
    while (fileIter.hasNext()) {
      String file = fileIter.next().toString();

      System.out.println(url.toString() + " " + file + ": " + label + "=" + value);
    }
  }

  /**
   *  This extractor doesn't need to do any preparation/completion work,
   *  so this member function is empty.
   */
  public void endPass(int passNo)
  { // Intentionally left blank
  }

  /**
   *  This extractor is a simple, single-pass extractor
   *
   *  @see ExtractorInterface
   */
  public int getNumberOfPasses()
  {
    return 1;
  }
}