Changeset 25649 for main


Ignore:
Timestamp:
2012-05-23T16:47:11+12:00 (12 years ago)
Author:
sjm84
Message:

Reformatting this file

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java

    r20817 r25649  
    3636import org.apache.log4j.Logger;
    3737
    38 /** Retrieve documents from a gs2 lucene collection. Note that this doesn't
    39     actually use lucene, as the documents are stored in XML files */
    40 public class GS2LuceneRetrieve
    41     extends AbstractGS2DocumentRetrieve
     38/**
     39 * Retrieve documents from a gs2 lucene collection. Note that this doesn't
     40 * actually use lucene, as the documents are stored in XML files
     41 */
     42public class GS2LuceneRetrieve extends AbstractGS2DocumentRetrieve
    4243{
    43    
    44     static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
    45    
    46    
    47     protected static final String DOC_LEVEL="Doc";
    48     protected static final String SEC_LEVEL="Sec";
    49     protected static final String ID_ATT = "gs2:docOID";
    5044
    51     // Parameters used
    52     private static final String LEVEL_PARAM = "level";
    53    
    54     // Elements used in the config file that are specific to this class
    55     private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
    56    
    57     private String default_level = null;
    58     private String text_dir = null;
     45    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
    5946
    60     private boolean text_available = true;
    61    
    62     public GS2LuceneRetrieve() {
    63     }
    64    
    65     public void cleanUp() {
    66     super.cleanUp();
    67     }
    68    
    69     /** configure this service */
    70     public boolean configure(Element info, Element extra_info)
    71     {
    72     if (!super.configure(info, extra_info)){
    73         return false;
     47    protected static final String DOC_LEVEL = "Doc";
     48    protected static final String SEC_LEVEL = "Sec";
     49    protected static final String ID_ATT = "gs2:docOID";
     50
     51    // Parameters used
     52    private static final String LEVEL_PARAM = "level";
     53
     54    // Elements used in the config file that are specific to this class
     55    private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
     56
     57    private String default_level = null;
     58    private String text_dir = null;
     59
     60    private boolean text_available = true;
     61
     62    public GS2LuceneRetrieve()
     63    {
    7464    }
    75    
    76     // Do specific configuration
    77     logger.info("Configuring GS2LuceneRetrieve...");
    78    
    79     text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar;
    80     if (!(new File(text_dir).isDirectory())) {
    81         logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name);
    82         text_available = false;
    83         return true; // return true so that we still get the other services for the collection
     65
     66    public void cleanUp()
     67    {
     68        super.cleanUp();
    8469    }
    85     // Get the default level out of <defaultLevel> (buildConfig.xml)
    86     Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
    87     if (def != null) {
    88         this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
     70
     71    /** configure this service */
     72    public boolean configure(Element info, Element extra_info)
     73    {
     74        if (!super.configure(info, extra_info))
     75        {
     76            return false;
     77        }
     78
     79        // Do specific configuration
     80        logger.info("Configuring GS2LuceneRetrieve...");
     81
     82        text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar + "text" + File.separatorChar;
     83        if (!(new File(text_dir).isDirectory()))
     84        {
     85            logger.error("Text directory " + text_dir + " does not exist, will be unable to retrieve text for " + cluster_name);
     86            text_available = false;
     87            return true; // return true so that we still get the other services for the collection
     88        }
     89        // Get the default level out of <defaultLevel> (buildConfig.xml)
     90        Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
     91        if (def != null)
     92        {
     93            this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
     94        }
     95        if (this.default_level == null || this.default_level.equals(""))
     96        {
     97            logger.error("Default level not specified for " + this.cluster_name + ", assuming " + DOC_LEVEL);
     98            this.default_level = DOC_LEVEL;
     99        }
     100
     101        return true;
     102
    89103    }
    90     if (this.default_level == null || this.default_level.equals("")) {
    91         logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL);
    92         this.default_level = DOC_LEVEL;
    93     }
    94    
    95     return true;
    96    
    97     }
    98    
    99     /** returns the content of a node
    100      * should return a nodeContent element:
    101      * <nodeContent>text content or other elements</nodeContent>
    102      */
    103     protected Element getNodeContent(String doc_id, String lang) throws GSException {   
    104       String [] args = new String[1];
    105       args[0] = doc_id;
    106       String doc_content = getTextString("TextRetrievalError", lang, args);
    107     try {
    108         if (!text_available) {
    109         throw new Exception("No text directory available");
    110         }
    111            
    112         DBInfo info=this.coll_db.getInfo(OID.getTop(doc_id));
    113         if (info == null) {
    114         throw new Exception("Couldn't get database entry for "+OID.getTop(doc_id));
    115         }
    116        
    117         String archivedir=info.getInfo("archivedir");
    118         File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml");
    119         if (!doc_xml_file.isFile()) {
    120         throw new Exception("Doc XML file "+doc_xml_file.getPath()+" does not exist");
    121         }
    122         Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8");
    123         if (doc_xml_doc == null) {
    124         throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
    125         }
    126         Element full_document = doc_xml_doc.getDocumentElement();
    127         if (full_document == null) {
    128         throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
    129         }
    130         Element current_section = null;
    131         if (default_level.equals(DOC_LEVEL)) {
    132         current_section = full_document;
    133         } else {
    134         current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id);
    135         }
    136         if (current_section == null) {
    137         throw new Exception("Couldn't find section "+ doc_id+" in file "+doc_xml_file.getPath());
    138         }
    139         doc_content = GSXML.getNodeText(current_section);
    140         if (doc_content == null) {
    141         doc_content = "";
    142         } else {
    143         doc_content = resolveTextMacros(doc_content, doc_id, lang);
    144         }
    145     } catch (Exception e) {
    146         logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e);
     104
     105    /**
     106     * returns the content of a node should return a nodeContent element:
     107     * <nodeContent>text content or other elements</nodeContent>
     108     */
     109    protected Element getNodeContent(String doc_id, String lang) throws GSException
     110    {
     111        String[] args = new String[1];
     112        args[0] = doc_id;
     113        String doc_content = getTextString("TextRetrievalError", lang, args);
     114        try
     115        {
     116            if (!text_available)
     117            {
     118                throw new Exception("No text directory available");
     119            }
     120
     121            DBInfo info = this.coll_db.getInfo(OID.getTop(doc_id));
     122            if (info == null)
     123            {
     124                throw new Exception("Couldn't get database entry for " + OID.getTop(doc_id));
     125            }
     126
     127            String archivedir = info.getInfo("archivedir");
     128            File doc_xml_file = new File(text_dir + archivedir + File.separatorChar + "doc.xml");
     129            if (!doc_xml_file.isFile())
     130            {
     131                throw new Exception("Doc XML file " + doc_xml_file.getPath() + " does not exist");
     132            }
     133            Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8");
     134            if (doc_xml_doc == null)
     135            {
     136                throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
     137            }
     138            Element full_document = doc_xml_doc.getDocumentElement();
     139            if (full_document == null)
     140            {
     141                throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
     142            }
     143            Element current_section = null;
     144            if (default_level.equals(DOC_LEVEL))
     145            {
     146                current_section = full_document;
     147            }
     148            else
     149            {
     150                current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id);
     151            }
     152            if (current_section == null)
     153            {
     154                throw new Exception("Couldn't find section " + doc_id + " in file " + doc_xml_file.getPath());
     155            }
     156            doc_content = GSXML.getNodeText(current_section);
     157            if (doc_content == null)
     158            {
     159                doc_content = "";
     160            }
     161            else
     162            {
     163                doc_content = resolveTextMacros(doc_content, doc_id, lang);
     164            }
     165        }
     166        catch (Exception e)
     167        {
     168            logger.error("Error trying to get document text for " + doc_id + " in collection " + this.cluster_name + ": " + e);
     169        }
     170
     171        Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
     172        Text t = this.doc.createTextNode(doc_content);
     173        content_node.appendChild(t);
     174        return content_node;
    147175    }
    148    
    149     Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
    150     Text t = this.doc.createTextNode(doc_content);
    151     content_node.appendChild(t);
    152     return content_node;
    153     }
    154176}
Note: See TracChangeset for help on using the changeset viewer.