source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 13916

Last change on this file since 13916 was 13916, checked in by kjdon, 17 years ago

levels now use displayItems for display text in the same way that indexes do.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1/*
2 * GS2LuceneRetrieve.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSFile;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.GDBMWrapper;
26import org.greenstone.gsdl3.util.DBInfo;
27import org.greenstone.gsdl3.util.GSHTML;
28import org.greenstone.gsdl3.util.OID;
29// XML classes
30import org.w3c.dom.Document;
31import org.w3c.dom.Element;
32import org.w3c.dom.Text;
33
34// General Java classes
35import java.io.File;
36
37import org.apache.log4j.Logger;
38
39/** Retrieve documents from a gs2 lucene collection. Note that this doesn't
40 actually use lucene, as the documents are stored in XML files */
41public class GS2LuceneRetrieve
42 extends AbstractGS2DocumentRetrieve
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
46
47
48 protected static final String DOC_LEVEL="Doc";
49 protected static final String SEC_LEVEL="Sec";
50 protected static final String ID_ATT = "gs2:id";
51
52 // Parameters used
53 private static final String LEVEL_PARAM = "level";
54
55 // Elements used in the config file that are specific to this class
56 private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
57
58 private String default_level = null;
59 private String text_dir = null;
60
61 private boolean text_available = true;
62
63 public GS2LuceneRetrieve() {
64 }
65
66 public void cleanUp() {
67 super.cleanUp();
68 }
69
70 /** configure this service */
71 public boolean configure(Element info, Element extra_info)
72 {
73 if (!super.configure(info, extra_info)){
74 return false;
75 }
76
77 // Do specific configuration
78 logger.info("Configuring GS2LuceneRetrieve...");
79
80 text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar;
81 if (!(new File(text_dir).isDirectory())) {
82 logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name);
83 text_available = false;
84 return true; // return true so that we still get the other services for the collection
85 }
86 // Get the default level out of <defaultLevel> (buildConfig.xml)
87 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
88 if (def != null) {
89 this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
90 }
91 if (this.default_level == null || this.default_level.equals("")) {
92 logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL);
93 this.default_level = DOC_LEVEL;
94 }
95
96 return true;
97
98 }
99
100 /** returns the content of a node
101 * should return a nodeContent element:
102 * <nodeContent>text content or other elements</nodeContent>
103 */
104 protected Element getNodeContent(String doc_id, String lang) throws GSException {
105 String doc_content = getTextString("TextRetrievalError", lang);
106 try {
107 if (!text_available) {
108 throw new Exception("No text directory available");
109 }
110
111 long doc_num = this.gdbm_src.OID2Docnum(doc_id);
112 if (doc_num == -1) {
113 throw new Exception("OID "+doc_id +" couldn't be converted to lucene doc num");
114 }
115
116 DBInfo info=this.gdbm_src.getInfo(OID.getTop(doc_id));
117 if (info == null) {
118 throw new Exception("Couldn't get GDBM database entry for "+OID.getTop(doc_id));
119 }
120
121 String archivedir=info.getInfo("archivedir");
122 File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml");
123 if (!doc_xml_file.isFile()) {
124 throw new Exception("Doc XML file "+doc_xml_file.getPath()+" does not exist");
125 }
126 Document doc_xml_doc = this.converter.getDOM(doc_xml_file);
127 if (doc_xml_doc == null) {
128 throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
129 }
130 Element full_document = doc_xml_doc.getDocumentElement();
131 if (full_document == null) {
132 throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
133 }
134 Element current_section = null;
135 if (default_level.equals(DOC_LEVEL)) {
136 current_section = full_document;
137 } else {
138 current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, String.valueOf(doc_num));
139 }
140 if (current_section == null) {
141 throw new Exception("Couldn't find section "+ doc_num+" in file "+doc_xml_file.getPath());
142 }
143 doc_content = GSXML.getNodeText(current_section);
144 if (doc_content == null) {
145 doc_content = "";
146 } else {
147 doc_content = resolveTextMacros(doc_content, doc_id, lang);
148 }
149 } catch (Exception e) {
150 logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e);
151 }
152
153 Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
154 Text t = this.doc.createTextNode(doc_content);
155 content_node.appendChild(t);
156 return content_node;
157 }
158}
Note: See TracBrowser for help on using the repository browser.