source: trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneDelete.java@ 12264

Last change on this file since 12264 was 12264, checked in by mdewsnip, 18 years ago

New classes to support incremental building with Lucene, many thanks to John Thompson and DL Consulting Ltd.

  • Property svn:keywords set to Author Date Id Revision
File size: 6.7 KB
Line 
1/** @file GS2LuceneDelete.java
2 *
3 * Provides a wrapper to the document deleting features of Lucene.
4 *
5 * This Java application makes use of the existing Lucene class IndexModifier
6 * to access and make changes to the information stored about documents in a
7 * Lucene database. This is an essential component of the IncrementalBuilder
8 * Perl module, and endeavours to make editing the text and metadata of
9 * documents without having to rebuild the entire collection a reality (in
10 * other words, true incremental/dynamic building).
11 *
12 * A component of the Greenstone digital library software from the New Zealand
13 * Digital Library Project at the University of Waikato, New Zealand.
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 * This program is distributed in the hope that it will be useful, but WITHOUT
21 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
23 * more details.
24 *
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, write to the Free Software Foundation, Inc., 675
27 * Mass Ave, Cambridge, MA 02139, USA.
28 *
29 * Copyright (c) 2006 DL Consulting Ltd., New Zealand
30 */
31
32package org.nzdl.gsdl.LuceneWrap;
33
34import java.io.IOException;
35import java.util.Arrays;
36import java.util.Enumeration;
37import java.util.Vector;
38
39import org.apache.lucene.analysis.Analyzer;
40import org.apache.lucene.analysis.standard.StandardAnalyzer;
41import org.apache.lucene.document.Document;
42import org.apache.lucene.document.Field;
43
44import org.nzdl.gsdl.LuceneWrap.GS2IndexModifier;
45
46/** Contains methods for deleting a document that has previously been indexed
47 * into a Lucene database.
48 * @author John Thompson, DL Consulting Ltd. (unless stated otherwise)
49 */
50public class GS2LuceneDelete
51{
52 /** This is the main entry point to the deletor and is responsible for
53 * parsing the arguments and creating an instance of the deletor class.
54 *
55 * @param args The arguments passed into the application as a string
56 * array
57 * @return An integer describing the exit state of the application
58 * @throws Exception on any fatal error state
59 */
60 static public void main (String args[])
61 throws Exception
62 {
63 // Parse arguments
64 int node_id = -1;
65 String index_path = "";
66
67 for (int i = 0; i < args.length; i += 2)
68 {
69 if (args[i].equals("--index"))
70 {
71 index_path = args[i + 1];
72 }
73 else if (args[i].equals("--nodeid"))
74 {
75 String temp = args[i + 1];
76 node_id = Integer.parseInt(temp);
77 temp = null; // Off to the gc with you!
78 }
79 else
80 {
81 System.out.println("Error! Unknown argument: " + args[i]);
82 GS2LuceneDelete.printUsage();
83 }
84 }
85
86 // Check arguments
87 if(index_path.equals(""))
88 {
89 System.out.println("Error! Missing index path");
90 GS2LuceneDelete.printUsage();
91 }
92 if(node_id == -1)
93 {
94 System.out.println("Error! Missing or invalid Node ID");
95 GS2LuceneDelete.printUsage();
96 }
97
98
99 // Instantiate deletor, and perform the delete
100 GS2LuceneDelete deletor = new GS2LuceneDelete(index_path);
101 deletor.deleteDocument(node_id);
102 deletor.destroy();
103 deletor = null;
104 }
105 /** main() **/
106
107 /** **/
108 private boolean debug = true;
109
110 /** **/
111 private GS2IndexModifier index_modifier;
112
113 /** Constructor which takes the path to the Lucene index to be edited.
114 *
115 * @param index_path The full path to the index directory as a String
116 */
117 public GS2LuceneDelete(String index_path)
118 throws IOException
119 {
120 Analyzer analyzer = new StandardAnalyzer();
121 index_modifier = new GS2IndexModifier(index_path, analyzer);
122 }
123 /** GS2LuceneDelete **/
124
125 /** When called prints a debug message but only if debugging is enabled.
126 */
127 public void debug(String message)
128 {
129 if(debug)
130 {
131 System.err.println(message);
132 }
133 }
134 /** debug() **/
135
136 /** Destructor which unallocates connection to Lucene.
137 */
138 public void destroy()
139 throws IOException
140 {
141 index_modifier.close();
142 index_modifier = null;
143 }
144
145 /** Delete the indicated document from the Lucene index. This process is
146 * very similar to the initial step of index editing.
147 *
148 * @param node_id The unique identifier of a Lucene document as an
149 * integer
150 */
151 public void deleteDocument(int node_id)
152 throws IOException
153 {
154 debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
155 debug("- Initial number of documents in index: " + index_modifier.docCount());
156 // Retrieve the document requested
157 int doc_num = index_modifier.getDocNumByNodeID(node_id);
158 if (doc_num != -1)
159 {
160 debug("* Found document #" + doc_num);
161 // Retrieve the actual document
162 Document document = index_modifier.document(doc_num);
163 // Remove the document from the index before modifying
164 index_modifier.deleteDocument(doc_num);
165 debug("* Removed document from index");
166 }
167 else
168 {
169 debug("- No such document!");
170 }
171 debug("- Final number of documents in index: " + index_modifier.docCount());
172 }
173 /** editIndex() **/
174
175 /** Display program usage message.
176 */
177 static public void printUsage()
178 {
179 System.out.println("usage: GS2LuceneDelete --index <path> --nodeid <int>");
180 System.out.println("");
181 System.out.println("where:");
182 System.out.println(" index - is the full path to the directory containing the directory");
183 System.out.println(" to edit, including the level (ie didx, sidx)");
184 System.out.println(" nodeid - the unique identifier of the document to delete. This is the");
185 System.out.println(" same as the docnum in the GDBM");
186 System.out.println("");
187 System.exit(0);
188 }
189 /** printUsage() **/
190
191}
192/** class GS2LuceneDelete **/
Note: See TracBrowser for help on using the repository browser.