source: main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper3/GS2LuceneDelete.java@ 24726

Last change on this file since 24726 was 24726, checked in by davidb, 13 years ago

Repackaging to LuceneWrapper3

  • Property svn:executable set to *
File size: 5.6 KB
Line 
/** @file GS2LuceneDelete.java
 *
 * Provides a wrapper to the document deleting features of Lucene.
 *
 * This java application makes use of the existing Lucene class IndexWriter
 * to access and make changes to the information stored about documents in a
 * Lucene database. This is an essential component of the IncrementalBuilder
 * PERL module, and endeavours to make editing the text and metadata of
 * documents without having to rebuild the entire collection a reality (in
 * other words, true incremental/dynamic building).
 *
 * A component of the Greenstone digital library software from the New Zealand
 * Digital Library Project at the University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc., 675
 * Mass Ave, Cambridge, MA 02139, USA.
 *
 * Copyright (c) 2006 DL Consulting Ltd., New Zealand
 */
31
32package org.greenstone.LuceneWrapper3;
33
34import java.io.IOException;
35import java.io.File;
36//import org.apache.lucene.analysis.standard.StandardAnalyzer;
37import org.apache.lucene.index.IndexWriter;
38import org.apache.lucene.index.Term;
39
40import org.apache.lucene.store.SimpleFSDirectory;
41import org.apache.lucene.index.IndexWriter.MaxFieldLength;
42
43
/** Contains methods for deleting a document that has previously been indexed
 * into a Lucene database.
 * @author John Thompson, DL Consulting Ltd. (unless stated otherwise)
 */
48public class GS2LuceneDelete
49{
50 /** This is the main entry point to the deletor and is responsible for
51 * parsing the arguments and creating an instance of the deletor class.
52 *
53 * @param args The arguments passed into the application as a string
54 * array
55 * @return An integer describing the exit state of the application
56 * @throws Exception on any fatal error state
57 */
58 static public void main (String args[])
59 throws Exception
60 {
61 // Parse arguments
62 String index_path = "";
63 int node_id = -1;
64
65 for (int i = 0; i < args.length; i += 2)
66 {
67 if (args[i].equals("--index"))
68 {
69 index_path = args[i + 1];
70 }
71 else if (args[i].equals("--nodeid"))
72 {
73 node_id = Integer.parseInt(args[i + 1]);
74 }
75 else
76 {
77 System.out.println("Error! Unknown argument: " + args[i]);
78 GS2LuceneDelete.printUsage();
79 System.exit(0);
80 }
81 }
82
83 // Check arguments
84 if (index_path.equals(""))
85 {
86 System.out.println("Error! Missing index path");
87 GS2LuceneDelete.printUsage();
88 System.exit(0);
89 }
90 if (node_id == -1)
91 {
92 System.out.println("Error! Missing or invalid Node ID");
93 GS2LuceneDelete.printUsage();
94 System.exit(0);
95 }
96
97 // Instantiate deletor, and perform the delete
98 GS2LuceneDelete deletor = new GS2LuceneDelete(index_path);
99 deletor.deleteDocument(node_id);
100 deletor.destroy();
101 deletor = null;
102 }
103
104
105 /** Display program usage message.
106 */
107 static public void printUsage()
108 {
109 System.out.println("usage: GS2LuceneDelete --index <path> --nodeid <int>");
110 System.out.println("");
111 System.out.println("where:");
112 System.out.println(" index - is the full path to the directory containing the directory");
113 System.out.println(" to edit, including the level (ie didx, sidx)");
114 System.out.println(" nodeid - the unique identifier of the document to delete. This is the");
115 System.out.println(" same as the docnum in the GDBM");
116 System.out.println("");
117 }
118
119
120 /** **/
121 private boolean debug = true;
122
123 /** **/
124 private IndexWriter index_writer = null;
125
126
127 /** Constructor which takes the path to the Lucene index to be edited.
128 *
129 * @param index_path The full path to the index directory as a String
130 */
131 public GS2LuceneDelete(String index_path)
132 throws IOException
133 {
134 SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
135 index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
136 MaxFieldLength.UNLIMITED);
137 }
138
139
140 /** When called prints a debug message but only if debugging is enabled.
141 */
142 public void debug(String message)
143 {
144 if (debug)
145 {
146 System.err.println(message);
147 }
148 }
149
150
151 /** Destructor which unallocates connection to Lucene.
152 */
153 public void destroy()
154 throws IOException
155 {
156 index_writer.close();
157 index_writer = null;
158 }
159
160
161 /** Delete the indicated document from the Lucene index. This process is
162 * very similar to the initial step of index editing.
163 *
164 * @param node_id The unique identifier of a Lucene document as an
165 * integer
166 */
167 public void deleteDocument(int node_id)
168 throws IOException
169 {
170 debug("GS2LuceneDelete.deleteDocument(" + node_id + ")");
171 debug("- Initial number of documents in index: " + index_writer.numDocs());
172 index_writer.deleteDocuments(new Term("nodeid", "" + node_id));
173 debug("- Final number of documents in index: " + index_writer.numDocs());
174 }
175}
Note: See TracBrowser for help on using the repository browser.