source: trunk/greenstone3-extensions/vishnu/src/vishnu/builder/LuceneWrapper.java@ 8189

Last change on this file since 8189 was 8189, checked in by kjdon, 20 years ago

first version of Imperial College's Visualiser code

  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1package vishnu.builder;
2
3import java.io.*;
4import java.util.*;
5import org.apache.lucene.analysis.standard.StandardAnalyzer;
6import org.apache.lucene.index.IndexWriter;
7
8
9public class LuceneWrapper extends Indexer
10{
11
12 private static void usage()
13 {
14 System.out.println("java LucenceWrapper -i InputDirectory \n -o OutputDirectory \n -c CollectionName");
15 }
16
17
18 public static void main(String[] args)
19 {
20 String in = null;
21 String out = null;
22 String coll = null;
23
24 if( args.length == 0 ){
25 usage();
26 System.exit(0);
27 }
28
29 int a = 0;
30 while( a < args.length ){
31 if( args[a].equals("-i") )
32 in = args[++a];
33 if( args[a].equals("-o") )
34 out = args[++a];
35 if( args[a].equals("-c") )
36 coll = args[++a];
37 a++;
38 }
39 if (out==null || in==null|| coll==null) {
40 usage();
41 System.exit(0);
42 }
43 if (!out.endsWith(File.separator)) {
44 out += File.separator;
45 }
46 if (!in.endsWith(File.separator)) {
47 in += File.separator;
48 }
49
50 LuceneWrapper luc = new LuceneWrapper();
51
52 out = out + "luc_index";
53 luc.setCollectionName(coll);
54 luc.setOutputDirectory(out);
55 luc.setInputDirectory(in);
56 luc.startIndexing();
57 }
58
59
60 public LuceneWrapper()
61 {
62 super("Lucene Indexer");
63 }
64
65 public void startIndexing() {
66
67 if( inputDir == null || outputDir == null ){
68 usage();
69 System.exit(0);
70 }
71
72 /* check if index dir exists */
73 File in = new File(inputDir);
74 if (!in.isDirectory()) {
75 System.err.println("input directory ("+inputDir+") not a directory, exiting...");
76 System.exit(0);
77 }
78
79 /**** check if output directory exists, if not create it ****/
80 File f = new File(outputDir);
81 if( !f.exists() ){
82 try {
83 f.mkdirs();
84 } catch (Exception e) {
85 System.err.println("Couldn't create output directory "+outputDir);
86 System.err.println(e);
87 System.exit(0);
88 }
89 }
90 try {
91 IndexWriter writer = new IndexWriter(outputDir, new StandardAnalyzer(), true);
92 // this is a bit of a hack putting import here
93 indexFiles(writer, in, "import/");
94 writer.optimize();
95 writer.close();
96 } catch (Exception e) {
97 System.err.println("Error creating the Lucene index: "+e);
98 System.exit(0);
99 }
100
101 }
102
103 private void indexFiles(IndexWriter writer, File input_dir, String current_path) {
104 File [] doc_list = input_dir.listFiles();
105 for (int i=0; i<doc_list.length; i++) {
106 File f = doc_list[i];
107 if (f.isDirectory()) {
108 indexFiles(writer, f, current_path+f.getName()+File.separator);
109 } else {
110 try {
111 writer.addDocument(FileDocument.Document(f, current_path+f.getName()));
112 } catch (Exception e) {
113 System.err.println("Error indexing document "+current_path+f.getName()+": "+e);
114 }
115 }
116 }
117
118 }
119
120
121 public void startIndexingOld()
122 {
123
124 if( inputDir == null || outputDir == null ){
125 usage();
126 System.exit(0);
127 }
128
129
130 /**** check if output directory exists, if not create it ****/
131
132
133 File f = new File(outputDir);
134 if( !f.exists() ){
135 System.out.println("Creating output directory..");
136
137 StringTokenizer strt = new StringTokenizer(outputDir,"/");
138 String d = "";
139 while(strt.hasMoreTokens()){
140 String str = strt.nextToken();
141 if( str.indexOf(".") == -1 ){
142 d += "/" + str;
143 if( !(new File(d)).exists() ){
144 try{
145 Runtime rt = Runtime.getRuntime();
146 Process proc = rt.exec("mkdir " + d);
147 int exitVal = proc.waitFor();
148 }catch(Exception ex){ex.printStackTrace();}
149 }
150 }
151 }
152 }
153
154 File docList = new File(inputDir + "/docs.lst");
155
156 if (!docList.exists()){
157 System.out.println("No documents found");
158 System.exit(0);
159 }
160
161 Vector docs = new Vector();
162
163 try{
164 BufferedReader dis = new BufferedReader(new InputStreamReader(new FileInputStream(docList)));
165 String line = "";
166 while( (line=dis.readLine())!=null ){
167 docs.addElement(new String(inputDir+"/" + line));
168 }
169 dis.close();
170 } catch(IOException ex){ex.printStackTrace();}
171
172 try{
173
174 IndexWriter writer = new IndexWriter(outputDir, new StandardAnalyzer(), true);
175
176 for( int i = 0; i < docs.size(); i++ ){
177 File file = new File((String)docs.elementAt(i));
178 //writer.addDocument(FileDocument.Document(file));
179 }
180 writer.optimize();
181 writer.close();
182 }catch(Exception ex){ex.printStackTrace();}
183 }
184}
185
Note: See TracBrowser for help on using the repository browser.