source: trunk/greenstone3-extensions/vishnu/src/vishnu/builder/LuceneWrapper.java@ 8290

Last change on this file since 8290 was 8290, checked in by kjdon, 20 years ago

some mods, not sure what they are. will tidy them up soon, but didn't want to leave the package uncompilable so committing this now

  • Property svn:keywords set to Author Date Id Revision
File size: 3.8 KB
Line 
1package vishnu.builder;
2
3import java.io.*;
4import java.util.*;
5import org.apache.lucene.analysis.standard.StandardAnalyzer;
6import org.apache.lucene.index.IndexWriter;
7
8
9public class LuceneWrapper extends Indexer
10{
11
12 private static void usage()
13 {
14 System.out.println("java LucenceWrapper -i InputDirectory \n -o OutputDirectory \n -c CollectionName");
15 }
16
17
18 public static void main(String[] args)
19 {
20 String in = null;
21 String out = null;
22 String coll = null;
23
24 if( args.length == 0 ){
25 usage();
26 System.exit(0);
27 }
28
29 int a = 0;
30 while( a < args.length ){
31 if( args[a].equals("-i") )
32 in = args[++a];
33 if( args[a].equals("-o") )
34 out = args[++a];
35 if( args[a].equals("-c") )
36 coll = args[++a];
37 a++;
38 }
39 if (out==null || in==null|| coll==null) {
40 usage();
41 System.exit(0);
42 }
43 if (!out.endsWith(File.separator)) {
44 out += File.separator;
45 }
46 if (!in.endsWith(File.separator)) {
47 in += File.separator;
48 }
49
50 LuceneWrapper luc = new LuceneWrapper();
51
52 out = out + "luc_index";
53 luc.setCollectionName(coll);
54 luc.setOutputDirectory(out);
55 luc.setInputDirectory(in);
56 luc.startIndexing();
57 }
58
59
60 public LuceneWrapper()
61 {
62 super("Lucene Indexer");
63 }
64
65 public void startIndexing() {
66 initialize();
67 try {
68 IndexWriter writer = new IndexWriter(outputDir, new StandardAnalyzer(), true);
69 // this is a bit of a hack putting import here
70 indexFiles(writer, new File(inputDir), "import/");
71 writer.optimize();
72 writer.close();
73 } catch (Exception e) {
74 System.err.println("Error creating the Lucene index: "+e);
75 System.exit(0);
76 }
77
78 }
79
80 private void indexFiles(IndexWriter writer, File input_dir, String current_path) {
81 File [] doc_list = input_dir.listFiles();
82 for (int i=0; i<doc_list.length; i++) {
83 File f = doc_list[i];
84 if (f.isDirectory()) {
85 indexFiles(writer, f, current_path+f.getName()+File.separator);
86 } else {
87 try {
88 writer.addDocument(FileDocument.Document(f, current_path+f.getName()));
89 } catch (Exception e) {
90 System.err.println("Error indexing document "+current_path+f.getName()+": "+e);
91 }
92 }
93 }
94
95 }
96
97
98 public void startIndexingOld()
99 {
100
101 if( inputDir == null || outputDir == null ){
102 usage();
103 System.exit(0);
104 }
105
106
107 /**** check if output directory exists, if not create it ****/
108
109
110 File f = new File(outputDir);
111 if( !f.exists() ){
112 System.out.println("Creating output directory..");
113
114 StringTokenizer strt = new StringTokenizer(outputDir,"/");
115 String d = "";
116 while(strt.hasMoreTokens()){
117 String str = strt.nextToken();
118 if( str.indexOf(".") == -1 ){
119 d += "/" + str;
120 if( !(new File(d)).exists() ){
121 try{
122 Runtime rt = Runtime.getRuntime();
123 Process proc = rt.exec("mkdir " + d);
124 int exitVal = proc.waitFor();
125 }catch(Exception ex){ex.printStackTrace();}
126 }
127 }
128 }
129 }
130
131 File docList = new File(inputDir + "/docs.lst");
132
133 if (!docList.exists()){
134 System.out.println("No documents found");
135 System.exit(0);
136 }
137
138 Vector docs = new Vector();
139
140 try{
141 BufferedReader dis = new BufferedReader(new InputStreamReader(new FileInputStream(docList)));
142 String line = "";
143 while( (line=dis.readLine())!=null ){
144 docs.addElement(new String(inputDir+"/" + line));
145 }
146 dis.close();
147 } catch(IOException ex){ex.printStackTrace();}
148
149 try{
150
151 IndexWriter writer = new IndexWriter(outputDir, new StandardAnalyzer(), true);
152
153 for( int i = 0; i < docs.size(); i++ ){
154 File file = new File((String)docs.elementAt(i));
155 //writer.addDocument(FileDocument.Document(file));
156 }
157 writer.optimize();
158 writer.close();
159 }catch(Exception ex){ex.printStackTrace();}
160 }
161}
162
Note: See TracBrowser for help on using the repository browser.