source: trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java@ 12257

Last change on this file since 12257 was 12257, checked in by mdewsnip, 18 years ago

Added package definitions.

  • Property svn:keywords set to Author Date Id Revision
File size: 2.5 KB
Line 
1/**
2 *
3 * class for indexing XML generated by lucenebuildproc.pm
4 * @author [email protected]
5 * @author [email protected]
6 * @version
7 */
8
9package org.nzdl.gsdl.LuceneWrap;
10
11
12import java.io.*;
13import java.util.Vector;
14
15public class GS2LuceneIndexer {
16
17 public static void main (String args[]) throws Exception
18 {
19
20 int verbosity = 1;
21 boolean create = false;
22
23 Vector filtered_args = new Vector();
24
25 int argc = args.length;
26 int i = 0;
27 while (i<argc) {
28 if (args[i].startsWith("-")) {
29
30 // -create causes build to be incremental
31 if (args[i].equals("-create")) {
32 create = true;
33 }
34
35 // -verbosity [num]
36 else if (args[i].equals("-verbosity")) {
37 i++;
38 if (i<argc) {
39 verbosity = Integer.parseInt(args[i]);
40 }
41 }
42 else {
43 System.out.println("Unrecognised option: " + args[i]);
44 }
45 }
46 else {
47 filtered_args.add((Object)args[i]);
48 }
49 i++;
50 }
51
52 if (filtered_args.size() != 3) {
53 System.out.println("Usage: java GS2LuceneIndexer [-create|-verbosity [num]] doc-tag-level building_dir index");
54 return;
55 }
56
57 String doc_tag_level = (String)filtered_args.get(0);
58 String building_dirname = (String)filtered_args.get(1);
59 String index_dirname = (String)filtered_args.get(2);
60
61 String import_dirname = building_dirname + File.separator + "text";
62
63 File import_dir = new File(import_dirname);
64 File building_dir = new File(building_dirname);
65
66 if (!import_dir.exists()) {
67 System.out.println("Couldn't find import directory: "+import_dirname);
68 return;
69 }
70
71 File idx_dir = new File(building_dir.getPath()+File.separator+index_dirname+File.separator);
72 idx_dir.mkdir();
73
74 // Set up indexer
75 IndexXML indexer = new IndexXML(doc_tag_level,idx_dir, create);
76 indexer.init();
77
78 // Read from stdin the files to process
79 try {
80 InputStreamReader isr = new InputStreamReader(System.in, "UTF-8");
81 BufferedReader brin = new BufferedReader(isr);
82
83 StringBuffer xml_text = new StringBuffer(1024);
84 String line = null;
85 while ((line = brin.readLine()) != null) {
86 xml_text.append(line);
87 if (line.startsWith("</Doc>")) {
88 indexer.indexFile(xml_text.toString());
89 xml_text = new StringBuffer(1024);
90 }
91 //File xml_file = new File(import_dir + File.separator + line);
92 //indexer.indexFile(xml_file); // ****
93 }
94
95 brin.close();
96 isr.close();
97
98 } catch (IOException e) {
99 System.err.println("Error: unable to read from stdin");
100 e.printStackTrace();
101 }
102
103 indexer.finish();
104 }
105}
Note: See TracBrowser for help on using the repository browser.