source: trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java@ 13686

Last change on this file since 13686 was 13686, checked in by kjdon, 17 years ago

package has changed to org.greenstone.LuceneWrapper to be consistent with other indexer packages

  • Property svn:keywords set to Author Date Id Revision
File size: 3.4 KB
Line 
/**********************************************************************
 *
 * GS2LuceneIndexer.java
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
26
27package org.greenstone.LuceneWrapper;
28
29
30import java.io.*;
31import java.util.Vector;
32
33/**
34 * class for indexing XML generated by lucenebuildproc.pm
35 */
36
37public class GS2LuceneIndexer {
38
39 public static void main (String args[]) throws Exception
40 {
41
42 int verbosity = 1;
43 boolean create = false;
44
45 Vector filtered_args = new Vector();
46
47 int argc = args.length;
48 int i = 0;
49 while (i<argc) {
50 if (args[i].startsWith("-")) {
51
52 // -create causes build to be incremental
53 if (args[i].equals("-create")) {
54 create = true;
55 }
56
57 // -verbosity [num]
58 else if (args[i].equals("-verbosity")) {
59 i++;
60 if (i<argc) {
61 verbosity = Integer.parseInt(args[i]);
62 }
63 }
64 else {
65 System.out.println("Unrecognised option: " + args[i]);
66 }
67 }
68 else {
69 filtered_args.add((Object)args[i]);
70 }
71 i++;
72 }
73
74 if (filtered_args.size() != 3) {
75 System.out.println("Usage: java GS2LuceneIndexer [-create|-verbosity [num]] doc-tag-level building_dir index");
76 return;
77 }
78
79 String doc_tag_level = (String)filtered_args.get(0);
80 String building_dirname = (String)filtered_args.get(1);
81 String index_dirname = (String)filtered_args.get(2);
82
83 String import_dirname = building_dirname + File.separator + "text";
84
85 File import_dir = new File(import_dirname);
86 File building_dir = new File(building_dirname);
87
88 if (!import_dir.exists()) {
89 System.out.println("Couldn't find import directory: "+import_dirname);
90 return;
91 }
92
93 File idx_dir = new File(building_dir.getPath()+File.separator+index_dirname+File.separator);
94 idx_dir.mkdir();
95
96 // Set up indexer
97 IndexXML indexer = new IndexXML(doc_tag_level,idx_dir, create);
98 indexer.init();
99
100 // Read from stdin the files to process
101 try {
102 InputStreamReader isr = new InputStreamReader(System.in, "UTF-8");
103 BufferedReader brin = new BufferedReader(isr);
104
105 StringBuffer xml_text = new StringBuffer(1024);
106 String line = null;
107 while ((line = brin.readLine()) != null) {
108 xml_text.append(line);
109 if (line.startsWith("</Doc>")) {
110 indexer.indexFile(xml_text.toString());
111 xml_text = new StringBuffer(1024);
112 }
113 //File xml_file = new File(import_dir + File.separator + line);
114 //indexer.indexFile(xml_file); // ****
115 }
116
117 brin.close();
118 isr.close();
119
120 } catch (IOException e) {
121 System.err.println("Error: unable to read from stdin");
122 e.printStackTrace();
123 }
124
125 indexer.finish();
126 }
127}
Note: See TracBrowser for help on using the repository browser.