source: trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/doctypes/GS2PerlRecogniser.java@ 12191

Last change on this file since 12191 was 12191, checked in by kjdon, 18 years ago

committed some changes that I had made ages ago. Not sure if it still compiles - I need to write an ant build file for this, and check compilation. Will do it once I need to - its unclear whether anyone will ever use this again

  • Property svn:keywords set to Author Date Id Revision
File size: 4.1 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.util.ArrayList;
4import java.io.File;
5import java.net.URL;
6import org.w3c.dom.Element;
7
8import org.greenstone.gsdl3.util.GSFile;
9import org.greenstone.gsdl3.util.Processing;
10import org.greenstone.gsdl3.gs3build.util.HTTPTools;
11
12public class GS2PerlRecogniser extends AbstractRecogniser
13{
14 public String plugin_name;
15 public GS2PerlRecogniser()
16 {
17 //this.preferred_mime_type = "application/pdf";
18 //this.preferred_mime_type = "text/plain";
19 this.filename_extensions = new ArrayList();
20 //this.filename_extensions.add(".pdf");
21 //this.filename_extensions.add(".txt");
22 this.document_type = METSDocument.METS_DOCUMENT_TYPE;
23 }
24
25 public boolean configure(Element config_elem) {
26 String mime_type = config_elem.getAttribute("mimeType");
27 if (mime_type.equals("")) {
28 System.err.println("GS2PerlRecogniser configure Error: no mimetype specified");
29 return false;
30 }
31 this.preferred_mime_type = mime_type;
32 String plugin = config_elem.getAttribute("plugin");
33 if (plugin.equals("")) {
34 System.err.println("GS2PerlRecogniser configure Error: no plugin specified");
35 return false;
36 }
37 this.plugin_name = plugin;
38
39 String extensions = config_elem.getAttribute("filenameExtensions");
40 if (extensions.equals("")) {
41 System.err.println("GS2PerlRecogniser configure Error: no filename extensions specified");
42 return false;
43 }
44 String [] exts = extensions.split("|");
45 for (int i=0; i<exts.length; i++) {
46 this.filename_extensions.add(exts[i]);
47 }
48 return true;
49
50 }
51 public boolean parseDocument(URL url)
52 {
53 String filename = null;
54
55 if (url.getProtocol().equals("file")) {
56 filename = url.getPath();
57 }
58
59 if (filename != null) {
60 if ( isAcceptedFilename(filename)) {
61
62 System.out.println("Posting "+document_type+" Document " + filename);
63 METSDocument doc = processFile(url, filename);
64
65 if (doc != null) {
66 this.list_repository.addDocument(doc);
67 return true;
68 }
69 return false;
70 }
71 } else {
72 // Get Mime type remotely, and then proceed if required
73 String mimeType = HTTPTools.getMIMEType(url);
74
75 if (mimeType == this.preferred_mime_type) {
76 System.out.println("Posting "+document_type+" Document " + url.toString());
77 METSDocument doc = processFile(url, null);
78 if (doc != null) {
79
80 this.list_repository.addDocument(doc);
81 return true;
82 }
83 return false;
84 }
85 }
86 return false;
87 }
88
89
90 protected METSDocument processFile(URL url, String filename) {
91
92 // we can only do files at this stage
93 if (filename == null) return null;
94 File file = new File(filename);
95 String local_dir = file.getParent();
96 System.err.println("local dir = "+local_dir);
97
98 //String command = "importone.pl -saveas METS -collectdir /research/kjdon/home/gsdl3/web/sites/localsite/collect -importdir "+local_dir+" -archivedir "+local_dir+" -- kath TEXTPlug "+ file.getName();
99 String command = "importone.pl -saveas METS -collectdir "+GSFile.collectDir(this.coll_manager.siteHome)+" -importdir "+local_dir+" -archivedir "+local_dir+" -- "+this.coll_manager.collectionName +" " +this.plugin_name+" "+ file.getName();
100 System.err.println("Command = "+command);
101
102 int exit_value = Processing.runProcess(command);
103 if (exit_value == 0) {
104 System.out.println("converted success");
105 } else {
106 System.out.println("Unable to convert to mets");
107
108 return null ;
109 }
110
111 //read in the archives.inf file to see the directory for the new mets
112 File arcinfo = new File(local_dir, "archives.inf");
113 if (!arcinfo.isFile()) {
114 System.err.println("cant find archives.inf file ("+arcinfo.getPath()+", aborting");
115 return null;
116 }
117 try {
118 String content = new String(GSFile.readFile(arcinfo.getPath()));
119 content = content.trim();
120 System.err.println("content = "+content);
121 String items[] = content.split("\\s+");
122 String filepath = items[1];
123 // delete the archive file
124 arcinfo.delete();
125 File mets_file = new File(local_dir, filepath);
126 System.err.println("mets file = "+mets_file.getPath());
127 return new METSDocument(mets_file.toURL());
128 } catch (Exception e) {
129 System.err.println("processfile: "+e);
130 return null;
131 }
132 }
133}
134
135
136
Note: See TracBrowser for help on using the repository browser.