1 | package org.greenstone.gsdl3.gs3build.doctypes;
|
---|
2 |
|
---|
3 | import java.util.ArrayList;
|
---|
4 | import java.io.File;
|
---|
5 | import java.net.URL;
|
---|
6 | import org.w3c.dom.Element;
|
---|
7 |
|
---|
8 | import org.greenstone.gsdl3.util.GSFile;
|
---|
9 | import org.greenstone.gsdl3.util.Processing;
|
---|
10 | import org.greenstone.gsdl3.gs3build.util.HTTPTools;
|
---|
11 |
|
---|
12 | public class GS2PerlRecogniser extends AbstractRecogniser
|
---|
13 | {
|
---|
14 | public String plugin_name;
|
---|
15 | public GS2PerlRecogniser()
|
---|
16 | {
|
---|
17 | //this.preferred_mime_type = "application/pdf";
|
---|
18 | //this.preferred_mime_type = "text/plain";
|
---|
19 | this.filename_extensions = new ArrayList();
|
---|
20 | //this.filename_extensions.add(".pdf");
|
---|
21 | //this.filename_extensions.add(".txt");
|
---|
22 | this.document_type = METSDocument.METS_DOCUMENT_TYPE;
|
---|
23 | }
|
---|
24 |
|
---|
25 | public boolean configure(Element config_elem) {
|
---|
26 | String mime_type = config_elem.getAttribute("mimeType");
|
---|
27 | if (mime_type.equals("")) {
|
---|
28 | System.err.println("GS2PerlRecogniser configure Error: no mimetype specified");
|
---|
29 | return false;
|
---|
30 | }
|
---|
31 | this.preferred_mime_type = mime_type;
|
---|
32 | String plugin = config_elem.getAttribute("plugin");
|
---|
33 | if (plugin.equals("")) {
|
---|
34 | System.err.println("GS2PerlRecogniser configure Error: no plugin specified");
|
---|
35 | return false;
|
---|
36 | }
|
---|
37 | this.plugin_name = plugin;
|
---|
38 |
|
---|
39 | String extensions = config_elem.getAttribute("filenameExtensions");
|
---|
40 | if (extensions.equals("")) {
|
---|
41 | System.err.println("GS2PerlRecogniser configure Error: no filename extensions specified");
|
---|
42 | return false;
|
---|
43 | }
|
---|
44 | String [] exts = extensions.split("|");
|
---|
45 | for (int i=0; i<exts.length; i++) {
|
---|
46 | this.filename_extensions.add(exts[i]);
|
---|
47 | }
|
---|
48 | return true;
|
---|
49 |
|
---|
50 | }
|
---|
51 | public boolean parseDocument(URL url)
|
---|
52 | {
|
---|
53 | String filename = null;
|
---|
54 |
|
---|
55 | if (url.getProtocol().equals("file")) {
|
---|
56 | filename = url.getPath();
|
---|
57 | }
|
---|
58 |
|
---|
59 | if (filename != null) {
|
---|
60 | if ( isAcceptedFilename(filename)) {
|
---|
61 |
|
---|
62 | System.out.println("Posting "+document_type+" Document " + filename);
|
---|
63 | METSDocument doc = processFile(url, filename);
|
---|
64 |
|
---|
65 | if (doc != null) {
|
---|
66 | this.list_repository.addDocument(doc);
|
---|
67 | return true;
|
---|
68 | }
|
---|
69 | return false;
|
---|
70 | }
|
---|
71 | } else {
|
---|
72 | // Get Mime type remotely, and then proceed if required
|
---|
73 | String mimeType = HTTPTools.getMIMEType(url);
|
---|
74 |
|
---|
75 | if (mimeType == this.preferred_mime_type) {
|
---|
76 | System.out.println("Posting "+document_type+" Document " + url.toString());
|
---|
77 | METSDocument doc = processFile(url, null);
|
---|
78 | if (doc != null) {
|
---|
79 |
|
---|
80 | this.list_repository.addDocument(doc);
|
---|
81 | return true;
|
---|
82 | }
|
---|
83 | return false;
|
---|
84 | }
|
---|
85 | }
|
---|
86 | return false;
|
---|
87 | }
|
---|
88 |
|
---|
89 |
|
---|
90 | protected METSDocument processFile(URL url, String filename) {
|
---|
91 |
|
---|
92 | // we can only do files at this stage
|
---|
93 | if (filename == null) return null;
|
---|
94 | File file = new File(filename);
|
---|
95 | String local_dir = file.getParent();
|
---|
96 | System.err.println("local dir = "+local_dir);
|
---|
97 |
|
---|
98 | //String command = "importone.pl -saveas METS -collectdir /research/kjdon/home/gsdl3/web/sites/localsite/collect -importdir "+local_dir+" -archivedir "+local_dir+" -- kath TEXTPlug "+ file.getName();
|
---|
99 | String command = "importone.pl -saveas METS -collectdir "+GSFile.collectDir(this.coll_manager.siteHome)+" -importdir "+local_dir+" -archivedir "+local_dir+" -- "+this.coll_manager.collectionName +" " +this.plugin_name+" "+ file.getName();
|
---|
100 | System.err.println("Command = "+command);
|
---|
101 |
|
---|
102 | int exit_value = Processing.runProcess(command);
|
---|
103 | if (exit_value == 0) {
|
---|
104 | System.out.println("converted success");
|
---|
105 | } else {
|
---|
106 | System.out.println("Unable to convert to mets");
|
---|
107 |
|
---|
108 | return null ;
|
---|
109 | }
|
---|
110 |
|
---|
111 | //read in the archives.inf file to see the directory for the new mets
|
---|
112 | File arcinfo = new File(local_dir, "archives.inf");
|
---|
113 | if (!arcinfo.isFile()) {
|
---|
114 | System.err.println("cant find archives.inf file ("+arcinfo.getPath()+", aborting");
|
---|
115 | return null;
|
---|
116 | }
|
---|
117 | try {
|
---|
118 | String content = new String(GSFile.readFile(arcinfo.getPath()));
|
---|
119 | content = content.trim();
|
---|
120 | System.err.println("content = "+content);
|
---|
121 | String items[] = content.split("\\s+");
|
---|
122 | String filepath = items[1];
|
---|
123 | // delete the archive file
|
---|
124 | arcinfo.delete();
|
---|
125 | File mets_file = new File(local_dir, filepath);
|
---|
126 | System.err.println("mets file = "+mets_file.getPath());
|
---|
127 | return new METSDocument(mets_file.toURL());
|
---|
128 | } catch (Exception e) {
|
---|
129 | System.err.println("processfile: "+e);
|
---|
130 | return null;
|
---|
131 | }
|
---|
132 | }
|
---|
133 | }
|
---|
134 |
|
---|
135 |
|
---|
136 |
|
---|