1 | package org.greenstone.gsdl3.gs3build.doctypes;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 | import java.net.*;
|
---|
5 |
|
---|
6 | import org.greenstone.gsdl3.gs3build.metadata.*;
|
---|
7 |
|
---|
8 | public class IndexRecogniser implements RecogniserInterface
|
---|
9 | {
|
---|
10 | DocumentList listRepository;
|
---|
11 |
|
---|
12 | public IndexRecogniser(DocumentList listRepository)
|
---|
13 | { this.listRepository = listRepository;
|
---|
14 | }
|
---|
15 |
|
---|
16 | public boolean parseDocument(METSFile file)
|
---|
17 | {
|
---|
18 | String MIMEType = file.getMIMEType();
|
---|
19 | if (MIMEType == null ||
|
---|
20 | MIMEType.equals("text/plain")) {
|
---|
21 | URL location = file.getLocation();
|
---|
22 | return this.parseDocument(location);
|
---|
23 | }
|
---|
24 | return false;
|
---|
25 | }
|
---|
26 |
|
---|
27 | public boolean parseDocument(URL url)
|
---|
28 | { String fileName = null;
|
---|
29 |
|
---|
30 | if (url.toString().startsWith("file://"))
|
---|
31 | { fileName = url.toString().substring(7);
|
---|
32 | }
|
---|
33 | else if (url.toString().startsWith("file:/"))
|
---|
34 | { fileName = url.toString().substring(5);
|
---|
35 | }
|
---|
36 |
|
---|
37 | if (fileName != null) {
|
---|
38 | String leafName;
|
---|
39 | int leafAt = fileName.lastIndexOf(File.separator);
|
---|
40 | if (leafAt >= 0) {
|
---|
41 | leafName = fileName.substring(leafAt+1);
|
---|
42 | }
|
---|
43 | else {
|
---|
44 | leafName = fileName;
|
---|
45 | }
|
---|
46 |
|
---|
47 | if (leafName.equals("index.txt"))
|
---|
48 | {
|
---|
49 | System.out.println("Posting Index Document " + fileName);
|
---|
50 | IndexDocument doc = new IndexDocument(url);
|
---|
51 | this.listRepository.addDocument(doc);
|
---|
52 | // TODO: spawn knowledge of children too...
|
---|
53 | // System.out.println(doc.getDocumentText());
|
---|
54 | return true;
|
---|
55 | }
|
---|
56 | }
|
---|
57 | else {
|
---|
58 | // TODO: get Mime type remotely, and then proceed if required
|
---|
59 | }
|
---|
60 | return false;
|
---|
61 | }
|
---|
62 | }
|
---|