[5944] | 1 | package org.greenstone.gsdl3.gs3build.doctypes;
|
---|
| 2 |
|
---|
| 3 | import java.io.*;
|
---|
| 4 | import java.net.*;
|
---|
| 5 |
|
---|
| 6 | import org.greenstone.gsdl3.gs3build.metadata.*;
|
---|
| 7 |
|
---|
| 8 | public class IndexRecogniser implements RecogniserInterface
|
---|
| 9 | {
|
---|
| 10 | DocumentList listRepository;
|
---|
| 11 |
|
---|
| 12 | public IndexRecogniser(DocumentList listRepository)
|
---|
| 13 | { this.listRepository = listRepository;
|
---|
| 14 | }
|
---|
| 15 |
|
---|
| 16 | public boolean parseDocument(METSFile file)
|
---|
| 17 | {
|
---|
| 18 | String MIMEType = file.getMIMEType();
|
---|
| 19 | if (MIMEType == null ||
|
---|
| 20 | MIMEType.equals("text/plain")) {
|
---|
| 21 | URL location = file.getLocation();
|
---|
| 22 | return this.parseDocument(location);
|
---|
| 23 | }
|
---|
| 24 | return false;
|
---|
| 25 | }
|
---|
| 26 |
|
---|
| 27 | public boolean parseDocument(URL url)
|
---|
| 28 | { String fileName = null;
|
---|
| 29 |
|
---|
| 30 | if (url.toString().startsWith("file://"))
|
---|
| 31 | { fileName = url.toString().substring(7);
|
---|
| 32 | }
|
---|
| 33 | else if (url.toString().startsWith("file:/"))
|
---|
| 34 | { fileName = url.toString().substring(5);
|
---|
| 35 | }
|
---|
| 36 |
|
---|
| 37 | if (fileName != null) {
|
---|
| 38 | String leafName;
|
---|
| 39 | int leafAt = fileName.lastIndexOf(File.separator);
|
---|
| 40 | if (leafAt >= 0) {
|
---|
| 41 | leafName = fileName.substring(leafAt+1);
|
---|
| 42 | }
|
---|
| 43 | else {
|
---|
| 44 | leafName = fileName;
|
---|
| 45 | }
|
---|
| 46 |
|
---|
| 47 | if (leafName.equals("index.txt"))
|
---|
| 48 | {
|
---|
| 49 | System.out.println("Posting Index Document " + fileName);
|
---|
| 50 | IndexDocument doc = new IndexDocument(url);
|
---|
| 51 | this.listRepository.addDocument(doc);
|
---|
| 52 | // TODO: spawn knowledge of children too...
|
---|
| 53 | // System.out.println(doc.getDocumentText());
|
---|
| 54 | return true;
|
---|
| 55 | }
|
---|
| 56 | }
|
---|
| 57 | else {
|
---|
| 58 | // TODO: get Mime type remotely, and then proceed if required
|
---|
| 59 | }
|
---|
| 60 | return false;
|
---|
| 61 | }
|
---|
| 62 | }
|
---|