source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/TextRecogniser.java@ 5944

Last change on this file since 5944 was 5944, checked in by cs025, 20 years ago

Index document type, metadata extensions

  • Property svn:keywords set to Author Date Id Revision
File size: 1.5 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.io.*;
4import java.net.*;
5
6import org.greenstone.gsdl3.gs3build.metadata.*;
7import org.greenstone.gsdl3.gs3build.util.HTTPTools;
8
9public class TextRecogniser implements RecogniserInterface
10{
11 DocumentList listRepository;
12
13 public TextRecogniser(DocumentList listRepository)
14 { this.listRepository = listRepository;
15 }
16
17 public boolean parseDocument(METSFile file)
18 {
19 String MIMEType = file.getMIMEType();
20 if (MIMEType == null ||
21 MIMEType.equals("text/plain")) {
22 URL location = file.getLocation();
23 return this.parseDocument(location);
24 }
25 return false;
26 }
27
28 public boolean parseDocument(URL url)
29 { String fileName = null;
30
31 if (url.toString().startsWith("file://")) {
32 fileName = url.toString().substring(7);
33 }
34 else if (url.toString().startsWith("file:/")) {
35 fileName = url.toString().substring(5);
36 }
37
38 if (fileName != null) {
39 if (fileName.endsWith(".txt") ||
40 fileName.endsWith(".text"))
41 { this.listRepository.addDocument(new TextDocument(url));
42 // TODO: spawn knowledge of children too...
43 System.out.println(">>> Posting text document " + fileName);
44 return true;
45 }
46 }
47 else
48 { // Check MIME type
49 String mimeType = HTTPTools.getMIMEType(url);
50
51 if (mimeType == "text/plain")
52 { System.out.println("Posting Text document " + url.toString());
53
54 TextDocument doc = new TextDocument(url);
55 this.listRepository.addDocument(doc);
56 return true;
57 }
58 }
59
60 return false;
61 }
62}
Note: See TracBrowser for help on using the repository browser.