1 | package org.greenstone.gsdl3.gs3build.doctypes;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 | import java.net.*;
|
---|
5 |
|
---|
6 | import org.greenstone.gsdl3.gs3build.metadata.*;
|
---|
7 | import org.greenstone.gsdl3.gs3build.util.HTTPTools;
|
---|
8 |
|
---|
9 | public class HTMLRecogniser implements RecogniserInterface
|
---|
10 | {
|
---|
11 | DocumentList listRepository;
|
---|
12 |
|
---|
13 | public HTMLRecogniser(DocumentList listRepository)
|
---|
14 | { this.listRepository = listRepository;
|
---|
15 | }
|
---|
16 |
|
---|
17 | public boolean parseDocument(METSFile file)
|
---|
18 | {
|
---|
19 | String MIMEType = file.getMIMEType();
|
---|
20 | if (MIMEType == null ||
|
---|
21 | MIMEType.equals("text/html")) {
|
---|
22 | URL location = file.getLocation();
|
---|
23 | return this.parseDocument(location);
|
---|
24 | }
|
---|
25 | return false;
|
---|
26 | }
|
---|
27 |
|
---|
28 | public boolean parseDocument(URL url)
|
---|
29 | { if (url.toString().startsWith("file://")) {
|
---|
30 | String fileName = url.toString().substring(7);
|
---|
31 | if (fileName.endsWith(".htm") ||
|
---|
32 | fileName.endsWith(".html"))
|
---|
33 | { System.out.println("Posting HTML Document " + fileName);
|
---|
34 |
|
---|
35 | HTMLDocument doc = new HTMLDocument(url);
|
---|
36 | this.listRepository.addDocument(doc);
|
---|
37 | return true;
|
---|
38 | }
|
---|
39 | }
|
---|
40 | else {
|
---|
41 | // Get Mime type remotely, and then proceed if required
|
---|
42 | String mimeType = HTTPTools.getMIMEType(url);
|
---|
43 |
|
---|
44 | if (mimeType == "text/html")
|
---|
45 | { System.out.println("Posting HTML Document " + url.toString());
|
---|
46 |
|
---|
47 | HTMLDocument doc = new HTMLDocument(url);
|
---|
48 | this.listRepository.addDocument(doc);
|
---|
49 | return true;
|
---|
50 | }
|
---|
51 | }
|
---|
52 | return false;
|
---|
53 | }
|
---|
54 | } |
---|