source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLRecogniser.java@ 5800

Last change on this file since 5800 was 5800, checked in by cs025, 20 years ago

Adding gs3build

  • Property svn:keywords set to Author Date Id Revision
File size: 1.3 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.io.*;
4import java.net.*;
5
6import org.greenstone.gsdl3.gs3build.metadata.*;
7import org.greenstone.gsdl3.gs3build.util.HTTPTools;
8
9public class HTMLRecogniser implements RecogniserInterface
10{
11 DocumentList listRepository;
12
13 public HTMLRecogniser(DocumentList listRepository)
14 { this.listRepository = listRepository;
15 }
16
17 public boolean parseDocument(METSFile file)
18 {
19 String MIMEType = file.getMIMEType();
20 if (MIMEType == null ||
21 MIMEType.equals("text/html")) {
22 URL location = file.getLocation();
23 return this.parseDocument(location);
24 }
25 return false;
26 }
27
28 public boolean parseDocument(URL url)
29 { if (url.toString().startsWith("file://")) {
30 String fileName = url.toString().substring(7);
31 if (fileName.endsWith(".htm") ||
32 fileName.endsWith(".html"))
33 { System.out.println("Posting HTML Document " + fileName);
34
35 HTMLDocument doc = new HTMLDocument(url);
36 this.listRepository.addDocument(doc);
37 return true;
38 }
39 }
40 else {
41 // Get Mime type remotely, and then proceed if required
42 String mimeType = HTTPTools.getMIMEType(url);
43
44 if (mimeType == "text/html")
45 { System.out.println("Posting HTML Document " + url.toString());
46
47 HTMLDocument doc = new HTMLDocument(url);
48 this.listRepository.addDocument(doc);
49 return true;
50 }
51 }
52 return false;
53 }
54}
Note: See TracBrowser for help on using the repository browser.