source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLRecogniser.java@ 5944

Last change on this file since 5944 was 5944, checked in by cs025, 20 years ago

Index document type, metadata extensions

  • Property svn:keywords set to Author Date Id Revision
File size: 1.5 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.io.*;
4import java.net.*;
5
6import org.greenstone.gsdl3.gs3build.metadata.*;
7import org.greenstone.gsdl3.gs3build.util.HTTPTools;
8
9public class HTMLRecogniser implements RecogniserInterface
10{
11 DocumentList listRepository;
12
13 public HTMLRecogniser(DocumentList listRepository)
14 { this.listRepository = listRepository;
15 }
16
17 public boolean parseDocument(METSFile file)
18 {
19 String MIMEType = file.getMIMEType();
20 if (MIMEType == null ||
21 MIMEType.equals("text/html")) {
22 URL location = file.getLocation();
23 return this.parseDocument(location);
24 }
25 return false;
26 }
27
28 public boolean parseDocument(URL url)
29 { String fileName = null;
30
31 if (url.toString().startsWith("file://")) {
32 fileName = url.toString().substring(7);
33 }
34 else if (url.toString().startsWith("file:/")) {
35 fileName = url.toString().substring(5);
36 }
37
38 if (fileName != null) {
39 if (fileName.endsWith(".htm") ||
40 fileName.endsWith(".html"))
41 { System.out.println("Posting HTML Document " + fileName);
42
43 HTMLDocument doc = new HTMLDocument(url);
44 this.listRepository.addDocument(doc);
45 return true;
46 }
47 }
48 else {
49 // Get Mime type remotely, and then proceed if required
50 String mimeType = HTTPTools.getMIMEType(url);
51
52 if (mimeType == "text/html")
53 { System.out.println("Posting HTML Document " + url.toString());
54
55 HTMLDocument doc = new HTMLDocument(url);
56 this.listRepository.addDocument(doc);
57 return true;
58 }
59 }
60 return false;
61 }
62}
Note: See TracBrowser for help on using the repository browser.