Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gs2-extensions/video-and-audio/trunk/src/opt/Terrier/VideoDocument.java@ 26186

Last change on this file since 26186 was 26186, checked in by jmt12, 12 years ago
Adding in (optional) support for video and image processing in DSpace and Terrier. These kinda belong here as they depend on the video-and-audio support (like MediaInfo, HandbrakeCLI, and Hive2) to work
File size: 15.3 KB

Line
1	/**
2	* Adding support for Videos in Terrier
3	* @author: John Thompson, jmt12, #9826509
4	*
5	* The contents of this file are subject to the Mozilla Public License
6	* Version 1.1 (the "License"); you may not use this file except in
7	* compliance with the License. You may obtain a copy of the License at
8	* http://www.mozilla.org/MPL/
9	*
10	* Software distributed under the License is distributed on an "AS IS"
11	* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
12	* the License for the specific language governing rights and limitations
13	* under the License.
14	*
15	* Copyright (c) 2011 The University of Waikato. All Rights Reserved.
16	*/
17	package org.terrier.indexing;
18
19	import java.io.BufferedOutputStream;
20	import java.io.BufferedReader;
21	import java.io.File;
22	import java.io.FileOutputStream;
23	import java.io.InputStream;
24	import java.io.InputStreamReader;
25	import java.io.IOException;
26	import java.io.PrintWriter;
27	import java.io.StringReader;
28	import java.io.Reader;
29	import java.lang.Thread;
30	import java.nio.file.Files;
31	import java.nio.file.FileVisitResult;
32	import static java.nio.file.FileVisitResult.*;
33	import java.nio.file.Path;
34	import java.nio.file.Paths;
35	import java.nio.file.SimpleFileVisitor;
36	import java.nio.file.attribute.BasicFileAttributes;
37	import java.util.Collections;
38	import java.util.Arrays;
39	import java.util.Map;
40	import java.util.Set;
41	import java.util.regex.Matcher;
42	import java.util.regex.Pattern;
43
44	import org.apache.log4j.Logger;
45	import org.terrier.indexing.tokenisation.TokenStream;
46	import org.terrier.indexing.tokenisation.Tokeniser;
47	import org.terrier.utility.ApplicationSetup;
48
49	public class VideoDocument
50	implements Document
51	{
52	/** A reference to the logger for messaging */
53	protected static final Logger logger = Logger.getLogger(FileDocument.class);
54	/** The map of properties (fields) for this document. */
55	protected Map<String,String> properties;
56	/** A reader built from a dummy text string. */
57	protected Reader reader;
58	/** A token stream produced by the configured tokeniser when feed the dummy
59	* reader.
60	*/
61	protected TokenStream tokenizer;
62
63	// Handbrake Configuration
64	protected String streaming_hq_size = ApplicationSetup.getProperty("VideoDocument.streaming_hq_size", "720");
65	protected String streaming_hq_video_bitrate = ApplicationSetup.getProperty("VideoDocument.streaming_hq_video_bitrate", "496");
66	protected String streaming_hq_audio_bitrate = ApplicationSetup.getProperty("VideoDocument.streaming_hq_audio_bitrate", "80");
67
68	/ The preview size (width). /
69	protected final String preview_width = ApplicationSetup.getProperty("ImageDocument.preview_width", "200");
70
71
72	/ Default constructor. /
73	protected VideoDocument() {}
74
75	/** Constructs an instance of the ImageDocument from the given input stream.
76	* @param docStream the input stream that reads the file.
77	* @param docProperties the initial properties (docno, filename)
78	* @param tok the tokeniser defined for this collection
79	*/
80	public VideoDocument(InputStream istream, Map<String,String> default_properties, Tokeniser tok)
81	{
82	logger.info("VideoDocument::VideoDocument()");
83	// 0. Initialization from arguments
84	this.properties = default_properties;
85
86	// Set properties
87	logger.info("VideoDocument - extracting properties");
88	// A. Hardcoded properties
89	this.properties.put("parser", "VideoDocument");
90	this.properties.put("abstract", "This is a video so here is some dummy text to prevent indexer failing.");
91	// B. Properties derived from filename
92	String title = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(System.getProperty("file.separator")) + 1);
93	this.properties.put("title", title);
94	String ext = this.properties.get("filename").substring(properties.get("filename").lastIndexOf(".") + 1);
95	String target_filename = "doc." + ext;
96	this.properties.put("source","doc." + ext);
97	String assoc_filename = "D" + properties.get("docno");
98	this.properties.put("assocfile", assoc_filename);
99
100	// Copy (symlink) the file into place in the shared directory
101	Path raw_video_path = Paths.get(properties.get("filename"));
102	Path assoc_path = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename);
103	// - if the assoc path already exists, we need to recursively delete it and
104	// its contents
105	if (Files.exists(assoc_path))
106	{
107	logger.info("VideoDocument - removing existing (old) associated files");
108	try
109	{
110	Files.walkFileTree(assoc_path, new SimpleFileVisitor<Path>()
111	{
112	@Override
113	public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
114	throws IOException
115	{
116	///ystem.out.println("Deleting file: " + file);
117	Files.delete(file);
118	return CONTINUE;
119	}
120	@Override
121	public FileVisitResult postVisitDirectory(Path dir, IOException exc)
122	throws IOException
123	{
124	///ystem.out.println("Deleting dir: " + dir);
125	if (exc == null)
126	{
127	Files.delete(dir);
128	return CONTINUE;
129	}
130	else
131	{
132	throw exc;
133	}
134	}
135	});
136	}
137	catch (Exception e)
138	{
139	logger.error("Exception while recursively deleting assoc folder:", e);
140	}
141	}
142	assoc_path.toFile().mkdirs(); // bet there is a nice nio way to do this
143	Path target_path = assoc_path.resolve(target_filename);
144	logger.info("VideoDocument - symlinking original video into assoc directory");
145	try
146	{
147	Files.createSymbolicLink(target_path, raw_video_path);
148	}
149	// not supported? We'll try copying below
150	catch (UnsupportedOperationException ex)
151	{
152	}
153	// All other exceptions can be fatal
154	catch (Exception e)
155	{
156	logger.error("Exception while symlinking video: ", e);
157	}
158	// - copy if the file doesn't exist yet
159	if (!target_path.toFile().exists())
160	{
161	logger.info("VideoDocument - symlink failed, copying instead");
162	try
163	{
164	Files.copy(raw_video_path, target_path);
165	}
166	// Fatality!
167	catch (Exception e)
168	{
169	logger.error("Exception while copying video: ", e);
170	}
171	}
172
173	// 1. Extract Metadata using MediaInfo and store as properties
174	logger.info("VideoDocument - extracting video metadata");
175	try
176	{
177	String metadata_command[] = {
178	"mediainfo",
179	"--Output=XML",
180	raw_video_path.toString()
181	};
182	logger.info("VideoDocument - metadata command: " + Arrays.toString(metadata_command));
183	Process metadata_process = Runtime.getRuntime().exec(metadata_command);
184	StreamGobbler metadata_process_error_gobbler = new StreamGobbler(metadata_process.getErrorStream());
185	metadata_process_error_gobbler.start();
186	BufferedReader metadata_br = new BufferedReader(new InputStreamReader(metadata_process.getInputStream()));
187	String line;
188	String type = "Unknown";
189	Pattern type_pattern = Pattern.compile("<track type=\"([a-zA-Z]+)\">");
190	Pattern metadata_pattern = Pattern.compile("<([a-zA-Z_]+)>(.*)</\\1>");
191	while ((line = metadata_br.readLine()) != null)
192	{
193	Matcher type_matcher = type_pattern.matcher(line);
194	if (type_matcher.matches())
195	{
196	type = type_matcher.group(1);
197	}
198	else
199	{
200	Matcher metadata_matcher = metadata_pattern.matcher(line);
201	if (metadata_matcher.matches())
202	{
203	String field = type.toLowerCase() + ":" + metadata_matcher.group(1).toLowerCase();
204	String value = metadata_matcher.group(2);
205	logger.info("VideoDocument - adding metadata: " + field + " => " + value);
206	this.properties.put(field, value);
207	}
208	}
209	}
210	int metadata_status = metadata_process.waitFor();
211	}
212	catch (Exception e)
213	{
214	logger.error("Exception while extracting video metadata:", e);
215	}
216
217	// 2. Convert Video to streamable format using HandbrakeCLI
218	logger.info("VideoDocument - convert video to streamable format");
219	Path converted_video_path = assoc_path.resolve("tsv.mp4");
220	try
221	{
222	String convert_command[] = {
223	"HandBrakeCLI",
224	"-i", raw_video_path.toString(),
225	"-t", "1",
226	"-c", "1",
227	"-o", converted_video_path.toString(),
228	"-f", "mp4",
229	"-O",
230	"-w", this.streaming_hq_size,
231	"--loose-anamorphic",
232	"-e", "x264",
233	"-b", this.streaming_hq_video_bitrate,
234	"-a", "1",
235	"-E", "faac",
236	"-6", "dpl2",
237	"-R", "Auto",
238	"-B", this.streaming_hq_audio_bitrate,
239	"-D", "0.0",
240	"-x", "ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0:threads=1"
241	};
242	logger.info("VideoDocument - convert command: " + Arrays.toString(convert_command));
243	// @todo determine the best way to account for configuration options
244	Process convert_process = Runtime.getRuntime().exec(convert_command);
245	StreamGobbler convert_process_error_gobbler = new StreamGobbler(convert_process.getErrorStream());
246	convert_process_error_gobbler.start();
247	StreamGobbler convert_process_out_gobbler = new StreamGobbler(convert_process.getInputStream());
248	convert_process_out_gobbler.start();
249	int convert_status = convert_process.waitFor();
250	if (convert_status != 0 \|\| !Files.exists(converted_video_path))
251	{
252	throw new Exception("Convert command failed (exit status: " + convert_status + ")");
253	}
254	}
255	catch (Exception e)
256	{
257	logger.error("Exception while converting video to streamable format: ", e);
258	}
259
260	// 3. Generate keyframes from streamable video and attach the shot names
261	// as a property
262	logger.info("VideoDocument - extracting keyframes from video");
263	try
264	{
265	Path shots_path = assoc_path.resolve("shots.xml");
266	String keyframe_command[] = {
267	"hive2_ffmpegsvn",
268	"-o", shots_path.toString(),
269	"-k", assoc_path.toString(),
270	"-m", "0.5",
271	"-l", "0.05",
272	converted_video_path.toString()
273	};
274	logger.info("VideoDocument - keyframe command: " + Arrays.toString(keyframe_command));
275	Process keyframe_process = Runtime.getRuntime().exec(keyframe_command);
276	//Path keyframe_err_file = temp_dir_path.resolve("hive2-err.txt");
277	StreamGobbler keyframe_error_gobbler = new StreamGobbler(keyframe_process.getErrorStream());//, keyframe_err_file.toString());
278	keyframe_error_gobbler.start();
279	//Path keyframe_out_file = temp_dir_path.resolve("hive2-out.txt");
280	StreamGobbler keyframe_out_gobbler = new StreamGobbler(keyframe_process.getInputStream()); //, keyframe_out_file.toString());
281	keyframe_out_gobbler.start();
282	int keyframe_status = keyframe_process.waitFor();
283	if (keyframe_status != 0 \|\| !Files.exists(shots_path))
284	{
285	throw new Exception("Keyframe command failed (exit status: " + keyframe_status + ")");
286	}
287	File files[] = assoc_path.toFile().listFiles();
288	Arrays.sort(files);
289	Pattern image_filename_pattern = Pattern.compile("tsv.*\\.jpg");
290	String keyframe_images = "";
291	for (int i = 0; i < files.length; i++)
292	{
293	String image_filename = files[i].toPath().getFileName().toString();
294	logger.info("VideoDocument - considering keyframe image: " + image_filename);
295	Matcher image_filename_matcher = image_filename_pattern.matcher(image_filename);
296	if (image_filename_matcher.matches())
297	{
298	if (keyframe_images.equals(""))
299	{
300	keyframe_images = image_filename;
301	}
302	else
303	{
304	keyframe_images += "," + image_filename;
305	}
306	}
307	}
308	this.properties.put("preview", keyframe_images);
309	}
310	catch (Exception e)
311	{
312	logger.error("Exception while extracting keyframes from video: ", e);
313	}
314
315	// 4. Create a dummy reader around some dummy text and then tokenize it
316	logger.info("VideoDocument - feed dummy text as token stream to indexer");
317	try
318	{
319	this.reader = new StringReader(this.properties.get("abstract"));
320	this.tokenizer = tok.tokenise(this.reader);
321	}
322	catch (Exception e)
323	{
324	logger.error("Exception while creating dummy text stream: ", e);
325	}
326	logger.info("VideoDocument - Complete!");
327	}
328	/ VideoDocument(InputStream istream, Map<String,String> default_properties, Tokeniser tok) /
329
330	/** Returns true when the end of the document has been reached, and there
331	* are no other terms to be retrieved from it.
332	* @return boolean true if there are no more terms in the document, otherwise
333	* it returns false.
334	*/
335	public boolean endOfDocument()
336	{
337	return !this.tokenizer.hasNext();
338	}
339	/ endOfDocument() /
340
341	/** Returns the underlying map of all the properties defined by this Document.
342	* @since 1.1.0
343	*/
344	public Map<String,String> getAllProperties()
345	{
346	return this.properties;
347	}
348	/ getAllProperties() /
349
350	/** Returns a list of the fields the current term appears in.
351	* @return HashSet a set of the terms that the current term appears in.
352	*/
353	public Set<String> getFields()
354	{
355	// Returns null because there is no support for fields with file documents.
356	return Collections.emptySet();
357	}
358	/ getFields() /
359
360	/** Gets the next term of the document.
361	* <B>NB:</B>Null string returned from getNextTerm() should
362	* be ignored. They do not signify the lack of any more terms.
363	* endOfDocument() should be used to check that.
364	* @return String the next term of the document. Null returns should be
365	* ignored.
366	*/
367	public String getNextTerm()
368	{
369	return this.tokenizer.next();
370	}
371	/ getNextTerm() /
372
373	/** Allows access to a named property of the Document. Examples might be URL,
374	* filename etc.
375	* @param name Name of the property. It is suggested, but not required that
376	* this name should not be case insensitive.
377	* @since 1.1.0
378	*/
379	public String getProperty(String name)
380	{
381	return this.properties.get(name.toLowerCase());
382	}
383	/ getProperty(String name) /
384
385	/** Returns a Reader object so client code can tokenise the document
386	* or deal with the document itself. Examples might be extracting URLs,
387	* language detection. */
388	public Reader getReader()
389	{
390	return this.reader;
391	}
392	/ getReader() /
393	}
394
/**
 * A thread that reads an input stream to its end, line by line, so that the
 * process writing to it can never block on a full pipe. The lines are either
 * thrown away or written to a file, depending on the constructor used.
 */
class StreamGobbler
  extends Thread
{
  /** The stream to drain. It is closed once it has been read to its end. */
  InputStream is;
  /** Where to write the lines read; only set by the two-argument constructor. */
  String file_path;
  /** True when the lines read should be written to file_path. */
  boolean output_to_file;

  /** Creates a gobbler that discards everything it reads.
   *  @param is the stream to drain
   */
  StreamGobbler(InputStream is)
  {
    this.is = is;
    this.output_to_file = false;
  }

  /** Creates a gobbler that writes everything it reads to a file.
   *  @param is the stream to drain
   *  @param file_path the path of the file to write the lines to
   */
  StreamGobbler(InputStream is, String file_path)
  {
    this.is = is;
    this.file_path = file_path;
    this.output_to_file = true;
  }

  /** Drains the stream until end-of-stream or a read error. I/O errors are
   *  printed to standard error rather than propagated.
   */
  @Override
  public void run()
  {
    PrintWriter pw = null;
    if (output_to_file)
    {
      try
      {
        pw = new PrintWriter(new BufferedOutputStream(new FileOutputStream(file_path)));
      }
      catch (IOException ioe)
      {
        // Cannot capture the output, but the stream still has to be drained
        // or the producing process may stall - fall back to discarding.
        ioe.printStackTrace();
      }
    }
    try (BufferedReader br = new BufferedReader(new InputStreamReader(is)))
    {
      String line;
      while ((line = br.readLine()) != null)
      {
        if (pw != null)
        {
          pw.println(line);
        }
        // otherwise do nothing - equivalent to > /dev/null
      }
    }
    catch (IOException ioe)
    {
      ioe.printStackTrace();
    }
    finally
    {
      if (pw != null)
      {
        // close() also flushes whatever is still buffered
        pw.close();
      }
    }
  }
}
446

Note: See TracBrowser for help on using the repository browser.

Download in other formats: