source: gs2-extensions/afrepo/trunk/src/src/AFRepo.GSDLEXT.class.php.in@ 28351

Last change on this file since 28351 was 28351, checked in by davidb, 11 years ago

Added in ability to generate download page

File size: 12.8 KB
Line 
1<?php
2
3/**
4 * Bart Nagel <[email protected]>
5 *
6 * AFRepo top-level class to slot in with the Greenstone extension framework
7 *
8 * The audio is expected to be in subdirectories of the 'audio-files' directory, and
9 * for example purposes can be named in such a way that the example
10 * PathClassifier classifier can get meaning from it.
11 */
12
13class AFRepo extends AFRepoBase {
14 private $allfiles;
15
/**
 * Record where the audio files live: the 'audio-files' directory directly
 * below the current working directory.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as the constructor, which would leave af_prefix unset.
 */
public function __construct()
{
    $this->audio_files_dir = "audio-files";
    $this->af_prefix = getcwd() . "/" . $this->audio_files_dir;
}

/**
 * Legacy PHP 4-style entry point, kept so that any explicit call to
 * AFRepo() keeps working; it simply re-runs the set-up above.
 */
public function AFRepo()
{
    $this->__construct();
}
21
/**
 * getName
 * Return the human-readable name of this repository
 */
public function getName() {
    $repository_name = "Salami 4Store audio repository";

    return $repository_name;
}
25
/**
 * getURIPrefix
 * Return the public-facing URI prefix of this repository.
 *
 * NOTE(review): the value is an @...@ placeholder, presumably substituted
 * when this .in template is configured -- confirm against the build setup.
 */
public function getURIPrefix() {
    // previous value: "@afrepo-http-prefix@/afrepo/"
    $public_prefix = "@public-facing-afrepo-http-prefix@";

    return $public_prefix;
}
30
/**
 * getTrippleStoreLocalURIPrefix
 * Return the URI prefix used to talk to the 4store server directly, rather
 * than relying on proxying through the PHP-based 'afrepo' server.
 */
public function getTrippleStoreLocalURIPrefix() {
    // previous (proxied) value: "@afrepo-http-prefix@/4store/"
    $direct_prefix = "@4store-http-prefix@/";

    return $direct_prefix;
}
37
/**
 * getTrippleStoreURIPrefix
 * Return the public-facing URI prefix of the 4store triple store
 */
public function getTrippleStoreURIPrefix() {
    $public_prefix = "@public-facing-4store-http-prefix@";

    return $public_prefix;
}
41
/**
 * getSparqlEndpoint
 * Return the SPARQL endpoint: the public-facing triple store prefix with
 * "sparql/" appended
 */
public function getSparqlEndpoint() {
    $store_prefix = $this->getTrippleStoreURIPrefix();

    return $store_prefix . "sparql/";
}
45
/**
 * getDataEndpoint
 * Return the data endpoint: the local (direct) triple store prefix with
 * "data/" appended
 */
public function getDataEndpoint() {
    $local_prefix = $this->getTrippleStoreLocalURIPrefix();

    return $local_prefix . "data/";
}
49
/**
 * getAudioPath
 * Return the path to the audio links in this repository (without a trailing
 * slash).
 *
 * This is the resolved form of the relative path "audio-ids", so the result
 * depends on the current working directory; realpath() yields false when
 * that directory does not exist.
 */
public function getAudioPath() {
    $links_dir = "audio-ids";

    return realpath($links_dir);
}
58
59
/**
 * remove_audio_files_prefix
 * Return the given path with the leading audio-files prefix
 * ($this->af_prefix) stripped off. A path that does not start with the
 * prefix is returned unchanged.
 */
public function remove_audio_files_prefix($full_filepath) {
    $prefix = $this->af_prefix;
    $prefix_length = strlen($prefix);

    $starts_with_prefix = (substr($full_filepath, 0, $prefix_length) == $prefix);
    if (!$starts_with_prefix) {
        // nothing to strip
        return $full_filepath;
    }

    return substr($full_filepath, $prefix_length);
}
75
76
/**
 * fileInRepo
 * Return true if the audiofile with the given path is in the repository
 * or false if not.
 *
 * The path must resolve on disk: a missing file or broken symlink triggers
 * an E_USER_WARNING and yields false. Membership is then decided by looking
 * the path up, exactly as given, among the keys returned by getAllFiles().
 */
public function fileInRepo($full_filepath) {
    // realpath() serves purely as an existence check here; the lookup below
    // deliberately uses the path as passed in, because that is the form in
    // which getAllFiles() records its keys.
    if (realpath($full_filepath) === false) {
        trigger_error("file '$full_filepath' does not exist on disk or is a broken symlink", E_USER_WARNING);
        return false;
    }

    return array_key_exists($full_filepath, $this->getAllFiles());
}
91
92
/**
 * idToCanonicalPath
 * Return the path to the canonical file with the given ID, i.e. the target
 * of the symbolic link that idToLinkPath() reports for that ID
 */
public function idToCanonicalPath($id) {
    $link_path = $this->idToLinkPath($id);

    return readlink($link_path);
}
100
101
/**
 * filePathToId
 * Return the ID of the audiofile with the given path.
 *
 * The ID is the MD5 hash of "salami:/" followed by the path with the
 * audio-files prefix removed, so it does not depend on where the repository
 * is installed on disk.
 *
 * Throws an Exception when fileInRepo() rejects the path.
 */
public function filePathToId($full_filepath) {
    if (!$this->fileInRepo($full_filepath)) {
        throw new Exception("file with path '$full_filepath' is not in the repository");
    }

    $hash_filepath = "salami:/" . $this->remove_audio_files_prefix($full_filepath);

    return md5($hash_filepath);
}
119
120
121
/**
 * getAllCollectionFilesRec
 * Recursively walk the directory $path and record every non-directory entry
 * found beneath it as a key of $this->allfiles (value true).
 *
 * $collection is only handed down through the recursion; $path is the
 * directory to scan, without a trailing slash. A directory that cannot be
 * opened triggers an E_USER_WARNING and is skipped.
 */
function getAllCollectionFilesRec($collection, $path) {

    echo "Processing directory: " . $path . "\n";

    // Guard against unreadable or vanished directories instead of failing
    // fatally on a method call on false.
    $dir = is_dir($path) ? dir($path) : false;
    if ($dir === false) {
        trigger_error("directory '$path' could not be opened", E_USER_WARNING);
        return;
    }

    while (($entry = $dir->read()) !== false) {

        if ($entry === "." || $entry === "..") {
            continue;
        }

        $fullpath = $path . "/" . $entry;

        // is_dir() follows symbolic links itself, including chains of
        // links and links with relative targets, so no manual readlink()
        // resolution is needed.
        if (is_dir($fullpath)) {
            $this->getAllCollectionFilesRec($collection, $fullpath);
        }
        else {
            // assume it is a file
            $this->allfiles[$fullpath] = true;
        }
    }

    $dir->close();
}
169
170
/**
 * getAllFiles
 * Return an array whose keys are the paths of all files in the repository
 * (each value is true).
 *
 * Every non-hidden directory directly below $this->af_prefix is walked
 * recursively with getAllCollectionFilesRec(); plain files sitting directly
 * in the prefix directory are not included. The result is cached in
 * $this->allfiles. If the prefix directory cannot be opened, an
 * E_USER_WARNING is triggered and an empty array is returned without being
 * cached.
 */
public function getAllFiles()
{
    if (!is_null($this->allfiles)) {
        return $this->allfiles;
    }

    $path = $this->af_prefix;

    $dir = is_dir($path) ? dir($path) : false;
    if ($dir === false) {
        trigger_error("audio files directory '$path' could not be opened", E_USER_WARNING);
        return array();
    }

    // must be initialised before recursing: the recursive walker writes
    // straight into this property
    $this->allfiles = array();

    while (($file = $dir->read()) !== false) {

        if ($file[0] == ".") {
            // skip all dot files and dirs
            continue;
        }

        $fullpath = $path . "/" . $file;

        // is_dir() follows symbolic links itself, including chains of
        // links and links with relative targets
        if (is_dir($fullpath)) {
            // recursively work through each collection directory
            $this->getAllCollectionFilesRec($file, $fullpath);
        }
    }

    $dir->close();

    return $this->allfiles;
}
219
220
/**
 * getAllFilesOLD
 * Earlier, non-recursive variant of getAllFiles(): records the regular,
 * non-hidden files found exactly one level below the non-hidden
 * subdirectories of "audio-files", caching the result in $this->allfiles.
 */
public function getAllFilesOLD() {
    if (is_null($this->allfiles)) {
        $this->allfiles = array();

        $root = realpath("audio-files");
        $top = dir($root);

        while (($name = $top->read()) !== false) {
            $collection_path = $root . "/" . $name;

            // only non-hidden directories are treated as collections
            if ($name[0] == "." || !is_dir($collection_path)) {
                continue;
            }

            $collection = dir($collection_path);
            while (($member = $collection->read()) !== false) {
                $member_path = $collection_path . "/" . $member;

                if ($member[0] != "." && is_file($member_path)) {
                    $this->allfiles[$member_path] = true;
                }
            }
            $collection->close();
        }

        $top->close();
    }

    return $this->allfiles;
}
242
/**
 * getSongFilesOLD
 * Return the canonical paths of the files belonging to the song with the
 * given ID, full version first and ".clip" version second where both
 * exist. Returns an empty array when the link for the ID cannot be
 * resolved.
 */
public function getSongFilesOLD($id) {
    $resolved = realpath($this->idToLinkPath($id));

    if ($resolved === false) {
        return array();
    }

    // is it a clip?
    $is_clip = preg_match('%\.clip\..{1,4}$%', $resolved);
    if ($is_clip) {
        // pair it with the full version when that exists
        $full_version = preg_replace('%\.clip%', "", $resolved);

        return file_exists($full_version)
            ? array($full_version, $resolved)
            : array($resolved);
    }

    // it's a full song: pair it with its clip when that exists
    $clip_version = preg_replace('%(\..{1,4})$%', '.clip\1', $resolved);

    return file_exists($clip_version)
        ? array($resolved, $clip_version)
        : array($resolved);
}
265
/**
 * haveMetadataPermission
 * Return whether metadata may be served; this implementation always
 * allows it
 */
public function haveMetadataPermission() {
    $allowed = true;

    return $allowed;
}
269
/**
 * haveAudioPermission
 * Return whether audio may be served to the current client: the result of
 * ipInRange() for the request's REMOTE_ADDR against 127.0.0.0/8
 */
public function haveAudioPermission() {
    $client_address = $_SERVER["REMOTE_ADDR"];
    $permitted_range = "127.0.0.0/8";

    return ipInRange($client_address, $permitted_range);
}
273
/**
 * getMBID
 * Return the MBID for the audiofile with the given ID, taken from the
 * first classifier (in the order listed below) that is available and has
 * one, or null if none does
 */
public function getMBID($id) {
    // order matters: earlier classifiers take precedence
    $candidates = array(
        new TagClassifier(),
        new EchoprintClassifier(),
        new EchonestClassifier(),
        new PathClassifier(),
    );

    foreach ($candidates as $candidate) {
        if (!$candidate->available()) {
            continue;
        }
        if (!$candidate->hasMBID($id)) {
            continue;
        }

        return $candidate->getMBID($id);
    }

    return null;
}
287
288
289
290
/**
 * extraTriples
 * Return an array of any extra implementation-specific triples that should
 * be added to the RDF given the ID of an audiofile.
 *
 * This should be an array of triples in Turtle format without the trailing
 * dot (see generateRDF). It should return triples about all audiofiles of
 * the same song rather than just the one with the ID given.
 *
 * As it stands, the structured metadata for the ID is looked up but not yet
 * turned into triples, so the returned array is always empty.
 */
protected function extraTriples($id) {

    $no_triples_yet = array();

    // analyze the file, get some metadata (currently unused, see above)
    $structured_metadata = $this->getStructuredMetadata($id);

    // Disabled draft, kept for reference. It relies on $ids, $mbid and
    // $triples, none of which are defined in this method.
    //
    // // loop through the audiofiles for this tune
    // foreach ($ids as $key => $fileid) {
    //
    //     // some identifiers
    //     $audiofile = "repo:$fileid";
    //     $digitalsignal = "repo:$fileid#DigitalSignal";
    //
    //     // triples about the document
    //     $triples[] = "{$audiofile}_ a foaf:Document; foaf:primaryTopic $audiofile";
    //
    //     // this is a mo:AudioFile, which is a mo:MusicalItem
    //     $triples[] = "$audiofile a mo:AudioFile";
    //
    //     // this encodes a corresponding digital signal
    //     $triples[] = "$audiofile mo:encodes $digitalsignal";
    //
    //     // our digital signal is a mo:DigitalSignal (which is a subclass of
    //     // mo:Signal, which is a subclass of mo:MusicalExpression)
    //     $triples[] = "$digitalsignal a mo:DigitalSignal";
    //
    //     // different logic depending whether this audiofile is the preferred
    //     // one or not
    //     if ($key == 0) {
    //         // preferred -- if we have an MBID this Signal is derived from
    //         // the original (that at Musicbrainz)
    //         if (!is_null($mbid))
    //             $triples[] = "$digitalsignal mo:derived_from " . mbidToSignalURI($mbid);
    //         // otherwise we don't assert that it derives from anything
    //     } else {
    //         // non-preferred -- we assert that it is derived from our
    //         // preferred audiofile's Signal
    //         $triples[] = "$digitalsignal mo:derived_from repo:" . $ids[0] . "#DigitalSignal";
    //     }
    //
    //     // analyze the file, get some metadata
    //     $filemetadata = $this->getFileMetadata($fileid);
    //
    //     // mo:AudioFile metadata
    //     if (isset($filemetadata["dataformat"]))
    //         $triples[] = "$audiofile mo:encoding \"" . $filemetadata["dataformat"]
    //             . (isset($filemetadata["bitrate"]) ? " @ " . $filemetadata["bitrate"] . "bps" : "")
    //             . (isset($filemetadata["bitrate_mode"]) ? " " . $filemetadata["bitrate_mode"] : "")
    //             . "\"";
    //
    //     // mo:DigitalSignal metadata
    //     if (isset($filemetadata["playtime_seconds"]))
    //         $triples[] = "$digitalsignal mo:time [ "
    //             . "a time:Interval; "
    //             . "time:seconds \"" . $filemetadata["playtime_seconds"] . "\"^^xsd:float "
    //             . "]";
    //     if (isset($filemetadata["channels"]))
    //         $triples[] = "$digitalsignal mo:channels \"" . $filemetadata["channels"] . "\"^^xsd:int";
    //     if (isset($filemetadata["sample_rate"]))
    //         $triples[] = "$digitalsignal mo:sample_rate \"" . $filemetadata["sample_rate"] . "\"^^xsd:float";
    // }

    return $no_triples_yet;
}
373
374
/**
 * getStructuredMetadata
 * Return, as a hashmap (keyed by algorithm), the structured audio data
 * from the "big crunch" for the audiofile with the given ID, or false when
 * the StructuredAudioClassifier is unavailable or has no metadata for it
 */
public function getStructuredMetadata($id) {

    $structured = new StructuredAudioClassifier();

    if (!$structured->available() || !$structured->hasMetadata($id)) {
        return false;
    }

    return $structured->loadMetadata($id);
}
391
392
/**
 * attributionToHtmlByAlgorithm
 * Return an HTML fragment (a <style> block followed by a <table>) with one
 * row per entry of the attribution metadata obtained from a
 * StructuredAudioClassifier's getAttribution().
 *
 * Each row holds a download link for "datasets/<key>.tar.gz" together with
 * the tarball's size, and a description cell giving the title (linked to
 * the article URL when one is set), the year and the authors, with any
 * e-mail address lightly obfuscated ("@" becomes " at ").
 *
 * Tarball sizes are read from the 'datasets' directory below the current
 * working directory; a missing tarball is reported as "size unavailable"
 * rather than raising a filesize() warning.
 */
public function attributionToHtmlByAlgorithm() {

    $classifier = new StructuredAudioClassifier();

    $full_dataset_path = getcwd() . "/" . "datasets";

    $ametadata = $classifier->getAttribution();

    $style  = "<style>\n";
    $style .= "  tr:nth-child(even) {background: #DDD}\n";
    $style .= "  tr:nth-child(odd) {background: #EEE}\n";
    $style .= "</style>\n";

    $html_str = "$style\n<table>";

    foreach ($ametadata as $key => $entry) {

        $html_str .= "<tr>";

        $title      = $entry["title"];
        $year       = $entry["year"];
        $articleUrl = $entry["articleUrl"];
        $authorList = $entry["authorList"];
        $authors    = $entry["authors"];

        $opt_linked_title = $title;

        if ($articleUrl != false) {
            $opt_linked_title = "<a href=\"$articleUrl\">$title</a>";
        }

        $desc_entry = "<i>$opt_linked_title</i> ($year)<br />\n";

        foreach ($authorList as $author) {
            $author_details = $authors[$author];
            $email = $author_details["email"];

            $desc_entry .= $author;
            if ($email != false) {
                $email = str_replace("@", " at ", $email);
                $desc_entry .= " (<tt>$email</tt>)";
            }

            $desc_entry .= "<br />\n";
        }

        // filesize() warns and returns false for a missing file, so check
        // for the tarball first
        $tarfile = "$full_dataset_path/$key.tar.gz";
        $tarfilesize = is_file($tarfile) ? filesize($tarfile) : false;
        $size_note = ($tarfilesize === false) ? "size unavailable" : "$tarfilesize bytes";

        $download_key = "<a href=\"datasets/$key.tar.gz\">Download $key</a> ($size_note)";
        $html_str .= "  <td>$download_key</td><td>$desc_entry</td>\n";

        $html_str .= "</tr>\n";
    }

    $html_str .= "</table>";

    return $html_str;
}
455
456
/**
 * attributionToHtmlByCollection
 * Return an HTML fragment (a <style> block followed by a <table>) with one
 * row per known collection, each holding a download link for
 * "datasets/<key>.tar.gz" labelled with the collection's display name and
 * the tarball's size.
 *
 * Tarball sizes are read from the 'datasets' directory below the current
 * working directory; a missing tarball is reported as "size unavailable"
 * rather than raising a filesize() warning.
 */
public function attributionToHtmlByCollection() {

    $full_dataset_path = getcwd() . "/" . "datasets";

    // tarball base name => display name
    $cmetadata = array(
        "american"  => "American",
        "classical" => "Classical",
        "coversong" => "Cover Songs",
        "dram"      => "Dram",
        "ehmann"    => "Ehmann",
        "jamendo"   => "Jamendo",
        "latin"     => "Latin",
        "magnatune" => "Magnatune",
        "mazurkas"  => "Mazurkas",
        "uscrap"    => "US Bargain Bin",
        "uspop"     => "US Pop"
    );

    $style  = "<style>\n";
    $style .= "  tr:nth-child(even) {background: #DDD}\n";
    $style .= "  tr:nth-child(odd) {background: #EEE}\n";
    $style .= "</style>\n";

    $html_str = "$style\n<table>";

    foreach ($cmetadata as $key => $value) {

        $html_str .= "<tr>";

        // filesize() warns and returns false for a missing file, so check
        // for the tarball first
        $tarfile = "$full_dataset_path/$key.tar.gz";
        $tarfilesize = is_file($tarfile) ? filesize($tarfile) : false;
        $size_note = ($tarfilesize === false) ? "size unavailable" : "$tarfilesize bytes";

        $download_key = "<a href=\"datasets/$key.tar.gz\">Download $value</a> ($size_note)";
        $html_str .= "  <td>$download_key</td>\n";

        $html_str .= "</tr>\n";
    }

    $html_str .= "</table>";

    return $html_str;
}
502
503
504}
505
506?>
Note: See TracBrowser for help on using the repository browser.