*
* AFRepo top-level class to slot in with the Greenstone extension framework
*
* The audio is expected to be in subdirectories of the 'audio-files' directory, and
* for example purposes can be named in such a way that the example
* PathClassifier classifier can get meaning from it.
*/
class AFRepo extends AFRepoBase {
private $allfiles;
/**
 * __construct
 * Initialise the audio-files directory name and the absolute prefix
 * (current working directory + "/audio-files") that repository paths are
 * measured against.
 *
 * PHP 4-style constructors (a method named after the class) are no longer
 * invoked by `new` as of PHP 8.0, which would leave both properties unset.
 * The parent constructor is deliberately not called, matching the
 * behaviour of the old-style constructor.
 * NOTE(review): confirm AFRepoBase does not need its own constructor run.
 */
public function __construct()
{
    $this->audio_files_dir = "audio-files";
    $this->af_prefix = getcwd() . "/" . $this->audio_files_dir;
}

/**
 * AFRepo
 * Legacy constructor name, retained as an ordinary method so that any code
 * calling it explicitly keeps working. Simply re-runs the initialisation.
 */
public function AFRepo()
{
    $this->__construct();
}
/**
 * getName
 * Return the human-readable name of this repository.
 */
public function getName()
{
    $repository_name = "Salami 4Store audio repository";
    return $repository_name;
}
/**
 * getURIPrefix
 * Return the URI prefix of this repository.
 *
 * The value is a placeholder token; presumably it is substituted when the
 * extension is installed -- TODO confirm. An earlier variant returned
 * "@afrepo-http-prefix@/afrepo/" instead.
 */
public function getURIPrefix()
{
    $uri_prefix = "@public-facing-afrepo-http-prefix@";
    return $uri_prefix;
}
/**
 * getTrippleStoreLocalURIPrefix
 * Return the URI prefix used to reach the 4store server locally.
 *
 * Done this way, the code can talk to the 4store server directly rather
 * than relying on proxying through the PHP-based 'afrepo' server (the
 * proxied form was "@afrepo-http-prefix@/4store/").
 */
public function getTrippleStoreLocalURIPrefix()
{
    $local_prefix = "@4store-http-prefix@/";
    return $local_prefix;
}
/**
 * getTrippleStoreURIPrefix
 * Return the public-facing URI prefix of the triple store (a placeholder
 * token in this file).
 */
public function getTrippleStoreURIPrefix()
{
    $public_prefix = "@public-facing-4store-http-prefix@";
    return $public_prefix;
}
/**
 * getSparqlEndpoint
 * Return the SPARQL endpoint: the public-facing triple store prefix
 * followed by "sparql/".
 */
public function getSparqlEndpoint()
{
    $store_prefix = $this->getTrippleStoreURIPrefix();
    return "{$store_prefix}sparql/";
}
/**
 * getDataEndpoint
 * Return the data endpoint: the local triple store prefix followed by
 * "data/".
 */
public function getDataEndpoint()
{
    $store_prefix = $this->getTrippleStoreLocalURIPrefix();
    return "{$store_prefix}data/";
}
/**
 * getAudioPath
 * Return the resolved path of the "audio-ids" directory, which holds the
 * audio links of this repository (no trailing slash). The name is resolved
 * relative to the current working directory.
 */
public function getAudioPath()
{
    $links_directory = "audio-ids";
    return realpath($links_directory);
}
/**
 * remove_audio_files_prefix
 * Strip the audio-files prefix ($this->af_prefix) from the start of the
 * given path. Paths that do not begin with the prefix are returned
 * unchanged.
 */
public function remove_audio_files_prefix($full_filepath)
{
    $prefix = $this->af_prefix;
    $prefix_length = strlen($prefix);

    // Guard clause: nothing to strip when the path lies outside the prefix.
    if (substr($full_filepath, 0, $prefix_length) != $prefix) {
        return $full_filepath;
    }

    return substr($full_filepath, $prefix_length);
}
/**
 * fileInRepo
 * Return true if the audiofile with the given path is in the repository
 * or false if not.
 *
 * A path that cannot be resolved on disk (missing file or broken symlink)
 * raises an E_USER_WARNING naming the path and yields false. Otherwise the
 * path, exactly as given, is looked up among the keys returned by
 * getAllFiles().
 */
public function fileInRepo($full_filepath) {
    // realpath() serves purely as an existence check; the lookup below
    // uses the path as supplied.
    if (realpath($full_filepath) === false) {
        // The message previously interpolated an undefined variable, so
        // the offending path never appeared in the warning.
        trigger_error("file '$full_filepath' does not exist on disk or is a broken symlink", E_USER_WARNING);
        return false;
    }
    return array_key_exists($full_filepath, $this->getAllFiles());
}
/**
 * idToCanonicalPath
 * Return the path to the canonical file with the given ID, obtained by
 * reading the target of the ID's link path.
 */
public function idToCanonicalPath($id)
{
    $link_path = $this->idToLinkPath($id);
    return readlink($link_path);
}
/**
 * filePathToId
 * Return the ID of the audiofile with the given path.
 *
 * The ID is the MD5 hash of "salami:/" followed by the path with the
 * audio-files prefix removed.
 *
 * Throws an Exception naming the path when fileInRepo() reports that the
 * file is not part of the repository.
 */
public function filePathToId($full_filepath) {
    if (!$this->fileInRepo($full_filepath)) {
        // The message previously interpolated an undefined variable, so
        // the offending path was missing from the exception text.
        throw new Exception("file with path '$full_filepath' is not in the repository");
    }
    $hash_filepath = "salami:/" . $this->remove_audio_files_prefix($full_filepath);
    return md5($hash_filepath);
}
/**
 * getAllCollectionFilesRec
 * Recursively walk the directory $path and record every non-directory
 * entry found beneath it as a key of $this->allfiles (with value true).
 * Prints a "Processing directory" line for each directory visited.
 *
 * Directories are detected with is_dir(), which resolves symbolic links
 * itself. The former hand-rolled readlink() chain checked relative link
 * targets against the current working directory, so a relative symlink to
 * a directory was recorded as a file instead of being descended into.
 *
 * A directory that cannot be opened raises an E_USER_WARNING and is
 * skipped. Symlink cycles are not detected.
 *
 * $collection  name of the top-level collection being scanned (not used
 *              by the scan itself; kept for interface compatibility)
 * $path        directory to scan, without a trailing slash
 */
public function getAllCollectionFilesRec($collection, $path) {
    echo "Processing directory: " . $path . "\n";
    $dir = dir($path);
    if ($dir === false) {
        trigger_error("directory '$path' could not be opened", E_USER_WARNING);
        return;
    }
    while (($entry = $dir->read()) !== false) {
        if ($entry === "." || $entry === "..") {
            continue;
        }
        $fullpath = $path . "/" . $entry;
        if (is_dir($fullpath)) {
            $this->getAllCollectionFilesRec($collection, $fullpath);
        }
        else {
            // anything that is not a directory is assumed to be a file
            $this->allfiles[$fullpath] = true;
        }
    }
    $dir->close();
}
/**
 * getAllFiles
 * Return an array whose keys are the paths of all audio files in the
 * repository (values are true). The result is computed once and cached
 * in $this->allfiles.
 *
 * Every non-dot entry of $this->af_prefix that is a directory (directly or
 * via symbolic links) is treated as a collection and scanned recursively
 * with getAllCollectionFilesRec(). Plain files at the top level are
 * ignored.
 *
 * is_dir() resolves symbolic links itself. The former manual readlink()
 * chain checked relative link targets against the current working
 * directory and therefore skipped relatively-linked collections.
 *
 * Throws an Exception when the audio files directory cannot be opened;
 * nothing is cached in that case.
 */
public function getAllFiles()
{
    if (!is_null($this->allfiles)) {
        return $this->allfiles;
    }
    $path = $this->af_prefix;
    $dir = dir($path);
    if ($dir === false) {
        throw new Exception("audio files directory '$path' could not be opened");
    }
    // Initialise the cache only once the directory is known to be readable.
    $this->allfiles = array();
    while (($file = $dir->read()) !== false) {
        if ($file[0] == ".") {
            // skip all dot files and dirs
            continue;
        }
        $fullpath = $path . "/" . $file;
        if (is_dir($fullpath)) {
            // recursively work through each collection directory
            $this->getAllCollectionFilesRec($file, $fullpath);
        }
    }
    $dir->close();
    return $this->allfiles;
}
/**
 * getAllFilesOLD
 * Older, two-level variant of getAllFiles(): records every non-dot regular
 * file that sits directly inside a non-dot subdirectory of "audio-files"
 * (resolved against the current working directory). The result is cached
 * in $this->allfiles and returned.
 */
public function getAllFilesOLD()
{
    if (!is_null($this->allfiles)) {
        return $this->allfiles;
    }

    $this->allfiles = array();
    $root = realpath("audio-files");
    $root_handle = dir($root);

    while (($collection = $root_handle->read()) !== false) {
        $collection_path = $root . "/" . $collection;
        if ($collection[0] == "." || !is_dir($collection_path)) {
            continue;
        }

        $collection_handle = dir($collection_path);
        while (($name = $collection_handle->read()) !== false) {
            $candidate = $collection_path . "/" . $name;
            if ($name[0] != "." && is_file($candidate)) {
                $this->allfiles[$candidate] = true;
            }
        }
        $collection_handle->close();
    }

    $root_handle->close();
    return $this->allfiles;
}
/**
 * getSongFilesOLD
 * Return the existing files belonging to the song with the given ID, full
 * version first and clip second. A clip is a file whose name ends in
 * ".clip.<ext>" with an extension of 1 to 4 characters.
 *
 * Returns an empty array when the ID's link path cannot be resolved.
 */
public function getSongFilesOLD($id)
{
    $resolved = realpath($this->idToLinkPath($id));
    if ($resolved === false) {
        return array();
    }

    if (preg_match('%\.clip\..{1,4}$%', $resolved)) {
        // The resolved file is a clip; look for its full-length sibling.
        $full_version = preg_replace('%\.clip%', "", $resolved);
        return file_exists($full_version)
            ? array($full_version, $resolved)
            : array($resolved);
    }

    // The resolved file is a full song; look for its clip sibling.
    $clip_version = preg_replace('%(\..{1,4})$%', '.clip\1', $resolved);
    return file_exists($clip_version)
        ? array($resolved, $clip_version)
        : array($resolved);
}
/**
 * haveMetadataPermission
 * Report whether metadata may be served; always true for this repository.
 */
public function haveMetadataPermission()
{
    $metadata_allowed = true;
    return $metadata_allowed;
}
/**
 * haveAudioPermission
 * Report whether audio may be served to the current client: the result of
 * checking the request's REMOTE_ADDR against the range 127.0.0.0/8.
 */
public function haveAudioPermission()
{
    $client_address = $_SERVER["REMOTE_ADDR"];
    $permitted_range = "127.0.0.0/8";
    return ipInRange($client_address, $permitted_range);
}
/**
 * getMBID
 * Return the MBID for the audiofile with the given ID, taken from the
 * first classifier that is available and has one, or null if none does.
 *
 * Classifiers are consulted in this order: tags, Echoprint, Echonest,
 * path.
 */
public function getMBID($id)
{
    // All classifiers are instantiated up front, in priority order.
    $candidates = array(
        new TagClassifier(),
        new EchoprintClassifier(),
        new EchonestClassifier(),
        new PathClassifier(),
    );

    foreach ($candidates as $candidate) {
        if (!$candidate->available()) {
            continue;
        }
        if (!$candidate->hasMBID($id)) {
            continue;
        }
        return $candidate->getMBID($id);
    }

    return null;
}
/**
 * extraTriples
 * Return an array of any extra implementation-specific triples that should
 * be added to the RDF for the audiofile with the given ID.
 *
 * Each element is meant to be a triple in Turtle format without the
 * trailing dot (see generateRDF), and the set should describe all
 * audiofiles of the same song rather than just the one with the ID given.
 *
 * At present nothing is derived from the structured metadata, so the
 * result is always an empty array.
 */
protected function extraTriples($id)
{
    // Analyze the file and fetch its structured metadata. The value is not
    // consumed yet; the lookup itself is retained.
    $metadata = $this->getStructuredMetadata($id);

    // Disabled reference implementation, summarised: for every audiofile ID
    // of the tune it emitted
    //   - "repo:<id>_ a foaf:Document; foaf:primaryTopic repo:<id>"
    //   - "repo:<id> a mo:AudioFile" and "repo:<id> mo:encodes repo:<id>#DigitalSignal"
    //   - "repo:<id>#DigitalSignal a mo:DigitalSignal"
    //   - mo:derived_from: for the preferred (first) file the MusicBrainz
    //     signal URI when an MBID is known, for the others the preferred
    //     file's #DigitalSignal
    //   - from getFileMetadata(): mo:encoding (data format, bitrate, bitrate
    //     mode), mo:time as a time:Interval in seconds (xsd:float),
    //     mo:channels (xsd:int) and mo:sample_rate (xsd:float).

    return array();
}
/**
 * getStructuredMetadata
 * Return, as a hashmap (keyed by algorithm), the structured audio data
 * from the "big crunch" for the audiofile with the given ID, or false when
 * the StructuredAudioClassifier is unavailable or has no metadata for it.
 */
public function getStructuredMetadata($id)
{
    $structured = new StructuredAudioClassifier();

    // Bail out early; hasMetadata() is only consulted when the classifier
    // reports itself available.
    if (!$structured->available() || !$structured->hasMetadata($id)) {
        return false;
    }

    return $structured->loadMetadata($id);
}
/**
 * attributionToHtmlByAlgorithm
 * Build and return a string with one entry per item of the attribution
 * metadata reported by StructuredAudioClassifier::getAttribution().
 *
 * Each metadata value is read as an array with the keys "title", "year",
 * "articleUrl", "authorList" and "authors"; "authors" is indexed by author
 * name and each author record is read for "organization" and "email".
 * The metadata key also names the archive "datasets/<key>.tar.gz" under
 * the current working directory, whose size in bytes is reported.
 *
 * NOTE(review): several string literals below are empty or hold only
 * whitespace and separators, which suggests markup that is not visible
 * here -- confirm against the deployed output before relying on the exact
 * format.
 */
public function attributionToHtmlByAlgorithm() {
$classifier = new StructuredAudioClassifier();
// Dataset archives are looked up relative to the current working directory.
$full_dataset_path = getcwd() . "/" . "datasets";
$ametadata = $classifier->getAttribution();
$style = "\n";
$html_str = "$style\n
";
foreach ($ametadata as $key => $value) {
$html_str .= "";
$entry = $value;
$title = $entry["title"];
$year = $entry["year"];
$articleUrl = $entry["articleUrl"];
$authorList = $entry["authorList"];
$authors = $entry["authors"];
$opt_linked_title = $title;
// NOTE(review): as written, both branches leave the plain title; the
// article URL itself is not used in the visible code.
if ($articleUrl != false) {
$opt_linked_title = "$title";
}
$desc_entry = "$opt_linked_title ($year)
\n";
// Append one line per author, in the order given by authorList.
foreach ($authorList as $author) {
$author_details = $authors[$author];
// $organizations is read here but not used afterwards.
$organizations = $author_details["organization"];
$email = $author_details["email"];
$desc_entry .= $author;
if ($email != false) {
// Obfuscate the address by spelling out the "@" sign.
$email = str_replace("@"," at ",$email);
$desc_entry .= " ($email)";
}
$desc_entry .= "
\n";
}
// filesize() returns false (with a warning) if the archive is missing; the
// result is interpolated into the text as-is.
$tarfilesize = filesize("$full_dataset_path/$key.tar.gz");
$download_key = "Download $key ($tarfilesize bytes)";
$html_str .= " $download_key | $desc_entry | \n";
$html_str .= "
\n";
}
$html_str .= "
";
return $html_str;
}
/**
 * attributionToHtmlByCollection
 * Build and return a string with one download entry per collection in the
 * fixed table below. For each collection key the size in bytes of
 * "datasets/<key>.tar.gz" under the current working directory is reported
 * next to the collection's display name.
 *
 * NOTE(review): several string literals below are empty or hold only
 * whitespace and separators, which suggests markup that is not visible
 * here -- confirm against the deployed output before relying on the exact
 * format.
 */
public function attributionToHtmlByCollection() {
## $classifier = new StructuredAudioClassifier();
// Dataset archives are looked up relative to the current working directory.
$full_dataset_path = getcwd() . "/" . "datasets";
// Collection key (also the archive base name) => display name.
// NOTE(review): "Jameno" and "US Bargin Bin" look like typos for "Jamendo"
// and "US Bargain Bin"; they are user-visible, so confirm before changing.
$cmetadata = array (
"american" => "American",
"classical" => "Classical",
"coversong" => "Cover Songs",
"dram" => "Dram",
"ehmann" => "Ehmann",
"jamendo" => "Jameno",
"latin" => "Latin",
"magnatune" => "Magnatune",
"mazurkas" => "Mazurkas",
"uscrap" => "US Bargin Bin",
"uspop" => "US Pop"
);
$style = "\n";
$html_str = "$style\n";
foreach ($cmetadata as $key => $value) {
$html_str .= "";
// filesize() returns false (with a warning) if the archive is missing; the
// result is interpolated into the text as-is.
$tarfilesize = filesize("$full_dataset_path/$key.tar.gz");
$download_key = "Download $value ($tarfilesize bytes)";
$html_str .= " $download_key | \n";
$html_str .= "
\n";
}
$html_str .= "
";
return $html_str;
}
}
?>