Changeset 35842 for main/trunk/model-sites-dev
- Timestamp: 2022-01-01T21:39:55+13:00 (2 years ago)
- Location: main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare
- Files: 5 added, 7 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/01-DOWNLOAD-ESC-LOD-DATA.sh
r35062 r35842
  8   8    echo "" >> "$logfile"
  9   9
 10      - for f in \
 11      - "local--countries-in-esc-by-year-in-1956--with-errata.sparql" \
 12      - "local--countries-in-esc-by-year-after-1956--with-errata.sparql" \
 13      - ; do
     10  + if [ $# == 0 ] ; then
     11  + sparql_query_list="local--countries-in-esc-by-year-in-1956--with-errata.sparql local--countries-in-esc-by-year-after-1956--with-errata.sparql"
     12  + else
     13  + sparql_query_list="$*"
     14  + fi
     15  +
     16  + for f in $sparql_query_list ; do
 14  17
 15  18    echo "Downloading JSON content via SPARQL query:" | tee -a "$logfile"
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/02-EXPLODE-SPARQLRESULTS-TO-IMPORT.sh
r35083 r35842 27 27 echo "Copying to '$tmp_cache' then exploding:" 28 28 29 f="local--countries-in-esc-by-year-in-1956--with-errata.json"30 echo " errata-lod/$f -> $tmp_cache/sparqlresults-$f"31 /bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f"29 #f="local--countries-in-esc-by-year-in-1956--with-errata.json" 30 #echo " errata-lod/$f -> $tmp_cache/sparqlresults-$f" 31 #/bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f" 32 32 33 explode_metadata_database.pl \34 -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \35 -collection $collection \36 -plugin_options "-metadata_merge_on_concat_fields Country,Year,TitleDisambiguation -OIDtype assigned -OIDmetadata Identifier" \37 -plugin JSONSPARQLResultPlugin \38 $tmp_cache/sparqlresults-$f39 33 40 if [ $ ? != 0 ] ; then41 echo "Error encountered when exploding:" 1>&242 echo " $tmp_cache/sparqlresults-$f" 1>&2 43 exit 134 if [ $# == 0 ] ; then 35 f_list="local--countries-in-esc-by-year-in-1956--with-errata.json local--countries-in-esc-by-year-after-1956--with-errata.json" 36 else 37 f_list="$*" 44 38 fi 45 39 46 40 47 f="local--countries-in-esc-by-year-after-1956--with-errata.json" 48 echo " errata-lod/$f -> $tmp_cache/sparqlresults-$f" 49 /bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f" 41 for f in $f_list ; do 50 42 51 explode_metadata_database.pl \ 52 -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \ 53 -collection $collection \ 54 -plugin_options "-metadata_merge_on_concat_fields Country,Year -OIDtype assigned -OIDmetadata Identifier" \ 55 -plugin JSONSPARQLResultPlugin \ 56 $tmp_cache/sparqlresults-$f 43 echo " errata-lod/$f -> $tmp_cache/sparqlresults-$f" 44 /bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f" 57 45 58 if [ $? != 0 ] ; then 59 echo "Error encountered when exploding:" 1>&2 60 echo " $tmp_cache/sparqlresults-$f" 1>&2 61 exit 1 62 fi 46 fgrep in-1956 $f > /dev/null 47 in_1956_status=$? 
48 49 if [ $in_1956_status = 0 ] ; then 50 # matches substring check 51 explode_metadata_database.pl \ 52 -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \ 53 -collection $collection \ 54 -plugin_options "-metadata_merge_on_concat_fields Country,Year,TitleDisambiguation -OIDtype assigned -OIDmetadata Identifier" \ 55 -plugin JSONSPARQLResultPlugin \ 56 $tmp_cache/sparqlresults-$f 57 explode_status=$? 58 else 59 explode_metadata_database.pl \ 60 -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \ 61 -collection $collection \ 62 -plugin_options "-metadata_merge_on_concat_fields Country,Year -OIDtype assigned -OIDmetadata Identifier" \ 63 -plugin JSONSPARQLResultPlugin \ 64 $tmp_cache/sparqlresults-$f 65 explode_status=$? 66 fi 67 68 if [ $explode_status != 0 ] ; then 69 echo "Error encountered when exploding:" 1>&2 70 echo " $tmp_cache/sparqlresults-$f" 1>&2 71 exit 1 72 fi 73 done 74 75 76 #f="local--countries-in-esc-by-year-after-1956--with-errata.json" 77 #echo " errata-lod/$f -> $tmp_cache/sparqlresults-$f" 78 #/bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f" 79 80 #explode_metadata_database.pl \ 81 # -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \ 82 # -collection $collection \ 83 # -plugin_options "-metadata_merge_on_concat_fields Country,Year -OIDtype assigned -OIDmetadata Identifier" \ 84 # -plugin JSONSPARQLResultPlugin \ 85 # $tmp_cache/sparqlresults-$f 86 # 87 #if [ $? != 0 ] ; then 88 # echo "Error encountered when exploding:" 1>&2 89 # echo " $tmp_cache/sparqlresults-$f" 1>&2 90 # exit 1 91 #fi 63 92 64 93 echo "" -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/03-GEN-VOTING-METADATA.sh
r35185 r35842 1 1 #!/bin/bash 2 3 . ./_local_prepare_config.sh 2 4 3 5 prep_dir=voting-excel 4 6 5 if [ ! -d ./my-python3] ; then7 if [ ! -d "./$my_python" ] ; then 6 8 echo "" 1>&2 7 echo "Failed to find: ./ my-python3" 1>&29 echo "Failed to find: ./$my_python" 1>&2 8 10 echo "Have you run:" 1>&2 9 11 echo " ./CREATE-PYTHON-VENV.sh" 1>&2 … … 46 48 exit_status=0 47 49 48 source ./ my-python3/bin/activate50 source ./$my_python/bin/activate 49 51 50 52 $prep_dir/xlsx-fromcountry-jsonmetadata.py \ -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/05-PARSE-ADDITIONAL-METADATA-FROM-WIKIPEDIA.sh
r35185 r35842 1 1 #!/bin/bash 2 3 . ./_local_prepare_config.sh 4 5 if [ $# = 2 ] ; then 6 startyear=$1 7 endyear=$2 8 else 9 startyear=1956 10 endyear=2021 11 fi 2 12 3 13 prep_dir=errata-categories 4 14 5 if [ ! -d ./ my-python3] ; then15 if [ ! -d ./$my_python ] ; then 6 16 echo "" 1>&2 7 echo "Failed to find: ./ my-python3" 1>&217 echo "Failed to find: ./$my_python" 1>&2 8 18 echo "Have you run:" 1>&2 9 19 echo " ./CREATE-PYTHON-VENV.sh" 1>&2 … … 22 32 exit_status=0 23 33 24 source my-python3/bin/activate34 source ./$my_python/bin/activate 25 35 26 36 $prep_dir/esc-wikipedia-download-and-process-votes.py \ 27 --startyear 1956\28 --endyear 2021\37 --startyear $startyear \ 38 --endyear $endyear \ 29 39 --cachedir $prep_dir/cache-wikipedia \ 30 40 $prep_dir/metadata-esc-year/metadata_esc.json … … 40 50 41 51 $prep_dir//esc-wikipedia-download-and-detect-missing-cat-entries.py \ 42 --startyear 1956\43 --endyear 2021\52 --startyear $startyear \ 53 --endyear $endyear \ 44 54 --cachedir $prep_dir/cache-wikipedia \ 45 55 --queryfile.sparql "$prep_dir/dbpedia--countries-missing-from-esc-category-in-the-year.sparql" \ -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/06-COPY-PARSED-ADDITIONAL-METADATA-TO-IMPORT.sh
r35185 r35842 22 22 echo "" 23 23 echo "Copying:" 24 echo " errata-categories/metadata-esc-year/*1956*.nul -> $ia_dir/." 25 /bin/cp "errata-categories/metadata-esc-year/"*1956*.nul "$ia_dir/." 24 25 ls errata-categories/metadata-esc-year/*1956*.nul >/dev/null 2>&1 26 has_1956_nul_files_status=$? 27 28 if [ $has_1956_nul_files_status = 0 ] ; then 29 echo " errata-categories/metadata-esc-year/*1956*.nul -> $ia_dir/." 30 /bin/cp "errata-categories/metadata-esc-year/"*1956*.nul "$ia_dir/." 31 fi 32 26 33 echo " errata-categories/metadata-esc-year/metadata_esc.json -> $ia_dir/." 27 34 /bin/cp "errata-categories/metadata-esc-year/metadata_esc.json" "$ia_dir/." … … 42 49 forced2021_dir=../import/sparqlresults-local--countries-in-esc-by-year-after-1956--with-errata.00000001 43 50 44 echo "" 45 echo "Copying (forcing 2021 files into 00000001 import area):" 46 echo " errata-categories/metadata-esc-year/*2021.nul -> $forced2021_dir/." 47 /bin/cp "errata-categories/metadata-esc-year/"*2021.nul "$forced2021_dir/." 51 ls errata-categories/metadata-esc-year/*2021*.nul >/dev/null 2>&1 52 has_2021_nul_files_status=$? 53 54 if [ $has_2021_nul_files_status = 0 ] ; then 55 echo "" 56 echo "Copying (forcing 2021 files into 00000001 import area):" 57 echo " errata-categories/metadata-esc-year/*2021.nul -> $forced2021_dir/." 58 /bin/cp "errata-categories/metadata-esc-year/"*2021.nul "$forced2021_dir/." 59 fi 48 60 49 61 echo "" -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/07a-DOWNLOAD-SPOTIFY-LOD-DATA.sh
r35835 r35842 1 1 #!/bin/bash 2 3 . ./_local_prepare_config.sh 4 5 if [ $SPOTIPY_CLIENT_ID = "changeme" ] || [ "$SPOTIPY_CLIENT_SECRET" = "changeme" ] ; then 6 echo "" >&2 7 echo "!!!!" >&2 8 echo "! Environment variable SPOTIPY_CLIENT_ID and/or SPOTIPY_CLIENT_SECRET is 'changeme'" >&2 9 echo "! Have you edited the export entries in '_local_prepare_config.sh' to be your Spotify API credentials?" >&2 10 echo "! " >&2 11 echo "! Credentials are obtained by logging into the Spotify dashboard:" >&2 12 echo "! " >&2 13 echo "! https://developer.spotify.com/dashboard/applications" >&2 14 echo "! " >&2 15 echo "! to register the app (e.g., eurovision-lod) from which the app's OAuth 2.0 client-id and client-secret" >&2 16 echo "! can be obtained" >&2 17 echo "!!!!" >&2 18 echo "" >&2 19 20 exit 1 21 fi 2 22 3 23 prep_dir=spotify-musicbrainz 4 24 5 if [ ! -d ./my-python3 ] ; then 25 extra_args_filetail="" 26 if [ $# != 0 ] ; then 27 extra_args_filetail=`echo "$*" | sed 's/ //g'` 28 fi 29 30 ttl_file="cached--esc-mir-gold-rdf${extra_args_filetail}.ttl" 31 32 33 if [ ! -d ./$my_python ] ; then 6 34 echo "" 1>&2 7 echo "Failed to find: ./ my-python3" 1>&235 echo "Failed to find: ./$my_python" 1>&2 8 36 echo "Have you run:" 1>&2 9 37 echo " ./CREATE-PYTHON-VENV.sh" 1>&2 … … 13 41 fi 14 42 15 source ./ my-python3/bin/activate43 source ./$my_python/bin/activate 16 44 45 # David Weigl's github code repository for this is now been merged 46 # into the main SVN repository used by Greenstone, so the following 47 # block of code won't trigger anymore to check out the code. However, 48 # it is important that the pip requirements are run. This is a detail 49 # now covered off in the README.txt located in the 'prepare' directory 17 50 if [ ! -d "$prep_dir" ] ; then 18 51 echo "Checking out Spotify/MusicBrainz Alignment code:" … … 30 63 fi 31 64 32 if [ ! -f "$prep_dir/ cached--esc-mir-gold-rdf.ttl" ] ; then33 echo "Generating $prep_dir/ cached--esc-mir-gold-rdf.ttl"65 if [ ! 
-f "$prep_dir/$ttl_file" ] ; then 66 echo "Generating $prep_dir/$ttl_file" 34 67 35 echo "Running Spotify/MusicBrainz Alignment" > 07 -DOWNLOAD-runtime-log-$$.txt36 echo "Started: " >> 07 -DOWNLOAD-runtime-log-$$.txt37 date >> 07 -DOWNLOAD-runtime-log-$$.txt68 echo "Running Spotify/MusicBrainz Alignment" > 07a-DOWNLOAD-runtime-log-$$.txt 69 echo "Started: " >> 07a-DOWNLOAD-runtime-log-$$.txt 70 date >> 07a-DOWNLOAD-runtime-log-$$.txt 38 71 39 cd "$prep_dir" && ./RUN.sh && cd ..72 cd "$prep_dir" && ./RUN.sh $ttl_file $* && cd .. 40 73 41 74 if [ $? != 0 ] ; then … … 48 81 fi 49 82 50 echo "Finished: " > 07-DOWNLOAD-runtime-log-$$.txt 51 date >> 07-DOWNLOAD-runtime-log-$$.txt 52 83 echo "Finished: " >> 07a-DOWNLOAD-runtime-log-$$.txt 84 date >> 07a-DOWNLOAD-runtime-log-$$.txt 85 else 86 echo "" 87 echo "Detected cached TTL file:" 88 echo "" 89 echo " $prep_dir/$ttl_file" 90 echo "" 91 echo "=> Skipping Greenstone+Spotify+MusicBrainz SPARQL pull requests" 92 echo "" 93 53 94 fi 54 95 … … 56 97 echo "Reseting and then populating the triplestore graph-collection" 57 98 echo " eurovision-mir" 58 gs-triplestore-reset3 eurovision-mir && gs-triplestore-add3 eurovision-mir "$prep_dir/ esc-mir-gold-rdf.ttl"99 gs-triplestore-reset3 eurovision-mir && gs-triplestore-add3 eurovision-mir "$prep_dir/$ttl_file" 59 100 60 101 if [ $? != 0 ] ; then -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/spotify-musicbrainz/RUN.sh
r35141 r35842
  1   1    #!/bin/bash
      2  +
      3  + ttl_file="cached--esc-mir-gold-rdf.ttl"
      4  +
      5  + if [ $# != 0 ] ; then
      6  + # Not running in most basic mode, which is no arguments supplied at all
      7  + # => for this script, if supplying arguments, first one must be the rdf-out file
      8  + ttl_file=$1 ; shift
      9  + fi
  2  10
  3  11    python3 ./eurosparqlify.py \
  …   …
  5  13    --data-out esc-mir-gold-data.out \
  6  14    --mapping-out esc-mir-gold-mapping.out \
  7      - --rdf-out cached--esc-mir-gold-rdf.ttl
     15  + --rdf-out $ttl_file \
     16  + $*
  8  17
Note: See TracChangeset for help on using the changeset viewer.