Changeset 35842 for main


Ignore:
Timestamp:
2022-01-01T21:39:55+13:00 (2 years ago)
Author:
davidb
Message:

Introduced scripts for building a 'small' collection (based only on entries from 2015), useful for testing purposes; this required some modifications to their companion ALL-CAPS scripts

Location:
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare
Files:
5 added
7 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/01-DOWNLOAD-ESC-LOD-DATA.sh

    r35062 r35842  
    88echo "" >> "$logfile"
    99
    10 for f in \
    11     "local--countries-in-esc-by-year-in-1956--with-errata.sparql" \
    12     "local--countries-in-esc-by-year-after-1956--with-errata.sparql" \
    13     ; do
     10if [ $# == 0 ] ; then
     11    sparql_query_list="local--countries-in-esc-by-year-in-1956--with-errata.sparql local--countries-in-esc-by-year-after-1956--with-errata.sparql"
     12else
     13    sparql_query_list="$*"
     14fi
     15
     16for f in $sparql_query_list ; do
    1417
    1518    echo "Downloading JSON content via SPARQL query:" | tee -a "$logfile"
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/02-EXPLODE-SPARQLRESULTS-TO-IMPORT.sh

    r35083 r35842  
    2727echo "Copying to '$tmp_cache' then exploding:"
    2828
    29 f="local--countries-in-esc-by-year-in-1956--with-errata.json"
    30 echo "  errata-lod/$f -> $tmp_cache/sparqlresults-$f"   
    31 /bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f"
     29#f="local--countries-in-esc-by-year-in-1956--with-errata.json"
     30#echo "  errata-lod/$f -> $tmp_cache/sparqlresults-$f"   
     31#/bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f"
    3232
    33 explode_metadata_database.pl \
    34     -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \
    35     -collection $collection \
    36     -plugin_options "-metadata_merge_on_concat_fields Country,Year,TitleDisambiguation -OIDtype assigned -OIDmetadata Identifier" \
    37     -plugin JSONSPARQLResultPlugin  \
    38     $tmp_cache/sparqlresults-$f
    3933
    40 if [ $? != 0 ] ; then
    41     echo "Error encountered when exploding:" 1>&2
    42     echo "  $tmp_cache/sparqlresults-$f" 1>&2
    43     exit 1
     34if [ $# == 0 ] ; then
     35    f_list="local--countries-in-esc-by-year-in-1956--with-errata.json local--countries-in-esc-by-year-after-1956--with-errata.json"
     36else
     37    f_list="$*"
    4438fi
    4539
    4640
    47 f="local--countries-in-esc-by-year-after-1956--with-errata.json"
    48 echo "  errata-lod/$f -> $tmp_cache/sparqlresults-$f"   
    49 /bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f"
     41for f in $f_list ; do
    5042
    51 explode_metadata_database.pl \
    52     -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \
    53     -collection $collection \
    54     -plugin_options "-metadata_merge_on_concat_fields Country,Year -OIDtype assigned -OIDmetadata Identifier" \
    55     -plugin JSONSPARQLResultPlugin  \
    56     $tmp_cache/sparqlresults-$f
     43    echo "  errata-lod/$f -> $tmp_cache/sparqlresults-$f"   
     44    /bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f"
    5745
    58 if [ $? != 0 ] ; then
    59     echo "Error encountered when exploding:" 1>&2
    60     echo "  $tmp_cache/sparqlresults-$f" 1>&2
    61     exit 1
    62 fi   
     46    fgrep in-1956 $f > /dev/null
     47    in_1956_status=$?
     48   
     49    if [ $in_1956_status = 0 ] ; then
     50    # matches substring check
     51    explode_metadata_database.pl \
     52        -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \
     53        -collection $collection \
     54        -plugin_options "-metadata_merge_on_concat_fields Country,Year,TitleDisambiguation -OIDtype assigned -OIDmetadata Identifier" \
     55        -plugin JSONSPARQLResultPlugin  \
     56        $tmp_cache/sparqlresults-$f
     57    explode_status=$?
     58    else
     59    explode_metadata_database.pl \
     60        -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \
     61        -collection $collection \
     62        -plugin_options "-metadata_merge_on_concat_fields Country,Year -OIDtype assigned -OIDmetadata Identifier" \
     63        -plugin JSONSPARQLResultPlugin  \
     64        $tmp_cache/sparqlresults-$f
     65    explode_status=$?
     66    fi
     67
     68    if [ $explode_status != 0 ] ; then
     69    echo "Error encountered when exploding:" 1>&2
     70    echo "  $tmp_cache/sparqlresults-$f" 1>&2
     71    exit 1
     72    fi
     73done
     74
     75
     76#f="local--countries-in-esc-by-year-after-1956--with-errata.json"
     77#echo "  errata-lod/$f -> $tmp_cache/sparqlresults-$f"   
     78#/bin/cp "errata-lod/$f" "$tmp_cache/sparqlresults-$f"
     79
     80#explode_metadata_database.pl \
     81#    -collectdir $GSDL3SRCHOME/web/sites/eurovision-lod/collect \
     82#    -collection $collection \
     83#    -plugin_options "-metadata_merge_on_concat_fields Country,Year -OIDtype assigned -OIDmetadata Identifier" \
     84#    -plugin JSONSPARQLResultPlugin  \
     85#    $tmp_cache/sparqlresults-$f
     86#
     87#if [ $? != 0 ] ; then
     88#    echo "Error encountered when exploding:" 1>&2
     89#    echo "  $tmp_cache/sparqlresults-$f" 1>&2
     90#    exit 1
     91#fi   
    6392
    6493echo ""
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/03-GEN-VOTING-METADATA.sh

    r35185 r35842  
    11#!/bin/bash
     2
     3. ./_local_prepare_config.sh
    24
    35prep_dir=voting-excel
    46
    5 if [ ! -d ./my-python3 ] ; then
     7if [ ! -d "./$my_python" ] ; then
    68    echo "" 1>&2
    7     echo "Failed to find: ./my-python3" 1>&2
     9    echo "Failed to find: ./$my_python" 1>&2
    810    echo "Have you run:" 1>&2
    911    echo "  ./CREATE-PYTHON-VENV.sh" 1>&2
     
    4648exit_status=0
    4749
    48 source  ./my-python3/bin/activate
     50source  ./$my_python/bin/activate
    4951
    5052$prep_dir/xlsx-fromcountry-jsonmetadata.py \
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/05-PARSE-ADDITIONAL-METADATA-FROM-WIKIPEDIA.sh

    r35185 r35842  
    11#!/bin/bash
     2
     3. ./_local_prepare_config.sh
     4
     5if [ $# = 2 ] ; then
     6    startyear=$1
     7    endyear=$2
     8else
     9    startyear=1956
     10    endyear=2021
     11fi
    212
    313prep_dir=errata-categories
    414
    5 if [ ! -d ./my-python3 ] ; then
     15if [ ! -d ./$my_python ] ; then
    616    echo "" 1>&2
    7     echo "Failed to find: ./my-python3" 1>&2
     17    echo "Failed to find: ./$my_python" 1>&2
    818    echo "Have you run:" 1>&2
    919    echo "  ./CREATE-PYTHON-VENV.sh" 1>&2
     
    2232exit_status=0
    2333
    24 source  my-python3/bin/activate
     34source  ./$my_python/bin/activate
    2535
    2636$prep_dir/esc-wikipedia-download-and-process-votes.py \
    27     --startyear 1956 \
    28     --endyear 2021 \
     37    --startyear $startyear \
     38    --endyear $endyear \
    2939    --cachedir $prep_dir/cache-wikipedia \
    3040    $prep_dir/metadata-esc-year/metadata_esc.json
     
    4050   
    4151    $prep_dir//esc-wikipedia-download-and-detect-missing-cat-entries.py \
    42     --startyear 1956 \
    43     --endyear 2021 \
     52    --startyear $startyear \
     53    --endyear $endyear \
    4454    --cachedir $prep_dir/cache-wikipedia \
    4555    --queryfile.sparql "$prep_dir/dbpedia--countries-missing-from-esc-category-in-the-year.sparql" \
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/06-COPY-PARSED-ADDITIONAL-METADATA-TO-IMPORT.sh

    r35185 r35842  
    2222echo ""
    2323echo "Copying:"
    24 echo "  errata-categories/metadata-esc-year/*1956*.nul -> $ia_dir/."
    25 /bin/cp "errata-categories/metadata-esc-year/"*1956*.nul "$ia_dir/."
     24
     25ls errata-categories/metadata-esc-year/*1956*.nul >/dev/null 2>&1
     26has_1956_nul_files_status=$?
     27
     28if [ $has_1956_nul_files_status = 0 ] ; then
     29    echo "  errata-categories/metadata-esc-year/*1956*.nul -> $ia_dir/."
     30    /bin/cp "errata-categories/metadata-esc-year/"*1956*.nul "$ia_dir/."
     31fi
     32
    2633echo "  errata-categories/metadata-esc-year/metadata_esc.json -> $ia_dir/."
    2734/bin/cp "errata-categories/metadata-esc-year/metadata_esc.json" "$ia_dir/."
     
    4249forced2021_dir=../import/sparqlresults-local--countries-in-esc-by-year-after-1956--with-errata.00000001
    4350
    44 echo ""
    45 echo "Copying (forcing 2021 files into 00000001 import area):"
    46 echo "  errata-categories/metadata-esc-year/*2021.nul -> $forced2021_dir/."
    47 /bin/cp "errata-categories/metadata-esc-year/"*2021.nul "$forced2021_dir/."
     51ls errata-categories/metadata-esc-year/*2021*.nul >/dev/null 2>&1
     52has_2021_nul_files_status=$?
     53
     54if [ $has_2021_nul_files_status = 0 ] ; then
     55    echo ""
     56    echo "Copying (forcing 2021 files into 00000001 import area):"
     57    echo "  errata-categories/metadata-esc-year/*2021.nul -> $forced2021_dir/."
     58    /bin/cp "errata-categories/metadata-esc-year/"*2021.nul "$forced2021_dir/."
     59fi
    4860
    4961echo ""
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/07a-DOWNLOAD-SPOTIFY-LOD-DATA.sh

    r35835 r35842  
    11#!/bin/bash
     2
     3. ./_local_prepare_config.sh
     4
     5if [ $SPOTIPY_CLIENT_ID = "changeme" ] || [ "$SPOTIPY_CLIENT_SECRET" = "changeme" ] ; then
     6    echo "" >&2
     7    echo "!!!!" >&2
     8    echo "! Environment variable SPOTIPY_CLIENT_ID and/or SPOTIPY_CLIENT_SECRET is 'changeme'" >&2
     9    echo "! Have you edited the export entries in '_local_prepare_config.sh' to be your Spotify API credentials?" >&2
     10    echo "! " >&2
     11    echo "! Credentials are obtained by logging into the Spotify dashboard:" >&2
     12    echo "! " >&2
     13    echo "!   https://developer.spotify.com/dashboard/applications" >&2
     14    echo "! " >&2
     15    echo "! to register the app (e.g., eurovision-lod) from which the app's OAuth 2.0 client-id and client-secret" >&2
     16    echo "! can be obtained" >&2
     17    echo "!!!!" >&2
     18    echo "" >&2
     19       
     20    exit 1
     21fi
    222
    323prep_dir=spotify-musicbrainz
    424
    5 if [ ! -d ./my-python3 ] ; then
     25extra_args_filetail=""
     26if [ $# != 0 ] ; then
     27    extra_args_filetail=`echo "$*" | sed 's/ //g'`
     28fi
     29
     30ttl_file="cached--esc-mir-gold-rdf${extra_args_filetail}.ttl"
     31
     32                         
     33if [ ! -d ./$my_python ] ; then
    634    echo "" 1>&2
    7     echo "Failed to find: ./my-python3" 1>&2
     35    echo "Failed to find: ./$my_python" 1>&2
    836    echo "Have you run:" 1>&2
    937    echo "  ./CREATE-PYTHON-VENV.sh" 1>&2
     
    1341fi
    1442
    15 source  ./my-python3/bin/activate
     43source  ./$my_python/bin/activate
    1644
     45# David Weigl's GitHub code repository for this has now been merged
     46# into the main SVN repository used by Greenstone, so the following
     47# block of code won't trigger anymore to check out the code.  However,
     48# it is important that the pip requirements are run.  This is a detail
     49# now covered off in the README.txt located in the 'prepare' directory
    1750if [ ! -d "$prep_dir" ] ; then
    1851    echo "Checking out Spotify/MusicBrainz Alignment code:"
     
    3063fi
    3164
    32 if [ ! -f "$prep_dir/cached--esc-mir-gold-rdf.ttl" ] ; then
    33    echo "Generating $prep_dir/cached--esc-mir-gold-rdf.ttl"
     65if [ ! -f "$prep_dir/$ttl_file" ] ; then
     66   echo "Generating $prep_dir/$ttl_file"
    3467
    35    echo "Running Spotify/MusicBrainz Alignment" > 07-DOWNLOAD-runtime-log-$$.txt
    36    echo "Started: " >> 07-DOWNLOAD-runtime-log-$$.txt
    37    date >> 07-DOWNLOAD-runtime-log-$$.txt
     68   echo "Running Spotify/MusicBrainz Alignment" > 07a-DOWNLOAD-runtime-log-$$.txt
     69   echo "Started: " >> 07a-DOWNLOAD-runtime-log-$$.txt
     70   date >> 07a-DOWNLOAD-runtime-log-$$.txt
    3871
    39    cd "$prep_dir" && ./RUN.sh && cd ..
     72   cd "$prep_dir" && ./RUN.sh $ttl_file $* && cd ..
    4073
    4174   if [ $? != 0 ] ; then
     
    4881   fi   
    4982
    50    echo "Finished: " > 07-DOWNLOAD-runtime-log-$$.txt
    51    date >> 07-DOWNLOAD-runtime-log-$$.txt
    52      
     83   echo "Finished: " >> 07a-DOWNLOAD-runtime-log-$$.txt
     84   date >> 07a-DOWNLOAD-runtime-log-$$.txt
     85else
     86    echo ""
     87    echo "Detected cached TTL file:"
     88    echo ""
     89    echo "  $prep_dir/$ttl_file"
     90    echo ""
     91    echo "=> Skipping Greenstone+Spotify+MusicBrainz SPARQL pull requests"
     92    echo ""
     93   
    5394fi
    5495
     
    5697echo "Reseting and then populating the triplestore graph-collection"
    5798echo "  eurovision-mir"
    58 gs-triplestore-reset3 eurovision-mir &&  gs-triplestore-add3 eurovision-mir "$prep_dir/esc-mir-gold-rdf.ttl"
     99gs-triplestore-reset3 eurovision-mir &&  gs-triplestore-add3 eurovision-mir "$prep_dir/$ttl_file"
    59100
    60101if [ $? != 0 ] ; then
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/spotify-musicbrainz/RUN.sh

    r35141 r35842  
    11#!/bin/bash
     2
     3ttl_file="cached--esc-mir-gold-rdf.ttl"
     4
     5if [ $# != 0 ] ; then
     6    # Not running in most basic mode, which is no arguments supplied at all
     7    # => for this script, if supplying arguments, first one must be the rdf-out file
     8    ttl_file=$1 ; shift
     9fi
    210
    311python3 ./eurosparqlify.py \
     
    513  --data-out esc-mir-gold-data.out \
    614  --mapping-out esc-mir-gold-mapping.out \
    7   --rdf-out cached--esc-mir-gold-rdf.ttl
     15  --rdf-out $ttl_file \
     16  $*
    817
Note: See TracChangeset for help on using the changeset viewer.