#!/bin/bash

# PURPOSE
# This is not a nightly script. You use it to regenerate the model-collections
# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
# to documents or something that changes the contents of the index and
# archives folders. This has happened now with the commits
#   http://trac.greenstone.org/changeset/28022 and
#   http://trac.greenstone.org/changeset/28021
# These commits generate new stable HASH OIDs for the existing documents.

# USAGE
# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN
# installation that you want to generate the model collections with.
# You can provide a list of collection names or none, in which case all the
# collections are processed.
# Pass in --svnupdate to copy across the contents of archives and index in the
# rebuilt collection, overwriting their equivalents in the svn model
# collection, but not removing any extraneous HASH folders already present.
# Pass in --svndelete to remove what's in archives and index from svn and
# replace the contents of archives and index with the matching contents from
# the rebuilt collection. Useful for when the HASH directory naming has changed
# and everything in archives and index has to be wiped out and moved back in
# from the rebuilt collection.
# If neither is passed in, then the collections are rebuilt but the svn
# model-collect is not updated and the repository is not updated.

# This program leaves you to do the final commit on the (svn) model-collect
# folder!

# Examples of usage:
#   ./gen-model-colls.sh
#   ./gen-model-colls.sh --svndelete
#   ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
# The first just rebuilds all the collections in a new folder called collect
# and stops there.
# The second rebuilds all the collections and then removes all contents of the
# archives and the index folders from the svn checked-out model-collect and
# removes them from svn. Then it copies across all the contents of the rebuilt
# archives and index into model-collect and svn adds them.
# The third example checks out all the model-collections again, but rebuilds
# only the collections specified in the new collect folder. Then it copies
# across the contents of the archives and index folders of those collections
# into their model-collect equivalents.
# You then still have to do the final svn commit on the model-collect folder.
#
# Also valid examples:
#   ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
#   ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
#   ./gen-model-colls.sh --svnupdate

# PSEUDOCODE
# This script:
# - Checks out the model-collections folder from SVN
# - Makes a copy
# - In the copy: gets rid of their .svn folders, and builds each collection in
#   turn, moving building to index once done
# - If --svndelete was passed in: svn removes model-collect/archives/* and
#   model-collect/index/*, copies over index/* and archives/* from collect into
#   model-collect and svn adds model-collect/archives/* and
#   model-collect/index/*
# - If --svnupdate was passed in: copies collect/archives/* into
#   model-collect/archives/*, and copies collect/index/* into
#   model-collect/index/*, overwriting files that already existed but have now
#   been updated upon rebuild. However, --svnupdate will leave untouched any
#   files and folders unique to model-collect.


#######################################
# svn update/delete a single collection: brings model-collect/<collection>
# in line with the rebuilt collect/<collection>.
# Arguments:
#   $1 - collection name or path (only its basename is used)
#   $2 - mode: "svnupdate" or "svndelete"
# Outputs:
#   Progress messages on stdout. Diff lines that concern files outside the
#   archives and index folders are appended to report.txt in the current
#   directory.
#######################################
update_single_collection() {
    local collection mode diff_result line

    # Get just the basename. Every later use is quoted, so names containing
    # spaces need no manual backslash-escaping.
    collection=$(basename -- "$1")
    mode=$2

    if [[ ! -e "model-collect/$collection" ]]; then
        echo "$collection does not exist in model-collect, commit separately"
        return
    fi

    # diff the svn model and rebuilt model collections, ignoring svn metadata
    # (-F: ".svn" is a literal string, not a regular expression)
    diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" \
        | grep -vF '.svn')

    # if no differences in the current collection, then we're done
    if [[ -z "$diff_result" ]]; then
        echo "No differences in collection $collection"
        return
    fi

    # In the lines returned from the diff, test for archives or index: any
    # line that mentions neither is a difference outside those two folders
    # and is sent to the report. Reading line by line avoids changing IFS and
    # avoids glob expansion of the diff output.
    # http://stackoverflow.com/questions/229551/string-contains-in-bash
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"

    # Now svn remove what's unique to model-collect and svn add what's been
    # rebuilt in index and archives, see
    # http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
    if [[ "$mode" == "svndelete" ]]; then
        svn rm "model-collect/$collection"/archives/*
        svn rm "model-collect/$collection"/index/*
    fi

    # copy across the contents of the rebuilt model-collection's index and
    # archives to the svn model-collect
    cp -r "collect/$collection"/archives/* "model-collect/$collection/archives/."
    cp -r "collect/$collection"/index/* "model-collect/$collection/index/."

    if [[ "$mode" == "svndelete" ]]; then
        svn add "model-collect/$collection"/archives/*
        svn add "model-collect/$collection"/index/*
    fi

    # To undo the changes made by svndelete, run the following manually
    #   svn revert --depth infinity <path>
}

# NOTE(review): the text between the end of update_single_collection and the
# body of usage is missing from the file as received. The single_collection
# function, which the main section below calls to rebuild one collection
# (import, build, move building to index), must be defined here; restore it
# from version control. It is deliberately not reconstructed by guesswork.

#######################################
# Print the command-line help and terminate the script.
# Outputs:
#   Usage text on stdout.
# Returns:
#   Never returns; exits the script with status 1.
#######################################
usage() {
    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    echo "If neither --svnupdate nor --svndelete is provided, the collections are only rebuilt in collect and model-collect is left untouched."
    echo "*******************************************"
    exit 1
}


# The program starts here

# process optional command line arguments
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/

# Execute getopt; on bad arguments it prints its own diagnostic and we show
# the usage (which exits).
if ! ARGS=$(getopt -o ud -l "svnupdate,svndelete" -n "$0" -- "$@"); then
    usage
fi

# Standard getopt idiom: replace the positional parameters with the
# normalised, correctly quoted option list produced by getopt.
eval set -- "$ARGS"

# mode can be svndelete or svnupdate; empty means "rebuild only"
mode=
while true; do
    case "$1" in
        -d|--svndelete)
            shift
            if [[ "$mode" == "svnupdate" ]]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
            fi
            mode=svndelete
            ;;
        -u|--svnupdate)
            shift
            if [[ "$mode" == "svndelete" ]]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
            fi
            mode=svnupdate
            ;;
        --)
            shift
            break
            ;;
        *)
            # getopt always emits "--", so this is purely defensive: never
            # spin forever on an unexpected word.
            break
            ;;
    esac
done

# If no mode was provided (svndelete|svnupdate) as cmd line arg, then don't
# modify the svn model-collect folder. We leave it at rebuilding its copy in
# collect.

# The remaining arguments to the script are assumed to be collections.
# Finished processing arguments.

# report will contain the output of the diff for files that differ outside
# the archives and index folders; start each run with a fresh report
rm -f report.txt

# Need pdfbox for the PDFBox tutorial
if [[ ! -e ext/pdf-box ]]; then
    # Work in a subshell so that a failed "cd ext" can never leave the rest of
    # the script running from the wrong directory.
    (
        cd ext || exit 1
        # The tarball path is relative to ext now that we are inside it.
        if [[ ! -e pdf-box-java.tar.gz ]]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz \
                || exit 1
        fi
        # Extract even when the tarball was already present but never unpacked.
        tar -xvzf pdf-box-java.tar.gz
    ) || echo "Warning: could not set up the pdf-box extension in ext" >&2
fi

if [[ -e model-collect ]]; then
    svn up model-collect
else
    svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
fi

# Without a model-collect checkout there is nothing to rebuild from; stop
# before the existing collect folder gets moved or deleted.
if [[ ! -d model-collect ]]; then
    echo "model-collect is not available (svn checkout failed?). Aborting." >&2
    exit 1
fi

# move the existing collect folder out of the way
if [[ -e collect && ! -e collect_orig ]]; then
    mv collect collect_orig
fi

# make a copy of the model-collect named as the new collect
# and remove the copy's .svn folders
if [[ -e collect_orig ]]; then
    rm -rf collect
    cp -r model-collect collect
fi

# -prune stops find from trying to descend into the .svn folders it deletes
find collect -name ".svn" -type d -prune -exec rm -rf {} +

# Set up the Greenstone environment for building
source setup.bash

# The build step lives in single_collection; fail with a clear message rather
# than a "command not found" per collection if it is not available.
if ! command -v single_collection > /dev/null; then
    echo "single_collection is not defined; cannot rebuild collections." >&2
    exit 1
fi

if (( $# == 0 )); then
    # all collections:
    # for each collection, import, build, move building to index
    for collection in collect/*; do
        # an unmatched glob stays literal; skip it
        [[ -e "$collection" ]] || continue
        single_collection "$collection"
        if [[ -n "$mode" ]]; then
            update_single_collection "$collection" "$mode"
        fi
    done
else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists
    for collection in "$@"; do
        # an empty name would resolve to the collect folder itself
        [[ -n "$collection" ]] || continue
        collection="collect/$collection"
        if [[ -e "$collection" ]]; then
            single_collection "$collection"
            if [[ -n "$mode" ]]; then
                update_single_collection "$collection" "$mode"
            fi
        else
            echo "Can't find collection $collection. Skipping."
        fi
    done
fi

echo
echo "*****************************************"
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point
# the user to it
if [[ -f report.txt && -n "$mode" ]]; then
    echo "Some files outside of archives and index folders were different. See report.txt"
    echo
fi

echo "The original collect directory has been left renamed as collect_orig"
echo

if [[ -z "$mode" ]]; then
    echo "The model-collect folder has not been altered. Changes have only been made to collect"
else
    echo "You still need to run svn status and svn commit on the model-collect folder"
fi
echo "*****************************************"
echo