#!/bin/bash

# PURPOSE
# This is not a nightly script. You use it to regenerate the model-collections
# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
# to documents or something that changes the contents of the index and
# archives folders. This has happened now with the commits
# http://trac.greenstone.org/changeset/28022 and
# http://trac.greenstone.org/changeset/28021
# These commits generate new stable HASH OIDs for the existing documents.

# USAGE
# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
# that you want to generate the model collections with.
# You can provide a list of collection names or none, in which case all the collections
# are processed.
# Pass in --svnupdate to copy across the contents of archives and index in the
# rebuilt collection, overwriting their equivalents in the svn model collection,
# but not removing any extraneous HASH folders already present.
# !!!!! IMPORTANT: if you pass in svnupdate, it leaves you to do the final commit on
# the (svn) model-collect folder!
# Pass in --svndelete to remove the archives and index from svn in the model-collect
# and replace this with the rebuilt archives and index
# The --svndelete is useful for when the HASH directory naming has changed and everything
# in archives and index has to be wiped out and moved back in from the rebuilt col.
# Passing in --svndelete will do the final commits on the model-collect folder.
# If neither flag is passed in, then the collections are rebuilt but the svn model-collect
# is not updated and the repository is not updated.

# Examples of usage:
# ./gen-model-colls.sh
# ./gen-model-colls.sh --svndelete
# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
# The first just rebuilds all the collections in a new folder called collect and stops there
# The second rebuilds all the collections in collect and svn removes the archives and the index
# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
# and svn adds them.
# The third example checks out all the model-collections again, but rebuilds only the 2 collections
# specified in the new collect folder. Then it copies across the *contents* of the archives and
# index folders of those 2 collections into their model-collect equivalents. You then still have to
# do the final svn commit on the model-collect folder after looking over the differences.
# Also valid examples:
# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
# ./gen-model-colls.sh --svnupdate

# PSEUDOCODE
# This script:
# Checks out the model-collections folder from SVN
# Makes a copy
# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
# model-collect/archives and model-collect/index.
# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.

# See earlier version of this script:
# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories

#*******************************GLOBAL VARIABLES***************************
# mode can be svndelete, svnupdate or svnaddnew; empty means "rebuild only,
# leave model-collect alone"
mode=
# 1 when --debug was passed: the svn commit/update steps in svn_delete are skipped
debug_mode=0
# custom commit message from --message; empty means "use the built-in defaults"
commit_message=

#*****************************FUNCTIONS*****************************

# Prints the basename of the collection path given as $1 on stdout.
#
# DON'T ADD ANY FURTHER ECHO STATEMENTS IN FUNCTION get_col_basename
# "you have to be really careful on what you have in this function, as having any code which will eventually echo
# will mean that you get incorrect return string."
# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
#
# Arguments: $1 - collection name or path (e.g. collect/Tudor-Basic)
# Outputs:   the last path component, meant to be captured with $(...)
function get_col_basename () {
    # Returning a string does not work in bash (see the link above), hence
    # stdout. The argument is quoted rather than backslash-escaped: escaping
    # spaces and then expanding unquoted still word-splits the name.
    basename -- "$1"
}

# model-collect>svn -R propset svn:ignore -F .customignore .
# where .customignore is a file containing:
# log
# earliestDatestamp
# cache
# model-collect>svn proplist -v
# shows the svn properties, including the svn:ignore property. So it shows what files svn will ignore

# Handles the --svnaddnew mode for one collection: copies collect/<col> into
# model-collect/<col> (minus logs, caches and backups) and svn adds it.
# Does nothing if the collection already exists in model-collect.
# No svn commit is done here.
#
# Arguments: $1 - collection name or path
function svn_add_new_collection () {
    local collection
    collection=$(get_col_basename "$1")

    if [[ -e "model-collect/$collection" ]]; then
        echo "svn_add_new_collection: $collection already exists in model-collect, can't add it to svn."
        return
    fi

    # Using rsync to copy folders while excluding files/subfolders, BUT rsync is not available on lsb
    # http://www.linuxquestions.org/questions/linux-software-2/copy-svn-working-dir-without-svn-hidden-dirs-and-files-620586/
    # See also http://www.thegeekstuff.com/2011/01/rsync-exclude-files-and-folders/,
    # section "Exclude multiple files and directories at the same time" (can also use a file to blacklist folders/files)

    # for GS3 we have a custom ignore file
    # if [ "x$GSDL3SRCHOME" != "x" ]; then
    #     mkdir model-collect/$collection
    #     svn add --force model-collect/$collection
    #     svn propset -R svn:ignore -F model-collect/.customignore model-collect/$collection
    #
    #     if [ "x$commit_message" == "x" ]; then
    #         commit_message="Adding new model collections 1/2: new empty collection dir with svn-ignore set."
    #     fi
    #     if [ "$debug_mode" -eq "0" ]; then
    #         svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $commit_message" model-collect/$collection
    #     fi
    # fi

    # need slash on end of src dir collect/$collection/ !
    rsync -r --exclude=.svn/ --exclude=log/ --exclude=cache/ --exclude=earliestDatestamp \
        --exclude=fail.log --exclude=collectionConfig.bak \
        "collect/$collection/" "model-collect/$collection"

    # find collect/$collection -name ".svn" -type d -exec rm -rf {} \;
    # cp -r collect/$collection model-collect/$collection

    # http://www.thegeekstuff.com/2010/06/bash-array-tutorial/
    # ignorelist=('log' 'cache' 'archives/earliestDatestamp');
    # for ignored in "${ignorelist[@]}"; do
    #     if [ -f model-collect/$collection/$ignorelist ]; then
    #         rm model-collect/$collection/$ignorelist
    #     elif [ -d model-collect/$collection/$ignorelist ]; then
    #         rm -rf model-collect/$collection/$ignorelist
    #     fi
    # done

    svn add --force "model-collect/$collection"

    # http://stackoverflow.com/questions/15880249/subclipse-svn-first-commit-ignore-certain-directories
    # http://wolfram.kriesing.de/blog/index.php/2005/svnignore-and-svnkeywords
    # http://boblet.tumblr.com/post/35755799/setting-up-and-using-svn-ignore-with-subversion
    # http://www.petefreitag.com/item/662.cfm
    # http://svnbook.red-bean.com/en/1.7/svn.advanced.props.special.ignore.html
    # http://stackoverflow.com/questions/116074/how-to-ignore-a-directory-with-svn

    # Dr Bainbridge's way of doing an svn ignore is better and involves fewer steps:
    # create the empty collection folder (-p for subcollections), svn add it,
    # svn:ignore all the files to be ignored
    # copy the contents of the collection across,
    # do an svn add --force on the collection folder

    #mkdir -p model-collect/$collection
    #svn add model-collect/$collection
    #ignorelist=('log' 'cache' 'archives/earliestDatestamp');
    #for ignored in "${ignorelist[@]}"; do
    #    svn propset svn:ignore $ignorelist model-collect/$collection/.
    #done
    # cp -r collect/$collection/* model-collect/$collection/*
    #svn add --force model-collect/$collection
}

# Handles the --svndelete mode of this script for the given collections, or
# for every folder in collect/ when called without arguments:
#   1. svn remove archives and index in each collection
#   2. commit them all
#   3. svn up, to locally delete what was svn-removed
#   4. copy over newly rebuilt archives and index into each model-collection
#      and svn add them
#   5. commit them all
# The commit and svn up steps are skipped when debug_mode is not 0.
#
# Globals:   commit_message (read), debug_mode (read)
# Arguments: $@ - optional list of collection names/paths
# Returns:   1, without touching svn, if the per-collection helpers are missing
function svn_delete () {
    local collection helper
    local rm_message add_message

    # The per-collection work is delegated to two helpers. Refuse to run (and
    # above all refuse to commit) when they are unavailable, rather than
    # printing "command not found" per collection and committing regardless.
    for helper in _del_col_archives_index _add_col_archives_index; do
        if ! command -v "$helper" > /dev/null; then
            echo "svn_delete: required helper $helper is not defined. Leaving model-collect untouched." >&2
            return 1
        fi
    done

    # Each commit gets its own default message. (Storing the first default
    # back into commit_message meant the second default was never used.)
    rm_message="${commit_message:-Clean rebuild of model collections 1/2. Clearing out deprecated archives and index.}"
    add_message="${commit_message:-Clean rebuild of model collections 2/2. Adding rebuilt archives and index.}"

    if [[ -z "$1" ]]; then
        for collection in collect/*; do
            _del_col_archives_index "$collection"
        done
    else
        for collection in "$@"; do
            _del_col_archives_index "$collection"
        done
    fi

    # svn commit all the svn rm statements done above in one go:
    # don't do `svn up` at this point, as doing so will then retrieve all the folders that just were svn-removed
    if (( debug_mode == 0 )); then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $rm_message" model-collect
    fi

    # Having svn committed the deletes, do an svn up to locally delete what was svn-removed above,
    # BEFORE copying from the rebuilt archives and index folders
    if (( debug_mode == 0 )); then
        svn up model-collect
    fi

    # copy from the rebuilt archives and index over into the svn model-collect and svn add them
    if [[ -z "$1" ]]; then
        for collection in collect/*; do
            _add_col_archives_index "$collection"
        done
    else
        for collection in "$@"; do
            _add_col_archives_index "$collection"
        done
    fi

    # commit all the svn add statements done just above in one go
    if (( debug_mode == 0 )); then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $add_message" model-collect
    fi

    # Reports the last collection the loops above processed.
    echo
    echo "*********************"
    echo "Done svn-deleting rebuilt model-collection: $collection"
    echo "*********************"
    echo
}

# To undo the changes made by svndelete, run the following manually
# svn revert --depth infinity

# NOTE(review): at this point the file contained only the tail of a function
# whose beginning is missing:
#       ... > report.txt fi done IFS=$IFS_BAK IFS_BAK= }
# It looks like the end of a loop that wrote diff output to report.txt while
# IFS was temporarily changed. It cannot be reconstructed from what remains,
# so it is kept here as a comment only. The definitions of
# _del_col_archives_index and _add_col_archives_index (used by svn_delete)
# are also absent from this file and need restoring from version control.

# Handles the --svnupdate mode of this script for a single collection: copies
# the contents of the rebuilt archives and index folders over their
# model-collect equivalents and svn adds whatever is new there. Files unique
# to model-collect are left alone. No svn commit is done here.
#
# Arguments: $1 - collection name or path
function update_single_collection () {
    local collection
    collection=$(get_col_basename "$1")

    if [[ ! -e "model-collect/$collection" ]]; then
        echo "update_single_collection: $collection does not exist in model-collect"
        return
    fi

    # copy across the contents of the rebuilt model-collection's index and archives to the svn model-collect
    # (the trailing * stays outside the quotes so that it still globs)
    rsync -r --exclude=.svn/ --exclude=cache/ --exclude=earliestDatestamp \
        "collect/$collection/archives/"* "model-collect/$collection/archives"
    rsync -r --exclude=.svn/ --exclude=cache/ \
        "collect/$collection/index/"* "model-collect/$collection/index"

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # see also http://stackoverflow.com/questions/116074/how-to-ignore-a-directory-with-svn
    # if [ "$debug_mode" -eq "0" ]; then
    svn add --force "model-collect/$collection/archives/"* --auto-props --parents --depth infinity -q
    svn add --force "model-collect/$collection/index/"* --auto-props --parents --depth infinity -q
    # fi
    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    # if etc/collect.cfg is different, copy it across too?

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$collection"
    echo "*********************"
    echo
}

# Re-builds a single collection in "collect" (which is a copy of
# model-collect): runs import.pl and buildcol.pl, then replaces the
# collection's index folder with the freshly built building folder.
#
# Globals:   GSDL3SRCHOME (read) - when set, the GS3 "-site localsite" flags are used
# Arguments: $1 - collection name or path
# Returns:   1 if no building folder exists after the build
function build_single_collection () {
    local collection
    collection=$(get_col_basename "$1")

    # GS2 or GS3 building
    if [[ -z "$GSDL3SRCHOME" ]]; then
        import.pl -removeold "$collection"
        buildcol.pl -removeold "$collection"
    else
        import.pl -site localsite -removeold "$collection"
        buildcol.pl -site localsite -removeold "$collection"
    fi

    # Only throw the old index away once there is something to replace it with.
    if [[ ! -d "collect/$collection/building" ]]; then
        echo "build_single_collection: no building folder found for $collection, leaving its index untouched." >&2
        return 1
    fi
    rm -rf -- "collect/$collection/index"
    mv -- "collect/$collection/building" "collect/$collection/index"

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $collection"
    echo "*********************"
    echo
}

# Prints the usage message and exits.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
#
# Arguments: $1 - optional exit status, defaults to 1
function usage() {
    # usage() { echo "Usage: $0 [-s <45|90>] [-p ]" 1>&2; exit 1; }
    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete|--svnaddnew] [--debug] [--message 'custom commit message'] [col1, col2, col3,...]"
    echo "If no collections are provided, all collections will be processed."
    echo "If none of svnupdate, svndelete or svnaddnew is provided, the collections are rebuilt but model-collect is left untouched."
    echo "*******************************************"
    exit "${1:-1}"
}

# Records the requested svn mode, refusing to combine different modes.
#
# Globals:   mode (read, written)
# Arguments: $1 - requested mode, $2 - the other modes, named in the error message
function set_mode () {
    local requested=$1
    local others=$2

    if [[ -n "$mode" && "$mode" != "$requested" ]]; then
        echo
        echo "Can't use both $requested and $others"
        usage
    fi
    mode=$requested
}

# Removes every .svn folder below the folder given as $1.
function strip_svn_dirs () {
    # -prune stops find from descending into the folders it is about to delete
    find "$1" -name ".svn" -type d -prune -exec rm -rf {} +
}

#*******************************MAIN PROGRAM***************************

# process optional command line arguments
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/

# Execute getopt; bad arguments print the usage message and exit with 1
if ! ARGS=$(getopt -o m:uxadh -l "message:,svnupdate,svndelete,svnaddnew,debug,help" -n "$0" -- "$@"); then
    usage
fi

# the standard idiom for loading getopt's re-quoted output back into $@
eval set -- "$ARGS"

# -n: http://tldp.org/LDP/abs/html/testconstructs.html
while true; do
    case "$1" in
        -h|--help)
            usage 0
            ;;
        -a|--svnaddnew)
            shift
            set_mode svnaddnew "svndelete/svnupdate"
            ;;
        -x|--svndelete)
            shift
            set_mode svndelete "svnupdate/svnaddnew"
            ;;
        -u|--svnupdate)
            shift
            set_mode svnupdate "svndelete/svnaddnew"
            ;;
        -d|--debug)
            shift
            debug_mode=1
            ;;
        -m|--message)
            # getopt always supplies the argument, possibly empty. Consume
            # both words: leaving an empty one behind made this loop spin forever.
            commit_message=$2
            shift 2
            ;;
        --)
            shift
            break
            ;;
        *)
            # cannot happen with getopt-validated input, but never loop forever
            usage
            ;;
    esac
done

#echo "commit message: $commit_message"
#echo "Debug mode is: $debug_mode"
#exit

# Set up the Greenstone environment, this is mainly for building, but also for locating a
# Greenstone installation folder, in case this script doesn't live in one.
# Then cd into the collect folder's parent for the Greenstone installation.
# Test for GS3 home env then for GS2 home and if found, cd into the GS2/GS3 home location and
# run setup, else try to find setup.bash/gs3-setup.bash in the current location and run it.
# Else print a warning message saying that GSDLHOME is not set.
# Everything below works on relative paths (collect, model-collect), so a
# failed cd must stop the script.
if [[ -n "$GSDL3SRCHOME" ]]; then
    echo "cd-ing into Greenstone 3 home directory: $GSDL3SRCHOME"
    cd "$GSDL3SRCHOME/web/sites/localsite" || exit 1
elif [[ -n "$GSDLHOME" ]]; then
    echo "cd-ing into Greenstone home directory: $GSDLHOME"
    cd "$GSDLHOME" || exit 1
else
    if [[ -e gs3-setup.sh ]]; then
        source ./gs3-setup.sh
        cd "$GSDL3SRCHOME/web/sites/localsite" || exit 1
    elif [[ -e setup.bash ]]; then
        source ./setup.bash
    else
        echo "No Greenstone Home set and no setup script found in current folder."
        echo "You need to source the setup script in a Greenstone installation. Exiting."
        exit 255
    fi
fi

# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect

# the remaining arguments to the script are assumed to be collections

# debugging
#for collection in "$@"; do
#    collection=collect/$collection
#    echo "Collection: $collection"
#done

# finished processing arguments

# remove any report.txt left over from an earlier run
# (the original comment here is cut off: "report will contain the output of the diff for")
if [[ -f report.txt ]]; then
    rm report.txt
fi

# Need the pdfbox extension for the PDFBox tutorial
# The PDFBox ext has to be placed in the *GSDLHOME*/ext,
# also in GS3's case where GSDLHOME is GS3/gs2build/ext
# Go into ext and at end popd to get back into the collect folder's parent for the GS installation
if [[ ! -e "$GSDLHOME/ext/pdf-box" ]]; then
    if pushd "$GSDLHOME/ext"; then
        if [[ ! -e "$GSDLHOME/ext/pdf-box-java.tar.gz" ]]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz
        fi
        tar -xvzf pdf-box-java.tar.gz
        popd
    else
        # don't download and unpack into whatever folder we happen to be in
        echo "Could not cd into $GSDLHOME/ext, skipping the installation of the pdf-box extension." >&2
    fi
fi

# move the existing collect folder out of the way
# unless we are adding a new collection to svn, in which case, we'll grab them from whatever collect folder exists
if [[ "$mode" != "svnaddnew" && -e collect && ! -e collect_orig ]]; then
    mv collect collect_orig
fi

# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [[ -e model-collect ]]; then
    if [[ -z "$1" ]]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    if [[ -n "$GSDL3SRCHOME" ]]; then
        svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/gs3-model-collect model-collect
    else
        svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
    fi
fi

# Not using rsync to copy folders while excluding files/subfolders, since rsync is not available on lsb
# http://www.linuxquestions.org/questions/linux-software-2/copy-svn-working-dir-without-svn-hidden-dirs-and-files-620586/
# rsync -r --exclude=.svn/ model-collect/ collect

# Make a copy of the model-collect named as the new collect
# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
# Then remove the copy's .svn folders
if [[ "$mode" != "svnaddnew" && -e collect_orig ]]; then
    echo "***********************************************"
    echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
    echo
    if [[ ! -e collect ]]; then
        cp -r model-collect collect
        strip_svn_dirs collect
    elif [[ -z "$1" ]]; then
        rm -rf collect
        cp -r model-collect collect
        strip_svn_dirs collect
    else
        for collection in "$@"; do
            if [[ -e "collect/$collection" ]]; then
                rm -rf -- "collect/$collection"
            fi
            cp -r "model-collect/$collection" "collect/$collection"
            strip_svn_dirs "collect/$collection"
        done
    fi
    echo "***********************************************"
fi

# parse arguments
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script
if [[ -z "$1" ]]; then
    # all_collections
    # for each collection, import, build, move building to index
    for collection in collect/*; do
        # an unmatched glob is left as the literal pattern; skip it
        [[ -e "$collection" ]] || continue

        build_single_collection "$collection"
        #svn_process_single_collection $collection
        case "$mode" in
            svnupdate) update_single_collection "$collection" ;;
            svnaddnew) svn_add_new_collection "$collection" ;;
        esac
    done

    # having rebuilt all the collections, just the processing for svndelete remains:
    if [[ "$mode" == "svndelete" ]]; then
        svn_delete
    fi
else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists
    for collection in "$@"; do
        collection=collect/$collection
        if [[ -e "$collection" ]]; then
            build_single_collection "$collection"
            #svn_process_single_collection $collection
            case "$mode" in
                svnupdate) update_single_collection "$collection" ;;
                svnaddnew) svn_add_new_collection "$collection" ;;
            esac
        else
            echo
            echo "Can't find collection $collection. Skipping."
            echo
        fi
    done

    # having rebuilt the specified collections above, just the processing for svndelete remains
    if [[ "$mode" == "svndelete" ]]; then
        svn_delete "$@"
    fi
fi

echo
echo "*****************************************"
echo

# NO LONGER NECESSARY: WE'RE DOING A DIFF BETWEEN collect AND model-collect AT THIS SCRIPT'S END
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point the user to it
#if [ -f report.txt ] && [ "x$mode" != "x" ]; then
#    echo "Some files or folders outside of archives and index directories were different. See report.txt"
#    echo
#fi

# if not svnupdating or svndeleting, then inform the user that model-collect is unchanged
# if svnupdating, then warn the user that model-collect still needs committing
# if svndeleting, then inform the user that model-collect has been changed and committed
case "$mode" in
    "")
        echo "* The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    svnupdate)
        echo "* TO DO: You still need to run svn status and svn commit on the model-collect folder. Besides that:"
        ;;
    svndelete)
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
        ;;
    svnaddnew)
        echo "* The new collection(s) have been built, copied to model-collect and added to svn."
        echo "* TO DO: You still need to run svn status and svn commit on the model-collect folder. Besides that:"
        ;;
esac
echo

if [[ -n "$mode" ]]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # grep -F: ".svn" is meant literally, not as "any character followed by svn"
    if [[ -z "$1" ]]; then
        echo "---START DIFF---"
        diff -rq model-collect collect | grep -vF ".svn"
    else
        for collection in "$@"; do
            echo "--COLLECTION: $collection"
            diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn"
            echo "--"
        done
    fi
    echo "---END DIFF---"
    echo
fi

if [[ -e collect_orig ]]; then
    echo "* The original collect directory has been left renamed as collect_orig"
    echo
fi

if (( debug_mode == 1 )); then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
fi

echo
echo "*****************************************"
echo

# deletes empty dirs
# find collect/$collection/archives/HASH* -type d -empty -delete
# find collect/$collection/index/assoc/HASH* -type d -empty -delete

# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
#find collect -type d -empty -delete

# The following when put in a separate script file will delete all folders from model-collect that are
# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
# ---------------------------------------------
#!/bin/bash
#for collection in collect/*; do
#    #escape the filename (in case of space)
#    collection=`echo $collection | sed 's@ @\\\ @g'`
#    #get just the basename
#    collection=`basename $collection`
#    # HASH dirs that are empty in local collect's archives and index/assoc,
#    # need to be removed from the svn in model-collect
#    for line in `find collect/$collection/archives/HASH* -type d -empty`; do
#        modelline="model-$line"
#        echo "LINE: $modelline"
#        # remove from svn of model collect
#        svn rm $modelline
##        rm -rf $modelline
#        # remove physically from local collect
#        rm -rf $line
#    done
#    for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
#        modelline="model-$line"
#        echo "LINE: $modelline"
#        # remove from svn of model collect
#        svn rm $modelline
##        rm -rf $modelline
#        # remove physically from local collect
#        rm -rf $line
#    done
#done
# ---------------------------------------------