root/other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh @ 28070

Revision 28070, 21.6 KB (checked in by ak19, 7 years ago)

No support for adding a new collection yet. At present the script is only concerned with rebuilding collections already added to SVN

Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22# !!!!! IMPORTANT: if you pass in svnupdate, it leaves you to do the final commit on
23# the (svn) model-collect folder!
24
25# Pass in --svndelete to remove the archives and index from svn in the model-collect
26# and replace this with the rebuilt archives and index
27# The --svndelete is useful for when the HASH directory naming has changed and everything
28# in archives and index has to be wiped out and moved back in from the rebuilt col.
29# Passing in --svndelete will do the final commits on the model-collect folder.
30
31# If neither flag is passed in, then the collections are rebuilt but the svn model-collect
32# is not updated and the repository is not updated.
33
34# Examples of usage:
35# ./gen-model-colls.sh
36# ./gen-model-colls.sh --svndelete
37# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
38
39# The first just rebuilds all the collections in a new folder called collect and stops there
40
41# The second rebuilds all the collections in collect and svn removes the archives and the index
42# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
43# and svn adds them.
44
45# The third example checks out all the model-collections again, but rebuilds only the 2 collections
46# specified in the new collect folder. Then it copies across the *contents* of the archives and
47# index folders of those 2 collections into their model-collect equivalents. You then still have to
48# do the final svn commit on the model-collect folder after looking over the differences.
49
50# Also valid examples:
51# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
52# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
53# ./gen-model-colls.sh --svnupdate
54
55# PSEUDOCODE
56# This script:
57# Checks out the model-collections folder from SVN
58# Makes a copy
59# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
60# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
61# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
62# model-collect/archives and model-collect/index.
63# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
64# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
65# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.
66
67# See earlier version of this script:
68# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
69# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
70
71# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
72# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories
73
74#*******************************GLOBAL VARIABLES***************************
75
# Which svn step follows the rebuild: "svndelete", "svnupdate", or empty for none
mode=""
# set to 1 by the --debug option, 0 otherwise
debug_mode=0
# text passed with --message; empty means the built-in commit messages are used
commit_message=""
80
81#*****************************FUNCTIONS*****************************
82
# Print the last path component of a collection path, e.g. "collect/Tudor-Basic"
# or "collect/Tudor-Basic/" both give "Tudor-Basic".
#
# Arguments: $1 - collection name or path; may contain spaces
# Outputs:   the basename on stdout
#
# The result is handed back through stdout because a bash function can only
# "return" a numeric status; callers capture it with $(get_col_basename ...).
# So DON'T ADD ANY FURTHER ECHO STATEMENTS IN FUNCTION get_col_basename:
# anything else written to stdout ends up in the captured string.
# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
function get_col_basename () {
    # Quoting the argument is what keeps a name with spaces in one piece.
    # Inserting backslashes into the value does not help, because backslashes
    # that come out of an expansion are not treated as quoting by the shell.
    basename -- "$1"
}
101
102
# Function that handles the --svndelete flag (mode) of this script.
#
# Arguments: $@ - collections to process; when the first argument is empty or
#                 absent, every entry matching collect/* is processed
# Globals:   debug_mode (read)     - the svn commit/up steps only run when it is 0
#            commit_message (read) - when non-empty it replaces both default messages
# Returns:   1 when one of the svn commits fails, 0 otherwise
#
# Steps:
#   1. svn remove archives and index in each collection, commit them all
#   2. svn up, so that what was svn-removed is also deleted locally
#   3. copy over the newly rebuilt archives and index into each model-collection,
#      svn add them, commit them all
function svn_delete () {
    local commit_prefix="AUTOCOMMIT by gen-model-colls.sh script. Message:"
    local msg_removed="Clean rebuild of model collections 1/2. Clearing out deprecated archives and index."
    local msg_added="Clean rebuild of model collections 2/2. Adding rebuilt archives and index."
    local -a targets=()
    local -a names=()
    local target

    if [ -z "$1" ]; then
        targets=(collect/*)
    else
        targets=("$@")
    fi

    # A message supplied on the command line is used for both commits. The
    # defaults live in locals so that choosing the first default cannot hide
    # the second one (and the global commit_message is left as the user set it).
    if [ -n "$commit_message" ]; then
        msg_removed=$commit_message
        msg_added=$commit_message
    fi

    for target in "${targets[@]}"; do
        _del_col_archives_index "$target"
    done

    # Numerical comparisons: http://tldp.org/LDP/abs/html/comparison-ops.html
    if [ "$debug_mode" -eq 0 ]; then
        # svn commit all the svn rm statements done above in one go.
        # Don't do `svn up` before this commit, as doing so would retrieve all
        # the folders that just were svn-removed.
        if ! svn commit -m "$commit_prefix $msg_removed" model-collect; then
            echo "svn_delete: svn commit of the removals failed, not copying the rebuilt archives and index" >&2
            return 1
        fi

        # Having svn committed the deletes, do an svn up to locally delete what
        # was svn-removed above, BEFORE copying from the rebuilt folders
        svn up model-collect
    fi

    # copy from the rebuilt archives and index over into the svn model-collect and svn add them
    for target in "${targets[@]}"; do
        _add_col_archives_index "$target"
        target=${target%/}
        names+=("${target##*/}")
    done

    # commit all the svn add statements done just above in one go
    if [ "$debug_mode" -eq 0 ]; then
        if ! svn commit -m "$commit_prefix $msg_added" model-collect; then
            echo "svn_delete: svn commit of the rebuilt archives and index failed" >&2
            return 1
        fi
    fi

    echo
    echo "*********************"
    echo "Done svn-deleting rebuilt model-collection(s): ${names[*]}"
    echo "*********************"
    echo
}
167
168# To undo the changes made by svndelete, run the following manually
# svn revert --depth infinity model-collect/$collection/archives/*
# svn revert --depth infinity model-collect/$collection/index/*
171# then remove both the local archives and index, and do an svn up to get original checkout back
172
# svn delete this collection's archives and index folders
# (The commit is done by the caller, in one step for all collections on which
# this function was called)
#
# Arguments: $1 - collection name or path; only its basename is used
# Globals:   debug_mode (read) - 0: svn rm the folders;
#                                1: plain rm -rf, without telling svn
function _del_col_archives_index () {
    local name

    # get just the basename (quoted, so a name containing spaces stays intact)
    name=$(get_col_basename "$1")

    # an empty name would make the paths below point at model-collect itself
    if [ -z "$name" ] || [ ! -e "model-collect/$name" ]; then
        echo "del_col_archives_index: $name does not exist in model-collect"
        return
    fi

    # remove the entire archives and index folders from svn
    if [ "$debug_mode" -eq 0 ]; then
        svn rm "model-collect/$name/archives"
        svn rm "model-collect/$name/index"
    elif [ "$debug_mode" -eq 1 ]; then
        rm -rf -- "model-collect/$name/archives"
        rm -rf -- "model-collect/$name/index"
    fi
}
196
197
# copy the collection's rebuilt archives and index folders from collect into
# model-collect and then svn add them
#
# Arguments: $1 - collection name or path; only its basename is used
# Globals:   debug_mode (read) - the svn add only happens when it is 0;
#                                the copy happens regardless
function _add_col_archives_index () {
    local name

    # get just the basename (quoted, so a name containing spaces stays intact)
    name=$(get_col_basename "$1")

    # an empty name would make the paths below point at model-collect itself
    if [ -z "$name" ] || [ ! -e "model-collect/$name" ]; then
        echo "add_col_archives_index: $name does not exist in model-collect"
        return
    fi

    # copy the entire rebuilt archives and index folders into the svn model collection
    cp -r "collect/$name/archives" "model-collect/$name/."
    cp -r "collect/$name/index" "model-collect/$name/."

    if [ "$debug_mode" -eq 0 ]; then
        svn add "model-collect/$name/archives"
        svn add "model-collect/$name/index"
    fi
}
219
220
# UNUSED, but useful for spotting differences between the collect and model-collect
# after rebuild, before svn updating/deleting, as opposed to at the end of the script
#
# Diffs model-collect/<collection> against collect/<collection> and appends to
# report.txt every diff line that mentions neither "archives" nor "index".
#
# Arguments: $1 - collection name or path; only its basename is used
# Outputs:   a one-line notice on stdout when the collection is missing from
#            model-collect or when there are no differences
function svn_process_single_collection () {
    local name diff_result line

    # get just the basename (quoted, so a name containing spaces stays intact)
    name=$(get_col_basename "$1")

    if [ -z "$name" ] || [ ! -e "model-collect/$name" ]; then
        echo "svn_process_single_collection: $name does not exist in model-collect"
        return
    fi

    # diff the svn model and rebuilt model collections, leaving out svn's own
    # bookkeeping (-F: match ".svn" literally rather than as a regex)
    diff_result=$(diff -rq "model-collect/$name" "collect/$name" | grep -vF ".svn")

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $name"
        return
    fi

    # Walk the captured diff line by line. Reading with `IFS= read -r` splits
    # on newlines only, without touching the shell-wide IFS and without
    # subjecting the lines to globbing.
    # http://stackoverflow.com/questions/229551/string-contains-in-bash
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            # the file that is different is neither in index nor in archives,
            # send this diffline to the report
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"
}
275
# Function that takes care of the --svnupdate flag mode of this script for a single collection
#
# Copies the contents of collect/<collection>/archives and .../index over their
# model-collect equivalents and svn adds whatever is in those folders.
# Nothing is committed here.
#
# Arguments: $1 - collection name or path; only its basename is used
function update_single_collection () {
    local name part

    # get just the basename (quoted, so a name containing spaces stays intact)
    name=$(get_col_basename "$1")

    if [ -z "$name" ] || [ ! -e "model-collect/$name" ]; then
        echo "update_single_collection: $name does not exist in model-collect"
        return
    fi

    # copy across the contents of the rebuilt model-collection's index and archives to the svn model-collect
    for part in archives index; do
        if [ ! -d "collect/$name/$part" ]; then
            echo "update_single_collection: collect/$name/$part does not exist, nothing to copy" >&2
            continue
        fi
        # cp with several sources needs an existing destination directory
        mkdir -p "model-collect/$name/$part"
        cp -r "collect/$name/$part/"* "model-collect/$name/$part/."
    done

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # NOTE(review): this svn add runs in debug mode too, as the debug_mode guard
    # around it was commented out in the original - confirm that is intended.
    svn add --force "model-collect/$name/archives/"* --auto-props --parents --depth infinity -q
    svn add --force "model-collect/$name/index/"* --auto-props --parents --depth infinity -q

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    # if etc/collect.cfg is different, copy it across too?

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$name"
    echo "*********************"
    echo
}
309
310
# re-build a single collection in "collect" which is a copy of model-collect
#
# Runs import.pl and buildcol.pl (both with -removeold) on the collection and
# then replaces collect/<collection>/index with the new "building" folder.
#
# Arguments: $1 - collection name or path; only its basename is used
# Returns:   1 when there is no building folder to move into place, in which
#            case the existing index folder is left untouched
function build_single_collection () {
    local name

    # get just the basename (quoted, so a name containing spaces stays intact)
    name=$(get_col_basename "$1")

    import.pl -removeold "$name"
    buildcol.pl -removeold "$name"

    # Only throw the old index away once there is something to replace it with
    if [ ! -d "collect/$name/building" ]; then
        echo "build_single_collection: no building folder found for $name, leaving its index as it was" >&2
        return 1
    fi

    rm -rf -- "collect/$name/index"
    mv -- "collect/$name/building" "collect/$name/index"

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $name"
    echo "*********************"
    echo
}
328
329
# Print the command line synopsis and terminate the script with exit status 1.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
function usage() {
    echo "*******************************************"
    echo "Usage: $0 [-u|--svnupdate | -x|--svndelete] [-d|--debug] [-m|--message MSG] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    echo "If neither svnupdate nor svndelete is provided, the collections are only rebuilt in collect"
    echo "and the svn model-collect folder is left unmodified."
    echo "--message sets the commit message used by the commits that --svndelete makes."
    echo "*******************************************"
    exit 1
}
341
342
343#*******************************MAIN PROGRAM***************************
344
# process optional command line arguments
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
# Execute getopt: it normalises the options, and puts "--" between them and
# the remaining arguments (the collection names)
if ! ARGS=$(getopt -o m:uxd -l "message:,svnupdate,svndelete,debug" -n "$0" -- "$@"); then
    #Bad arguments
    usage
    exit 1
fi

eval set -- "$ARGS"

# Consume the options; stops at "--" so that afterwards "$@" holds only the
# collection names.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -x|--svndelete)
            shift
            if [ "$mode" == "svnupdate" ]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
                exit 1
            fi
            mode=svndelete
            ;;
        -u|--svnupdate)
            shift
            if [ "$mode" == "svndelete" ]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
                exit 1
            fi
            mode=svnupdate
            ;;
        -d|--debug)
            shift
            debug_mode=1
            ;;
        -m|--message)
            # getopt always emits the option's value as the next word, even
            # when it is empty, so both words are consumed unconditionally.
            # (Shifting the value only when non-empty left an empty word at
            # the front that no case arm matched, so the loop never ended.)
            commit_message=$2
            shift 2
            ;;
        --)
            shift
            break
            ;;
        *)
            # not expected after getopt, but never loop on an unknown word
            echo "Unexpected argument: $1" >&2
            usage
            exit 1
            ;;
    esac
done
401
402#echo "commit message: $commit_message"
403#echo "Debug mode is: $debug_mode"
404#exit
405
406
407# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
408# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect
409
410# the remaining arguments to the script are assumed to be collections
411
412# debugging
413#for collection in "$@"; do
414#    collection=collect/$collection
415#    echo "Collection: $collection"
416#done
417
418# finished processing arguments
419
420
# report.txt receives diff lines for files that differ outside of the archives
# and index folders; remove the one left by an earlier run
if [ -f report.txt ]; then
    rm report.txt
fi

# Need pdfbox for the PDFBox tutorial
if [ ! -e ext/pdf-box ]; then
    if [ -d ext ]; then
        # Work in a subshell, so that the script's own working directory
        # cannot end up changed whatever happens in here.
        (
            cd ext || exit 1
            # the path is relative to ext now, so no "ext/" prefix
            if [ ! -e pdf-box-java.tar.gz ]; then
                wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz
            fi
            tar -xvzf pdf-box-java.tar.gz
        )
    else
        echo "No ext folder in $(pwd), unable to install the pdf-box extension there" >&2
    fi
fi
435
436
# move the existing collect folder out of the way
# (a collect_orig left by an earlier run is never overwritten)
if [ -e collect ] && [ ! -e collect_orig ]; then
    # Stop if the original cannot be set aside: the steps below would
    # otherwise rebuild inside the original collect folder.
    if ! mv collect collect_orig; then
        echo "Unable to rename collect to collect_orig. Stopping." >&2
        exit 1
    fi
fi


# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [ -e model-collect ]; then
    if [ -z "$1" ]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    # without a model-collect checkout there is nothing to copy or rebuild
    if ! svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect; then
        echo "Unable to check out model-collect from svn. Stopping." >&2
        exit 1
    fi
fi
457
# Make a copy of the model-collect named as the new collect
# (or if collections are specified in the cmdline arguments and collect already
# exists, copy just these over from model-collect into collect)
# Then remove the copy's .svn folders. -prune stops find from trying to descend
# into a .svn folder that the rm has just deleted.
echo "***********************************************"
echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
echo
if [ ! -e collect ]; then
    cp -r model-collect collect
    find collect -name ".svn" -type d -prune -exec rm -rf {} +
elif [ -e collect_orig ]; then
    # collect is a leftover working copy, the original is safe in collect_orig
    if [ -z "$1" ]; then
        rm -rf collect
        cp -r model-collect collect
        find collect -name ".svn" -type d -prune -exec rm -rf {} +
    else
        for collection in "$@"; do
            # an empty name would turn the rm below into rm -rf collect/
            if [ -z "$collection" ]; then
                continue
            fi
            rm -rf "collect/$collection"
            cp -r "model-collect/$collection" "collect/$collection"
            find "collect/$collection" -name ".svn" -type d -prune -exec rm -rf {} +
        done
    fi
else
    echo "collect exists but there is no collect_orig, so collect was not replaced by a copy of model-collect" >&2
fi
echo "***********************************************"
485
# Set up the Greenstone environment for building
source setup.bash

# parse arguments
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script

# Rebuild the collections: all of collect/* when no collection was named on the
# command line, otherwise just the named ones. In svnupdate mode each collection
# is copied into model-collect as soon as it has been rebuilt; in svndelete mode
# svn_delete runs once, after all the rebuilding is done.
if [ -z "$1" ]; then

    # all_collections
    # for each collection, import, build, move building to index
    for collection in collect/*; do
        if build_single_collection "$collection" && [ "$mode" == "svnupdate" ]; then
            update_single_collection "$collection"
        fi
    done

    # having rebuilt all the collections, just the processing for svndelete remains:
    if [ "$mode" == "svndelete" ]; then
        svn_delete
    fi

else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists

    for collection in "$@"; do
        collection=collect/$collection
        if [ -e "$collection" ]; then
            if build_single_collection "$collection" && [ "$mode" == "svnupdate" ]; then
                update_single_collection "$collection"
            fi
        else
            echo "Can't find collection $collection. Skipping."
        fi
    done

    # having rebuilt the specified collections above, just the processing for svndelete remains
    # ("$@" quoted, so that each collection name arrives as one argument)
    if [ "$mode" == "svndelete" ]; then
        svn_delete "$@"
    fi

fi
541
542
echo
echo "*****************************************"
echo
# NO LONGER NECESSARY: WE'RE DOING A DIFF BETWEEN collect AND model-collect AT THIS SCRIPT'S END
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point the user to it
#if [ -f report.txt ] && [ "x$mode" != "x" ]; then
#    echo "Some files or folders outside of archives and index directories were different. See report.txt"
#    echo
#fi

# if not svnupdating or svndeleting, then inform the user that model-collect is unchanged
# if svnupdating, then warn the user that model-collect still needs committing
# if svndeleting, then inform the user that model-collect has been changed and committed
case "$mode" in
    "")
        echo "* The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    svnupdate)
        echo "* TO DO: You still need to run svn status and then svn commit on the model-collect folder. Besides that:"
        ;;
    svndelete)
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
        ;;
esac
echo

if [ -n "$mode" ]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # the START marker is printed for both branches, to pair with the END marker
    echo "---START DIFF---"
    if [ -z "$1" ]; then
        # -F: filter on the literal text ".svn", not on "any character + svn"
        diff -rq model-collect collect | grep -vF ".svn"
    else
        for collection in "$@"; do
            echo "--COLLECTION: $collection"
            diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn"
            echo "--"
        done
    fi
    echo "---END DIFF---"
    echo
fi

echo "* The original collect directory has been left renamed as collect_orig"
echo

if [ "${debug_mode:-0}" -eq 1 ]; then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
fi
echo
echo "*****************************************"
echo
592
593
594# deletes empty dirs
595#       find collect/$collection/archives/HASH* -type d -empty -delete
596#       find collect/$collection/index/assoc/HASH* -type d -empty -delete
597
598# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
599# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
600#find collect -type d -empty -delete
601
602# The following when put in a separate script file will delete all folders from model-collect that are
603# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
604# ---------------------------------------------
605#!/bin/bash
606
607#for collection in collect/*; do
608    #escape the filename (in case of space)
609#    collection=`echo $collection | sed 's@ @\\\ @g'`
610   
611    #get just the basename
612#    collection=`basename $collection`
613
614    # HASH dirs that are empty in local collect's archives and index/assoc,
615    # need to be removed from the svn in model-collect
616
617#    for line in `find collect/$collection/archives/HASH* -type d -empty`; do
618#   modelline="model-$line"
619#   echo "LINE: $modelline"
620   
621    # remove from svn of model collect
622#   svn rm $modelline
623##  rm -rf $modelline
624    # remove physically from local collect
625#   rm -rf $line
626#    done
627
628#    for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
629#   modelline="model-$line"
630#   echo "LINE: $modelline"
631   
632    # remove from svn of model collect
633#   svn rm $modelline
634##  rm -rf $modelline
635    # remove physically from local collect
636#   rm -rf $line
637#    done
638
639#done
640# ---------------------------------------------
Note: See TracBrowser for help on using the browser.