root/other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh @ 28073

Revision 28073, 21.7 KB (checked in by ak19, 7 years ago)

Forgot to add the help cmdline option

Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22# !!!!! IMPORTANT: if you pass in svnupdate, it leaves you to do the final commit on
23# the (svn) model-collect folder!
24
25# Pass in --svndelete to remove the archives and index from svn in the model-collect
26# and replace this with the rebuilt archives and index
27# The --svndelete is useful for when the HASH directory naming has changed and everything
28# in archives and index has to be wiped out and moved back in from the rebuilt col.
29# Passing in --svndelete will do the final commits on the model-collect folder.
30
31# If neither flag is passed in, then the collections are rebuilt but the svn model-collect
32# is not updated and the repository is not updated.
33
34# Examples of usage:
35# ./gen-model-colls.sh
36# ./gen-model-colls.sh --svndelete
37# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
38
39# The first just rebuilds all the collections in a new folder called collect and stops there
40
41# The second rebuilds all the collections in collect and svn removes the archives and the index
42# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
43# and svn adds them.
44
45# The third example checks out all the model-collections again, but rebuilds only the 2 collections
46# specified in the new collect folder. Then it copies across the *contents* of the archives and
47# index folders of those 2 collections into their model-collect equivalents. You then still have to
48# do the final svn commit on the model-collect folder after looking over the differences.
49
50# Also valid examples:
51# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
52# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
53# ./gen-model-colls.sh --svnupdate
54
55# PSEUDOCODE
56# This script:
57# Checks out the model-collections folder from SVN
58# Makes a copy
59# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
60# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
61# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
62# model-collect/archives and model-collect/index.
63# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
64# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
65# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.
66
67# See earlier version of this script:
68# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
69# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
70
71# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
72# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories
73
74#*******************************GLOBAL VARIABLES***************************
75
76# mode can be svndelete or svnupdate
77mode=
78debug_mode=0
79commit_message=
80
81#*****************************FUNCTIONS*****************************
82
83# DON'T ADD ANY FURTHER ECHO STATEMENTS IN FUNCTION get_col_basename
84# "you have to be really careful on what you have in this function, as having any code which will eventually echo will mean that you get incorrect return string."
85# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
# Print the basename of a collection path, e.g. "collect/Tudor-Basic" -> "Tudor-Basic".
#
# Arguments: $1 - collection name or path; may contain spaces
# Outputs:   the last path component on stdout, followed by a newline
#
# A bash function cannot return a string, so the result is handed back on
# stdout and callers capture it with $(get_col_basename "$path").
# Anything else written to stdout in here would end up in the caller's result.
function get_col_basename () {
    local col_path=$1
    local col_name

    # Quoting (rather than backslash-escaping spaces) keeps the name a single
    # word, so basename never mistakes part of it for a suffix to strip.
    col_name=$(basename -- "$col_path")

    # printf instead of echo: a name such as "-n" must be printed literally
    printf '%s\n' "$col_name"
}
101
102
103# Function that handles the --svndelete flag (mode) of this script for a single collection
# Function that handles the --svndelete flag (mode) of this script.
#
# Steps:
#   1. svn remove archives and index in each collection, then commit the removals
#   2. svn up, so that what was svn-removed is also deleted locally
#   3. copy the newly rebuilt archives and index into each model-collection,
#      svn add them, then commit the additions
#
# Globals:   debug_mode (read)     - unless 0, the svn commit/up steps are skipped
#            commit_message (read) - custom message; if empty, defaults are used
# Arguments: collection names or paths; if the first is empty or missing,
#            everything matching collect/* is processed
# Returns:   1 if committing the removals fails, 0 otherwise
function svn_delete () {
    local -a targets=()
    local -a names=()
    local target
    local delete_msg add_msg

    if [ -z "${1:-}" ]; then
        targets=(collect/*)
    else
        targets=("$@")
    fi

    # Work out both messages up front WITHOUT touching the global: assigning
    # the 1/2 default to commit_message would make the second commit reuse it.
    delete_msg=${commit_message:-"Clean rebuild of model collections 1/2. Clearing out deprecated archives and index."}
    add_msg=${commit_message:-"Clean rebuild of model collections 2/2. Adding rebuilt archives and index."}

    for target in "${targets[@]}"; do
        _del_col_archives_index "$target"
    done

    # svn commit all the svn rm statements done above in one go.
    # Don't do `svn up` before this commit, as doing so would retrieve all the
    # folders that were just svn-removed.
    # Numerical comparisons: http://tldp.org/LDP/abs/html/comparison-ops.html
    if [ "$debug_mode" -eq "0" ]; then
        if ! svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $delete_msg" model-collect; then
            echo "svn_delete: committing the removals failed, not re-adding archives and index" >&2
            return 1
        fi

        # Having svn committed the deletes, do an svn up to locally delete what
        # was svn-removed above, BEFORE copying from the rebuilt folders
        svn up model-collect
    fi

    # copy from the rebuilt archives and index over into the svn model-collect and svn add them
    for target in "${targets[@]}"; do
        _add_col_archives_index "$target"
    done

    # commit all the svn add statements done just above in one go
    if [ "$debug_mode" -eq "0" ]; then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $add_msg" model-collect
    fi

    # Report every collection that was handled, not just the last one
    for target in "${targets[@]}"; do
        target=${target%/}
        names+=("${target##*/}")
    done

    echo
    echo "*********************"
    echo "Done svn-deleting rebuilt model-collection: ${names[*]}"
    echo "*********************"
    echo
}
167
# To undo the changes made by svndelete, run the following manually
# svn revert --depth infinity model-collect/$collection/archives/*
# svn revert --depth infinity model-collect/$collection/index/*
# then remove both the local archives and index, and do an svn up to get original checkout back
172
173# svn delete this collection's archives and index folders
174# (The commit will be done when in one step for all collections on which this function was called)
# svn delete one collection's archives and index folders inside model-collect.
# Nothing is committed here; the caller commits once for all collections.
#
# Globals:   debug_mode (read) - 0: svn rm the folders; 1: only delete them
#            locally with rm -rf; any other value: do nothing
# Arguments: $1 - collection name or path (only its basename is used)
function _del_col_archives_index () {
    local col_name model_dir

    # reduce e.g. "collect/Tudor-Basic" to "Tudor-Basic"
    col_name=$(get_col_basename "$1")
    model_dir="model-collect/$col_name"

    # An empty name would make the paths below point at model-collect itself
    if [ -z "$col_name" ] || [ ! -e "$model_dir" ]; then
        echo "del_col_archives_index: $col_name does not exist in model-collect"
        return
    fi

    # remove the entire archives and index folders from svn
    if [ "$debug_mode" -eq "0" ]; then
        svn rm "$model_dir/archives"
        svn rm "$model_dir/index"
    elif [ "$debug_mode" -eq "1" ]; then
        rm -rf -- "$model_dir/archives"
        rm -rf -- "$model_dir/index"
    fi
}
196
197
198# copy and then svn add the collection's archives and index folders
# Copy one collection's rebuilt archives and index folders from collect into
# model-collect and svn add them. Nothing is committed here.
#
# Globals:   debug_mode (read) - svn add is only run when this is 0
# Arguments: $1 - collection name or path (only its basename is used)
function _add_col_archives_index () {
    local col_name subdir

    # reduce e.g. "collect/Tudor-Basic" to "Tudor-Basic"
    col_name=$(get_col_basename "$1")

    if [ -z "$col_name" ] || [ ! -e "model-collect/$col_name" ]; then
        echo "add_col_archives_index: $col_name does not exist in model-collect"
        return
    fi

    # copy the rebuilt folder across, then schedule it for addition in svn
    for subdir in archives index; do
        if ! cp -r "collect/$col_name/$subdir" "model-collect/$col_name/."; then
            # don't svn add a folder that was not (completely) copied
            echo "add_col_archives_index: could not copy $subdir of $col_name" >&2
            continue
        fi

        if [ "$debug_mode" -eq "0" ]; then
            svn add "model-collect/$col_name/$subdir"
        fi
    done
}
219
220
221# UNUSED, but useful for spotting differences between the collect and model-collect
222# after rebuild, before svn updating/deleting, as opposed to at the end of the script
# UNUSED, but useful for spotting differences between collect and model-collect
# after a rebuild, before svn updating/deleting.
#
# Diffs model-collect/<collection> against collect/<collection> (ignoring .svn
# entries) and appends to report.txt every diff line that mentions neither
# "archives" nor "index", i.e. differences outside the rebuilt folders.
#
# Arguments: $1 - collection name or path (only its basename is used)
# Outputs:   a status line on stdout when the collection is missing from
#            model-collect or when there are no differences at all
function svn_process_single_collection () {
    local col_name diff_result diff_line

    # reduce e.g. "collect/Tudor-Basic" to "Tudor-Basic"
    col_name=$(get_col_basename "$1")

    if [ ! -e "model-collect/$col_name" ]; then
        echo "svn_process_single_collection: $col_name does not exist in model-collect"
        return
    fi

    # diff the svn model and rebuilt model collections, once; -F so that
    # ".svn" is matched literally instead of "any character followed by svn"
    diff_result=$(diff -rq "model-collect/$col_name" "collect/$col_name" | grep -vF ".svn")

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $col_name"
        return
    fi

    # Go through the diff output a line at a time. Reading with `IFS= read -r`
    # splits on newlines only and leaves the global IFS untouched.
    # http://stackoverflow.com/questions/229551/string-contains-in-bash
    while IFS= read -r diff_line; do
        if [[ "$diff_line" != *archives* && "$diff_line" != *index* ]]; then
            # the file that is different is neither in index nor in archives,
            # send this diffline to the report
            printf '%s\n' "$diff_line" >> report.txt
        fi
    done <<< "$diff_result"
}
275
276# Function that takes care of the --svnupdate flag mode of this script for a single collection
# Function that takes care of the --svnupdate flag mode of this script for a
# single collection: copies the *contents* of the rebuilt archives and index
# folders over their model-collect equivalents and svn adds whatever is new.
# Files and folders that exist only in model-collect are left untouched, and
# nothing is committed.
#
# Arguments: $1 - collection name or path (only its basename is used)
function update_single_collection () {
    local col_name subdir

    # reduce e.g. "collect/Tudor-Basic" to "Tudor-Basic"
    col_name=$(get_col_basename "$1")

    if [ ! -e "model-collect/$col_name" ]; then
        echo "update_single_collection: $col_name does not exist in model-collect"
        return
    fi

    # copy across the contents of the rebuilt model-collection's index and
    # archives to the svn model-collect (the glob stays outside the quotes so
    # it still expands, while names with spaces remain single words)
    for subdir in archives index; do
        cp -r "collect/$col_name/$subdir"/* "model-collect/$col_name/$subdir/."
    done

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # NOTE: this is run in debug mode too; there is no debug_mode check here.
    for subdir in archives index; do
        svn add --force "model-collect/$col_name/$subdir"/* --auto-props --parents --depth infinity -q
    done

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    # if etc/collect.cfg is different, copy it across too?

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$col_name"
    echo "*********************"
    echo
}
309
310
311# re-build a single collection in "collect" which is a copy of model-collect
# Re-build a single collection in "collect" (which is a copy of model-collect):
# runs import.pl and buildcol.pl on it, then replaces the collection's index
# folder with the freshly produced building folder.
#
# Arguments: $1 - collection name or path (only its basename is used)
# Returns:   1 if the name is empty or no building folder was produced (the
#            existing index is then left in place), 0 otherwise
function build_single_collection () {
    local col_name col_dir

    # reduce e.g. "collect/Tudor-Basic" to "Tudor-Basic"
    col_name=$(get_col_basename "$1")
    col_dir="collect/$col_name"

    if [ -z "$col_name" ]; then
        echo "build_single_collection: no collection name given" >&2
        return 1
    fi

    import.pl -removeold "$col_name"
    buildcol.pl -removeold "$col_name"

    # Only throw the old index away once there is something to replace it with
    if [ ! -e "$col_dir/building" ]; then
        echo "build_single_collection: no building folder for $col_name, leaving its index untouched" >&2
        return 1
    fi

    rm -rf -- "$col_dir/index"
    mv -- "$col_dir/building" "$col_dir/index"

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $col_name"
    echo "*********************"
    echo
}
328
329
330# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
# Print the command line help and exit.
#
# Arguments: $1 - (optional) exit status; defaults to 1 so that plain `usage`
#                 keeps signalling an error, while `usage 0` suits --help
function usage() {
    local exit_status=${1:-1}

    echo "*******************************************"
    echo "Usage: $0 [-u|--svnupdate | -x|--svndelete] [-d|--debug] [-m|--message 'custom commit message'] [-h|--help] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    echo "If neither svnupdate nor svndelete are provided, the collections are only rebuilt in collect; model-collect is not modified."
    echo "*******************************************"
    exit "$exit_status"
}
341
342
343#*******************************MAIN PROGRAM***************************
344
345# process optional command line arguments
346# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
347# Execute getopt
# Let getopt normalise the command line: options first, then "--", then the
# remaining arguments (the collection names). Bad arguments: show usage.
if ! ARGS=$(getopt -o m:uxdh -l "message:,svnupdate,svndelete,debug,help" -n "$0" -- "$@"); then
    usage
    exit 1
fi

# getopt's output is shell-quoted; eval re-parses it into positional parameters
eval set -- "$ARGS"

while [ "$#" -gt 0 ]; do
  case "$1" in
    -h|--help)
      usage 0
      exit 0
      ;;
    -x|--svndelete)
      shift
      if [ "$mode" == "svnupdate" ]; then
        echo
        echo "Can't use both svndelete and svnupdate"
        usage
        exit 1
      fi
      mode=svndelete
      ;;
    -u|--svnupdate)
      shift
      if [ "$mode" == "svndelete" ]; then
        echo
        echo "Can't use both svndelete and svnupdate"
        usage
        exit 1
      fi
      mode=svnupdate
      ;;
    -d|--debug)
      shift
      debug_mode=1
      ;;
    -m|--message)
      # getopt always emits the option's argument as the next word, even when
      # it is empty, so consume both words unconditionally. Leaving an empty
      # message unshifted would make this loop spin forever on "".
      commit_message=$2
      shift 2
      ;;
    --)
      # end of options; what is left are the collection names
      shift
      break
      ;;
    *)
      # should not happen after getopt, but never loop on an unknown word
      echo "Unexpected argument while parsing options: $1" >&2
      usage
      exit 1
      ;;
  esac
done
406
407#echo "commit message: $commit_message"
408#echo "Debug mode is: $debug_mode"
409#exit
410
411
412# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
413# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect
414
415# the remaining arguments to the script are assumed to be collections
416
417# debugging
418#for collection in "$@"; do
419#    collection=collect/$collection
420#    echo "Collection: $collection"
421#done
422
423# finished processing arguments
424
425
# report.txt is where svn_process_single_collection appends diff lines for
# files outside archives and index; remove any report left by an earlier run
if [ -f report.txt ]; then
    rm report.txt
fi

# Need pdfbox for the PDFBox tutorial
# Everything is done with paths relative to the toplevel folder (no cd), so a
# missing ext folder can never leave the script in the wrong directory.
if [ ! -e ext/pdf-box ]; then
    mkdir -p ext
    if [ ! -e ext/pdf-box-java.tar.gz ]; then
        wget -P ext http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz
    fi

    # unpack whether the tarball was just downloaded or was already there
    if [ -e ext/pdf-box-java.tar.gz ]; then
        tar -xvzf ext/pdf-box-java.tar.gz -C ext
    else
        echo "WARNING: could not get hold of ext/pdf-box-java.tar.gz, so pdf-box is not installed" >&2
    fi
fi


# move the existing collect folder out of the way
# (a collect_orig from an earlier run is never overwritten)
if [ -e collect ] && [ ! -e collect_orig ]; then
    mv collect collect_orig
fi
446
447
448# get model-collect from svn
449# if we already have it, svn update the entire model-collect folder if processing all collections
450# or svn update just any collections specified in the model-collect folder
# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [ -e model-collect ]; then
    if [ -z "$1" ]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    # without a checkout there is nothing to copy or rebuild, so stop here
    if ! svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect; then
        echo "Could not check out model-collect from svn. Stopping." >&2
        exit 1
    fi
fi

# Make a copy of the model-collect named as the new collect
# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
# Then remove the copy's .svn folders. -prune stops find from trying to descend
# into the .svn folders that are being deleted.
echo "***********************************************"
echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
echo
if [ -e collect_orig ]; then
    if [ ! -e collect ] || [ -z "$1" ]; then
        # no copy yet, or all collections requested: start from a fresh full copy
        rm -rf collect
        cp -r model-collect collect
        find collect -name ".svn" -type d -prune -exec rm -rf {} +
    else
        # refresh only the collections named on the command line
        for collection in "$@"; do
            # an empty name would make the rm below wipe the whole collect folder
            [ -n "$collection" ] || continue
            rm -rf -- "collect/$collection"
            cp -r "model-collect/$collection" "collect/$collection"
            find "collect/$collection" -name ".svn" -type d -prune -exec rm -rf {} +
        done
    fi
fi
echo "***********************************************"
490
491# Set up the Greenstone environment for building
source setup.bash

# Work out which collections to process: the ones given as command-line
# arguments or, without arguments, everything in the collect folder.
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script
cols_to_build=()
if [ -z "$1" ]; then
    cols_to_build=(collect/*)
else
    for col_arg in "$@"; do
        cols_to_build+=("collect/$col_arg")
    done
fi

# for each collection: import, build, move building to index
# (a loop variable other than "collection" is used, because the functions
# called here may assign to the global variable of that name)
for col_path in "${cols_to_build[@]}"; do
    # covers both a misspelt argument and an unmatched collect/* glob
    if [ ! -e "$col_path" ]; then
        echo "Can't find collection $col_path. Skipping."
        continue
    fi

    if build_single_collection "$col_path"; then
        #svn_process_single_collection "$col_path"

        if [ "$mode" == "svnupdate" ]; then
            update_single_collection "$col_path"
        fi
    else
        echo "Rebuilding $col_path did not succeed, not updating model-collect from it." >&2
    fi
done

# having rebuilt the collections above, just the processing for svndelete remains;
# with no arguments "$@" expands to nothing and svn_delete handles all collections
if [ "$mode" == "svndelete" ]; then
    svn_delete "$@"
fi
546
547
echo
echo "*****************************************"
echo
# NO LONGER NECESSARY: WE'RE DOING A DIFF BETWEEN collect AND model-collect AT THIS SCRIPT'S END
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point the user to it
#if [ -f report.txt ] && [ "x$mode" != "x" ]; then
#    echo "Some files or folders outside of archives and index directories were different. See report.txt"
#    echo
#fi

# if not svnupdating or svndeleting, then inform the user that model-collect is unchanged
# if svnupdating, then warn the user that model-collect still needs committing
# if svndeleting, then inform the user that model-collect has been changed and committed
case "$mode" in
    "")
        echo "* The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    svnupdate)
        echo "* TO DO: You still need to run svn status and then svn commit on the model-collect folder. Besides that:"
        ;;
    svndelete)
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
        ;;
esac
echo

# When model-collect was modified, show what still differs from the rebuilt copy.
# grep -F matches ".svn" literally, so only genuine .svn entries are hidden.
if [ -n "$mode" ]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # the start marker is printed in both cases, to pair with the end marker
    echo "---START DIFF---"
    if [ -z "$1" ]; then
        diff -rq model-collect collect | grep -vF ".svn"
    else
        for collection in "$@"; do
            echo "--COLLECTION: $collection"
            diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn"
            echo "--"
        done
    fi
    echo "---END DIFF---"
    echo
fi

echo "* The original collect directory has been left renamed as collect_orig"
echo

if [ "${debug_mode:-0}" -eq "1" ]; then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
fi
echo
echo "*****************************************"
echo
597
598
599# deletes empty dirs
600#       find collect/$collection/archives/HASH* -type d -empty -delete
601#       find collect/$collection/index/assoc/HASH* -type d -empty -delete
602
603# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
604# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
605#find collect -type d -empty -delete
606
607# The following when put in a separate script file will delete all folders from model-collect that are
608# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
609# ---------------------------------------------
610#!/bin/bash
611
612#for collection in collect/*; do
613    #escape the filename (in case of space)
614#    collection=`echo $collection | sed 's@ @\\\ @g'`
615   
616    #get just the basename
617#    collection=`basename $collection`
618
619    # HASH dirs that are empty in local collect's archives and index/assoc,
620    # need to be removed from the svn in model-collect
621
622#    for line in `find collect/$collection/archives/HASH* -type d -empty`; do
623#   modelline="model-$line"
624#   echo "LINE: $modelline"
625   
626    # remove from svn of model collect
627#   svn rm $modelline
628##  rm -rf $modelline
629    # remove physically from local collect
630#   rm -rf $line
631#    done
632
633#    for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
634#   modelline="model-$line"
635#   echo "LINE: $modelline"
636   
637    # remove from svn of model collect
638#   svn rm $modelline
639##  rm -rf $modelline
640    # remove physically from local collect
641#   rm -rf $line
642#    done
643
644#done
645# ---------------------------------------------
Note: See TracBrowser for help on using the browser.