root/other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh @ 28074

Revision 28074, 23.1 KB (checked in by ak19, 7 years ago)

Adding in the svnaddnew cmdline option to add new collections to the model-collections on svn. Untested as yet.

Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22# !!!!! IMPORTANT: if you pass in svnupdate, it leaves you to do the final commit on
23# the (svn) model-collect folder!
24
25# Pass in --svndelete to remove the archives and index from svn in the model-collect
26# and replace this with the rebuilt archives and index
27# The --svndelete is useful for when the HASH directory naming has changed and everything
28# in archives and index has to be wiped out and moved back in from the rebuilt col.
29# Passing in --svndelete will do the final commits on the model-collect folder.
30
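# Pass in --svnaddnew to take collections from the existing collect folder that are
# not yet present in model-collect, copy them into model-collect and svn add them.
# The existing collect folder is used as-is in this mode, and no svn commit is done.

# If none of these flags is passed in, then the collections are rebuilt but the svn
# model-collect is not updated and the repository is not updated.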
33
34# Examples of usage:
35# ./gen-model-colls.sh
36# ./gen-model-colls.sh --svndelete
37# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
38
39# The first just rebuilds all the collections in a new folder called collect and stops there
40
41# The second rebuilds all the collections in collect and svn removes the archives and the index
42# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
43# and svn adds them.
44
45# The third example checks out all the model-collections again, but rebuilds only the 2 collections
46# specified in the new collect folder. Then it copies across the *contents* of the archives and
47# index folders of those 2 collections into their model-collect equivalents. You then still have to
48# do the final svn commit on the model-collect folder after looking over the differences.
49
50# Also valid examples:
51# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
52# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
53# ./gen-model-colls.sh --svnupdate
54
55# PSEUDOCODE
56# This script:
57# Checks out the model-collections folder from SVN
58# Makes a copy
59# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
60# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
61# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
62# model-collect/archives and model-collect/index.
63# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
64# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
65# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.
66
67# See earlier version of this script:
68# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
69# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
70
71# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
72# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories
73
74#*******************************GLOBAL VARIABLES***************************
75
76# mode can be svndelete or svnupdate
77mode=
78debug_mode=0
79commit_message=
80
81#*****************************FUNCTIONS*****************************
82
# Print the basename of a collection path, e.g. "collect/Tudor-Basic" -> "Tudor-Basic".
#
# Arguments: $1 - a collection name or a path ending in the collection folder
# Outputs:   the last path component on stdout
#
# DON'T ADD ANY FURTHER OUTPUT TO STDOUT IN FUNCTION get_col_basename:
# bash functions cannot return strings, so callers capture this function's stdout
# with $(get_col_basename ...) and anything else echoed here would corrupt the result.
# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
function get_col_basename () {
    # local, so that the caller's own $collection variable is left alone
    local collection=$1

    # Quoting (rather than backslash-escaping spaces) keeps a name containing
    # spaces as a single argument to basename; -- protects names starting with '-'.
    collection=$(basename -- "$collection")

    printf '%s\n' "$collection"
}
101
# Function that handles the --svnaddnew flag (mode) of this script for a single collection.
# Copies collect/<collection> into model-collect (minus any .svn folders) and schedules
# the copy for addition with svn add. Nothing is committed here.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Returns:   non-zero if the collection does not exist in collect
function svn_add_new_collection () {
    local collection
    collection=$(get_col_basename "$1")

    if [ -e "model-collect/$collection" ]; then
        echo "svn_add_new_collection: $collection already exists in model-collect, can't add it to svn."
        return
    fi

    if [ ! -d "collect/$collection" ]; then
        echo "svn_add_new_collection: $collection does not exist in collect, can't add it to svn." >&2
        return 1
    fi

    # -prune stops find from descending into the .svn folders it is about to delete,
    # which would otherwise produce "No such file or directory" errors
    find "collect/$collection" -name ".svn" -type d -prune -exec rm -rf {} +
    cp -r "collect/$collection" "model-collect/$collection"
    svn add "model-collect/$collection"
}
118
# Function that handles the --svndelete flag (mode) of this script.
#
# Steps:
#   1. svn remove archives and index in each collection of model-collect
#   2. commit all the removals in one go, then svn up model-collect
#   3. copy the newly rebuilt archives and index into each model-collection and svn add them
#   4. commit all the additions in one go
#
# Globals:   commit_message (read) - custom commit message; a default is used per commit if empty
#            debug_mode (read)     - when not 0, the svn commit and svn up steps are skipped
# Arguments: the collections to process; if none are given, everything in collect/* is processed
function svn_delete () {
    local col
    local -a targets

    if [ -z "$1" ]; then
        targets=(collect/*)
    else
        targets=("$@")
    fi

    # Work out both commit messages up front in local variables. Storing the
    # default for the first commit in the global commit_message would make the
    # "is it empty?" test for the second commit always fail, so that the second
    # commit would wrongly reuse the "1/2" message.
    local removal_message="${commit_message:-Clean rebuild of model collections 1/2. Clearing out deprecated archives and index.}"
    local addition_message="${commit_message:-Clean rebuild of model collections 2/2. Adding rebuilt archives and index.}"

    for col in "${targets[@]}"; do
        _del_col_archives_index "$col"
    done

    # svn commit all the svn rm statements done above in one go:
    # don't do `svn up` at this point, as doing so will then retrieve all the folders that just were svn-removed
    # Numerical comparisons: http://tldp.org/LDP/abs/html/comparison-ops.html
    if [ "$debug_mode" -eq 0 ]; then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $removal_message" model-collect
    fi

    # Having svn committed the deletes, do an svn up to locally delete what was svn-removed above,
    # BEFORE copying from the rebuilt archives and index folders
    if [ "$debug_mode" -eq 0 ]; then
        svn up model-collect
    fi

    # copy from the rebuilt archives and index over into the svn model-collect and svn add them
    for col in "${targets[@]}"; do
        _add_col_archives_index "$col"
    done

    # commit all the svn add statements done just above in one go
    if [ "$debug_mode" -eq 0 ]; then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $addition_message" model-collect
    fi

    echo
    echo "*********************"
    # report every collection that was processed, not just the last loop value
    echo "Done svn-deleting rebuilt model-collection(s): ${targets[*]##*/}"
    echo "*********************"
    echo
}
183
# To undo the changes made by svndelete, run the following manually
# svn revert --depth infinity model-collect/$collection/archives/*
# svn revert --depth infinity model-collect/$collection/index/*
# then remove both the local archives and index, and do an svn up to get original checkout back
188
# svn delete this collection's archives and index folders from model-collect.
# (The commit is done in one step by the caller, for all collections on which
# this function was called.)
#
# Globals:   debug_mode (read) - 0: svn rm the folders; 1: only delete them locally with rm -rf
# Arguments: $1 - collection name or path (only the basename is used)
function _del_col_archives_index () {
    local collection
    collection=$(get_col_basename "$1")

    # never build a destructive path from an empty collection name
    if [ -z "$collection" ]; then
        echo "del_col_archives_index: no collection name given" >&2
        return 1
    fi

    if [ ! -e "model-collect/$collection" ]; then
        echo "del_col_archives_index: $collection does not exist in model-collect"
        return
    fi

    # remove the entire archives and index folders
    if [ "$debug_mode" -eq 0 ]; then
        svn rm "model-collect/$collection/archives"
        svn rm "model-collect/$collection/index"
    elif [ "$debug_mode" -eq 1 ]; then
        rm -rf -- "model-collect/$collection/archives"
        rm -rf -- "model-collect/$collection/index"
    fi
}
212
213
# Copy the rebuilt collection's archives and index folders from collect into
# model-collect, and then svn add them there.
#
# Globals:   debug_mode (read) - the svn add is only done when this is 0
# Arguments: $1 - collection name or path (only the basename is used)
function _add_col_archives_index () {
    local collection
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "add_col_archives_index: $collection does not exist in model-collect"
        return
    fi

    # copy the entire rebuilt archives and index folders into the model collection
    cp -r "collect/$collection/archives" "model-collect/$collection/."
    cp -r "collect/$collection/index" "model-collect/$collection/."

    if [ "$debug_mode" -eq 0 ]; then
        svn add "model-collect/$collection/archives"
        svn add "model-collect/$collection/index"
    fi
}
235
236
# UNUSED, but useful for spotting differences between the collect and model-collect
# after rebuild, before svn updating/deleting, as opposed to at the end of the script.
#
# Diffs model-collect/<collection> against collect/<collection> (ignoring .svn entries)
# and appends to report.txt every diff line that mentions neither archives nor index.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Outputs:   a message on stdout when the collection is missing or has no differences;
#            the remaining diff lines are appended to report.txt in the current directory
function svn_process_single_collection () {
    local collection diff_result line
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "svn_process_single_collection: $collection does not exist in model-collect"
        return
    fi

    # diff the svn model and rebuilt model collections, once.
    # grep -F so that the dot in ".svn" is matched literally rather than as a regex wildcard.
    diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn")

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $collection"
        return
    fi

    # Check each diff line for files outside the archives and index folders.
    # Reading line by line with IFS set only for the read command means the
    # script-wide IFS never has to be changed and restored.
    # http://stackoverflow.com/questions/229551/string-contains-in-bash
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            # the file that is different is neither in index nor in archives, send this diffline to the report
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"
}
291
# Function that takes care of the --svnupdate flag mode of this script for a single collection.
# Copies the contents of the rebuilt archives and index folders over their equivalents
# in model-collect (leaving anything unique to model-collect untouched) and svn adds
# whatever is new. No svn commit is done: that is left to the user.
#
# Arguments: $1 - collection name or path (only the basename is used)
function update_single_collection () {
    local collection subdir
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "update_single_collection: $collection does not exist in model-collect"
        return
    fi

    # copy across the contents of the rebuilt model-collection's index and archives to the svn model-collect
    for subdir in archives index; do
        if [ ! -d "collect/$collection/$subdir" ]; then
            echo "update_single_collection: collect/$collection/$subdir does not exist, skipping it" >&2
            continue
        fi
        # make sure the target folder exists, else cp of several sources would fail
        mkdir -p "model-collect/$collection/$subdir"
        cp -r "collect/$collection/$subdir/"* "model-collect/$collection/$subdir/."
    done

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # (this step is deliberately not conditional on debug_mode, as in the original script)
    for subdir in archives index; do
        [ -d "model-collect/$collection/$subdir" ] || continue
        svn add --force "model-collect/$collection/$subdir/"* --auto-props --parents --depth infinity -q
    done

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    # if etc/collect.cfg is different, copy it across too?

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$collection"
    echo "*********************"
    echo
}
325
326
# re-build a single collection in "collect" which is a copy of model-collect:
# runs import.pl and buildcol.pl on it and then replaces its index folder with
# the freshly built building folder.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Returns:   non-zero (leaving the existing index folder in place) if the build
#            did not produce a building folder
function build_single_collection () {
    local collection
    collection=$(get_col_basename "$1")

    import.pl -removeold "$collection"
    buildcol.pl -removeold "$collection"

    # Only throw away the old index once there is a building folder to replace it
    # with, otherwise a failed build would leave the collection without any index.
    if [ ! -d "collect/$collection/building" ]; then
        echo "build_single_collection: no building folder produced for $collection, leaving its index untouched" >&2
        return 1
    fi

    rm -rf -- "collect/$collection/index"
    mv "collect/$collection/building" "collect/$collection/index"

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $collection"
    echo "*********************"
    echo
}
344
345
# Print the usage message and exit the script.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
#
# Arguments: $1 - optional exit status, defaults to 1 (pass 0 when help was asked for)
function usage() {
    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete|--svnaddnew] [--debug] [--message 'custom commit message'] [--help] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    echo "If none of svnupdate, svndelete and svnaddnew are provided, the collections are only rebuilt in collect and model-collect is left unaltered."
    echo "*******************************************"
    exit "${1:-1}"
}
357
358
359#*******************************MAIN PROGRAM***************************
360
# process optional command line arguments
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
# Execute getopt: it validates the options and emits them in a normalised order,
# followed by -- and then the non-option arguments (the collection names).
# Bad arguments make getopt fail, in which case the usage is shown.
if ! ARGS=$(getopt -o m:uxadh -l "message:,svnupdate,svndelete,svnaddnew,debug,help" -n "$0" -- "$@"); then
    usage
    exit 1
fi

# replace the positional parameters with getopt's normalised version of them
eval set -- "$ARGS"


# The modes svnaddnew, svndelete and svnupdate are mutually exclusive
# -n: http://tldp.org/LDP/abs/html/testconstructs.html
while true; do
  case "$1" in
    -h|--help)
      shift
      # help was asked for explicitly, so this is not an error exit
      usage 0
      exit 0
      ;;
    -a|--svnaddnew)
      shift
      if [ "$mode" == "svnupdate" ] || [ "$mode" == "svndelete" ]; then
        echo
        echo "Can't use both svnaddnew and svndelete/svnupdate"
        usage
        exit 1
      fi
      mode=svnaddnew
      ;;
    -x|--svndelete)
      shift
      if [ "$mode" == "svnupdate" ] || [ "$mode" == "svnaddnew" ]; then
        echo
        echo "Can't use both svndelete and svnupdate/svnaddnew"
        usage
        exit 1
      fi
      mode=svndelete
      ;;
    -u|--svnupdate)
      shift
      if [ "$mode" == "svndelete" ] || [ "$mode" == "svnaddnew" ]; then
        echo
        echo "Can't use both svnupdate and svndelete/svnaddnew"
        usage
        exit 1
      fi
      mode=svnupdate
      ;;
    -d|--debug)
      shift
      debug_mode=1
      ;;
    -m|--message)
      # getopt always supplies the option's value as the next word, even when it
      # is the empty string. Consume both unconditionally: leaving an empty value
      # unshifted would match no case arm and make this loop spin forever.
      commit_message=$2
      shift 2
      ;;
    --)
      shift
      break
      ;;
    *)
      # cannot normally happen after getopt's validation, but never loop forever
      echo "Unexpected argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done
435
436#echo "commit message: $commit_message"
437#echo "Debug mode is: $debug_mode"
438#exit
439
440
441# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
442# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect
443
444# the remaining arguments to the script are assumed to be collections
445
446# debugging
447#for collection in "$@"; do
448#    collection=collect/$collection
449#    echo "Collection: $collection"
450#done
451
452# finished processing arguments
453
454
# report.txt is what svn_process_single_collection appends diff lines to
# (differences outside of the archives and index folders); start with a clean one
if [ -f report.txt ]; then
    rm report.txt
fi

# Need pdfbox for the PDFBox tutorial
if [ ! -e ext/pdf-box ]; then
    # Work in a subshell: if ext cannot be entered, the script itself stays in the
    # toplevel folder instead of ending up in the parent folder after a "cd ..".
    (
        cd ext || exit 1
        # we are inside ext now, so the tarball is looked for relative to here
        if [ ! -e pdf-box-java.tar.gz ]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
        fi
        # unpack whether the tarball was just downloaded or was already present
        tar -xvzf pdf-box-java.tar.gz
    ) || echo "WARNING: could not set up the pdf-box extension in the ext folder" >&2
fi
469
470
# move the existing collect folder out of the way
# unless we are adding a new collection to svn, in which case, we'll grab them from whatever collect folder exists
if [ "x$mode" != "xsvnaddnew" ] && [ -e collect ] && [ ! -e collect_orig ]; then
    mv collect collect_orig
fi


# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [ -e model-collect ]; then
    if [ -z "$1" ]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
fi

# Make a copy of the model-collect named as the new collect
# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
# Then remove the copy's .svn folders
# NOTE: this only happens when collect_orig exists, i.e. when the original collect folder is safely out of the way.
if [ "x$mode" != "xsvnaddnew" ] && [ -e collect_orig ]; then

    echo "***********************************************"
    echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
    echo

    if [ ! -e collect ] || [ -z "$1" ]; then
        # no collect folder yet, or all collections are to be processed: start from a full fresh copy
        rm -rf collect
        cp -r model-collect collect
        # -prune: don't let find descend into the .svn folders it is deleting
        find collect -name ".svn" -type d -prune -exec rm -rf {} +
    else
        # refresh only the collections named on the command line
        for collection in "$@"; do
            rm -rf "collect/$collection"
            cp -r "model-collect/$collection" "collect/$collection"
            find "collect/$collection" -name ".svn" -type d -prune -exec rm -rf {} +
        done
    fi
    echo "***********************************************"
fi
522
523
# Set up the Greenstone environment for building
# (./ so that the setup.bash of this installation is used rather than one found on the PATH)
source ./setup.bash

# parse arguments
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script

# The loop variable is deliberately not called "collection", since the functions
# called in the loop body may assign to a variable of that name themselves.
if [ -z "$1" ]; then

    # all_collections
    # for each collection, import, build, move building to index
    for col_path in collect/*; do
        build_single_collection "$col_path"

        #svn_process_single_collection "$col_path"
        case "$mode" in
            svnupdate) update_single_collection "$col_path" ;;
            svnaddnew) svn_add_new_collection "$col_path" ;;
        esac
    done

    # having rebuilt all the collections, just the processing for svndelete remains:
    if [ "$mode" == "svndelete" ]; then
        svn_delete
    fi

else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists

    for col_name in "$@"; do
        col_path=collect/$col_name
        if [ -e "$col_path" ]; then
            build_single_collection "$col_path"

            #svn_process_single_collection "$col_path"
            case "$mode" in
                svnupdate) update_single_collection "$col_path" ;;
                svnaddnew) svn_add_new_collection "$col_path" ;;
            esac
        else
            echo
            echo "Can't find collection $col_path. Skipping."
            echo
        fi
    done

    # having rebuilt the specified collections above, just the processing for svndelete remains
    if [ "$mode" == "svndelete" ]; then
        # quoted, so that each collection name stays a single argument
        svn_delete "$@"
    fi
fi
584
585
echo
echo "*****************************************"
echo
# NO LONGER NECESSARY: WE'RE DOING A DIFF BETWEEN collect AND model-collect AT THIS SCRIPT'S END
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point the user to it
#if [ -f report.txt ] && [ "x$mode" != "x" ]; then
#    echo "Some files or folders outside of archives and index directories were different. See report.txt"
#    echo
#fi

# if no mode was given, then inform the user that model-collect is unchanged
# if svnupdating, then warn the user that model-collect still needs committing
# if svndeleting, then inform the user that model-collect has been changed and committed
# if svnaddnew-ing, then inform the user that the new collections were added to svn
case "$mode" in
    "")
        echo "* The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    svnupdate)
        echo "* TO DO: You still need to run svn status and then svn commit on the model-collect folder. Besides that:"
        ;;
    svndelete)
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
        ;;
    svnaddnew)
        echo "* The new collection(s) have been built, copied to model-collect and added to svn."
        ;;
esac
echo

if [ -n "$mode" ]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # the start marker is printed in both cases, so that it always pairs up with the end marker
    echo "---START DIFF---"
    if [ -z "$1" ]; then
        # grep -F: match ".svn" literally, not as a regex with a wildcard dot
        diff -rq model-collect collect | grep -vF ".svn"
    else
        for collection in "$@"; do
            echo "--COLLECTION: $collection"
            diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn"
            echo "--"
        done
    fi
    echo "---END DIFF---"
    echo
fi

if [ -e collect_orig ]; then
    echo "* The original collect directory has been left renamed as collect_orig"
    echo
fi

if [ "$debug_mode" -eq 1 ]; then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
fi
echo
echo "*****************************************"
echo
639
640
641# deletes empty dirs
642#       find collect/$collection/archives/HASH* -type d -empty -delete
643#       find collect/$collection/index/assoc/HASH* -type d -empty -delete
644
645# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
646# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
647#find collect -type d -empty -delete
648
649# The following when put in a separate script file will delete all folders from model-collect that are
650# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
651# ---------------------------------------------
652#!/bin/bash
653
654#for collection in collect/*; do
655    #escape the filename (in case of space)
656#    collection=`echo $collection | sed 's@ @\\\ @g'`
657   
658    #get just the basename
659#    collection=`basename $collection`
660
661    # HASH dirs that are empty in local collect's archives and index/assoc,
662    # need to be removed from the svn in model-collect
663
664#    for line in `find collect/$collection/archives/HASH* -type d -empty`; do
665#   modelline="model-$line"
666#   echo "LINE: $modelline"
667   
668    # remove from svn of model collect
669#   svn rm $modelline
670##  rm -rf $modelline
671    # remove physically from local collect
672#   rm -rf $line
673#    done
674
675#    for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
676#   modelline="model-$line"
677#   echo "LINE: $modelline"
678   
679    # remove from svn of model collect
680#   svn rm $modelline
681##  rm -rf $modelline
682    # remove physically from local collect
683#   rm -rf $line
684#    done
685
686#done
687# ---------------------------------------------
Note: See TracBrowser for help on using the browser.