source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28073

Last change on this file since 28073 was 28073, checked in by ak19, 11 years ago

Forgot to add the help cmdline option

File size: 21.7 KB
RevLine 
[28026]1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
[28037]14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
[28026]15# that you want to generate the model collections with.
[28037]16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
[28026]18
[28037]19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
[28049]22# !!!!! IMPORTANT: if you pass in svnupdate, it leaves you to do the final commit on
23# the (svn) model-collect folder!
[28037]24
[28049]25# Pass in --svndelete to remove the archives and index from svn in the model-collect
26# and replace this with the rebuilt archives and index
27# The --svndelete is useful for when the HASH directory naming has changed and everything
[28037]28# in archives and index has to be wiped out and moved back in from the rebuilt col.
[28049]29# Passing in --svndelete will do the final commits on the model-collect folder.
[28037]30
[28049]31# If neither flag is passed in, then the collections are rebuilt but the svn model-collect
[28037]32# is not updated and the repository is not updated.
33
34# Examples of usage:
35# ./gen-model-colls.sh
36# ./gen-model-colls.sh --svndelete
37# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
38
39# The first just rebuilds all the collections in a new folder called collect and stops there
40
[28049]41# The second rebuilds all the collections in collect and svn removes the archives and the index
42# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
43# and svn adds them.
[28037]44
[28049]45# The third example checks out all the model-collections again, but rebuilds only the 2 collections
46# specified in the new collect folder. Then it copies across the *contents* of the archives and
47# index folders of those 2 collections into their model-collect equivalents. You then still have to
48# do the final svn commit on the model-collect folder after looking over the differences.
49
[28037]50# Also valid examples:
51# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
52# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
53# ./gen-model-colls.sh --svnupdate
54
[28026]55# PSEUDOCODE
56# This script:
57# Checks out the model-collections folder from SVN
58# Makes a copy
59# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
[28049]60# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
61# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
62# model-collect/archives and model-collect/index.
[28037]63# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
64# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
[28049]65# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.
[28026]66
[28048]67# See earlier version of this script:
68# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
69# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
70
71# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
72# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories
73
[28069]74#*******************************GLOBAL VARIABLES***************************
[28048]75
# Operating mode chosen on the command line: "svndelete", "svnupdate", or
# empty (collections are only rebuilt in collect; model-collect is untouched).
mode=
# Set to 1 by --debug: the svn rm/commit/up/add steps of the svndelete mode
# are skipped (plain rm is used instead of svn rm).
debug_mode=0
# Custom svn commit message given with --message; when empty, svn_delete
# falls back to its built-in default messages.
commit_message=
80
81#*****************************FUNCTIONS*****************************
82
[28048]83# DON'T ADD ANY FURTHER ECHO STATEMENTS IN FUNCTION get_col_basename
84# "you have to be really careful on what you have in this function, as having any code which will eventually echo will mean that you get incorrect return string."
85# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
function get_col_basename () {
    # Prints the last path component of a collection path, e.g.
    # "collect/Tudor-Basic" -> "Tudor-Basic".
    #
    # Arguments: $1 - collection name or path (may contain spaces)
    # Outputs:   the bare collection name on stdout
    #
    # The result is "returned" through stdout (bash functions cannot return
    # strings), so this function must not print anything else.
    #
    # The path is passed quoted rather than backslash-escaped: escaping the
    # spaces and then expanding unquoted still word-splits the name, which
    # made basename see several arguments and print a truncated name.
    basename -- "$1"
}
101
102
[28049]103# Function that handles the --svndelete flag (mode) of this script, either for all collections in collect or for the collections passed in as arguments
function svn_delete () {
    # Implements the --svndelete mode:
    #   1. svn remove archives and index in each collection of model-collect
    #   2. commit the removals, then svn up so they are deleted locally
    #   3. copy the rebuilt archives and index into each model collection
    #      and svn add them
    #   4. commit the additions
    #
    # Arguments: collection names/paths to process; if the first argument is
    #            empty or missing, every entry of collect/* is processed.
    # Globals:   commit_message (read) - custom commit message, may be empty
    #            debug_mode (read)     - when not 0, no svn commit/up is run
    # Returns:   1 if one of the svn commits fails, otherwise 0.
    local -a targets
    local target
    # Work on local copies of the message. Assigning the phase-1 default to
    # the global commit_message made the phase-2 default unreachable, so both
    # commits ended up with the "1/2" message.
    local rm_message=$commit_message
    local add_message=$commit_message

    if [ "x$1" == "x" ]; then
        targets=(collect/*)
    else
        targets=("$@")
    fi

    for target in "${targets[@]}"; do
        _del_col_archives_index "$target"
    done

    # svn commit all the svn rm statements done above in one go.
    # Don't do `svn up` before this commit, as doing so would retrieve all
    # the folders that were just svn-removed.
    if [ -z "$rm_message" ]; then
        rm_message="Clean rebuild of model collections 1/2. Clearing out deprecated archives and index."
    fi

    if [ "$debug_mode" -eq 0 ]; then
        if ! svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $rm_message" model-collect; then
            echo "svn_delete: committing the removals failed, not copying the rebuilt folders" >&2
            return 1
        fi
        # Having committed the deletes, svn up to locally delete what was
        # svn-removed, BEFORE copying from the rebuilt archives and index.
        svn up model-collect
    fi

    # copy the rebuilt archives and index over into model-collect and svn add them
    for target in "${targets[@]}"; do
        _add_col_archives_index "$target"
    done

    # commit all the svn add statements done just above in one go
    if [ -z "$add_message" ]; then
        add_message="Clean rebuild of model collections 2/2. Adding rebuilt archives and index."
    fi

    if [ "$debug_mode" -eq 0 ]; then
        if ! svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $add_message" model-collect; then
            echo "svn_delete: committing the rebuilt archives and index failed" >&2
            return 1
        fi
    fi

    echo
    echo "*********************"
    # list every processed collection, not just the last loop value
    echo "Done svn-deleting rebuilt model-collection: ${targets[*]}"
    echo "*********************"
    echo
}
167
168# To undo the changes made by svndelete, run the following manually
169# svn revert --depth infinity model-collect/$collection/archives/*
170# svn revert --depth infinity model-collect/$collection/index/*
171# then remove both the local archives and index, and do an svn up to get original checkout back
172
173# svn delete this collection's archives and index folders
174# (The commit is done afterwards, in one step, for all collections on which this function was called)
function _del_col_archives_index () {
    # Removes the archives and index folders of one collection from
    # model-collect. Nothing is committed here.
    #
    # Arguments: $1 - collection name or path (only its basename is used)
    # Globals:   debug_mode (read) - 0: schedule removal with `svn rm`;
    #                                1: delete the folders with plain rm
    local col_name
    col_name=$(get_col_basename "$1")

    # An empty name would make the paths below resolve to model-collect
    # itself, so treat it like a missing collection.
    if [ -z "$col_name" ] || [ ! -e "model-collect/$col_name" ]; then
        echo "del_col_archives_index: $col_name does not exist in model-collect"
        return
    fi

    # remove the entire archives and index folders
    if [ "$debug_mode" -eq 0 ]; then
        svn rm "model-collect/$col_name/archives"
        svn rm "model-collect/$col_name/index"
    elif [ "$debug_mode" -eq 1 ]; then
        rm -rf -- "model-collect/$col_name/archives"
        rm -rf -- "model-collect/$col_name/index"
    fi
}
196
197
198# copy and then svn add the collection's archives and index folders
function _add_col_archives_index () {
    # Copies the rebuilt archives and index folders of one collection from
    # collect into model-collect and schedules them for addition to svn.
    # Nothing is committed here.
    #
    # Arguments: $1 - collection name or path (only its basename is used)
    # Globals:   debug_mode (read) - `svn add` is only run when it is 0
    local col_name
    col_name=$(get_col_basename "$1")

    if [ -z "$col_name" ] || [ ! -e "model-collect/$col_name" ]; then
        echo "add_col_archives_index: $col_name does not exist in model-collect"
        return
    fi

    # copy the entire rebuilt archives and index folders into the model collection
    cp -r "collect/$col_name/archives" "model-collect/$col_name/."
    cp -r "collect/$col_name/index" "model-collect/$col_name/."

    if [ "$debug_mode" -eq 0 ]; then
        svn add "model-collect/$col_name/archives"
        svn add "model-collect/$col_name/index"
    fi
}
219
220
[28049]221# UNUSED, but useful for spotting differences between the collect and model-collect
222# after rebuild, before svn updating/deleting, as opposed to at the end of the script
function svn_process_single_collection () {
    # UNUSED by the main program. Diffs one collection in model-collect
    # against its rebuilt copy in collect and appends to report.txt every
    # diff line that mentions neither "archives" nor "index", i.e. the
    # differences outside the folders this script is expected to change.
    #
    # Arguments: $1 - collection name or path (only its basename is used)
    # Outputs:   a status line on stdout when the collection is missing or
    #            has no differences; diff lines are appended to ./report.txt
    local col_name diff_result diff_line
    col_name=$(get_col_basename "$1")

    if [ ! -e "model-collect/$col_name" ]; then
        echo "svn_process_single_collection: $col_name does not exist in model-collect"
        return
    fi

    # diff the svn model and rebuilt model collections once, dropping lines
    # that mention .svn (-F: match the literal string, '.' is not a wildcard)
    diff_result=$(diff -rq "model-collect/$col_name" "collect/$col_name" | grep -vF ".svn")

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $col_name"
        return
    fi

    # Walk the diff output line by line. Reading with `IFS= read -r` keeps
    # each line intact without changing the global IFS.
    while IFS= read -r diff_line; do
        if [[ "$diff_line" != *archives* && "$diff_line" != *index* ]]; then
            # the differing file is in neither index nor archives: report it
            printf '%s\n' "$diff_line" >> report.txt
        fi
    done <<< "$diff_result"
}
[28037]275
[28049]276# Function that takes care of the --svnupdate flag mode of this script for a single collection
function update_single_collection () {
    # Implements the --svnupdate mode for one collection: copies the
    # *contents* of the rebuilt archives and index folders from collect into
    # model-collect (overwriting files that exist in both, leaving files
    # unique to model-collect alone) and svn adds whatever is new.
    # Nothing is committed; that is left to the user.
    #
    # Arguments: $1 - collection name or path (only its basename is used)
    local col_name
    col_name=$(get_col_basename "$1")

    if [ -z "$col_name" ] || [ ! -e "model-collect/$col_name" ]; then
        echo "update_single_collection: $col_name does not exist in model-collect"
        return
    fi

    # make sure the destination folders exist, otherwise cp has no target
    mkdir -p "model-collect/$col_name/archives" "model-collect/$col_name/index"

    # copy across the contents of the rebuilt collection's index and archives
    cp -r "collect/$col_name/archives/"* "model-collect/$col_name/archives/."
    cp -r "collect/$col_name/index/"* "model-collect/$col_name/index/."

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # (run regardless of debug_mode, as in the original script)
    svn add --force "model-collect/$col_name/archives/"* --auto-props --parents --depth infinity -q
    svn add --force "model-collect/$col_name/index/"* --auto-props --parents --depth infinity -q

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    # if etc/collect.cfg is different, copy it across too?

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$col_name"
    echo "*********************"
    echo
}
[28037]309
310
311# re-build a single collection in "collect" which is a copy of model-collect
function build_single_collection () {
    # Rebuilds one collection inside collect: runs import.pl and buildcol.pl
    # with -removeold, then replaces the collection's index folder with the
    # freshly produced building folder.
    #
    # Arguments: $1 - collection name or path (only its basename is used)
    # Returns:   0 on success; 1 if no name was given or the import/build
    #            failed. On failure the existing index folder is kept, so a
    #            broken build does not leave the collection without an index.
    local col_name
    col_name=$(get_col_basename "$1")

    if [ -z "$col_name" ]; then
        echo "build_single_collection: no collection name given" >&2
        return 1
    fi

    if ! import.pl -removeold "$col_name"; then
        echo "build_single_collection: import.pl failed for $col_name" >&2
        return 1
    fi

    # only swap the folders when there really is a new building folder
    if ! buildcol.pl -removeold "$col_name" || [ ! -d "collect/$col_name/building" ]; then
        echo "build_single_collection: buildcol.pl failed for $col_name, index left untouched" >&2
        return 1
    fi

    rm -rf -- "collect/$col_name/index"
    mv "collect/$col_name/building" "collect/$col_name/index"

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $col_name"
    echo "*********************"
    echo
}
328
329
[28037]330# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
function usage() {
    # Prints the command line help and exits the script.
    #
    # Arguments: $1 - optional exit status, defaults to 1 (the historical
    #                 behaviour); pass 0 when help was explicitly requested.
    local exit_code=${1:-1}

    echo "*******************************************"
    echo "Usage: $0 [-u|--svnupdate | -x|--svndelete] [-d|--debug] [-m|--message 'custom commit message'] [-h|--help] [col1 col2 col3 ...]";
    echo "If no collections are provided, all collections will be processed.";
    # Without a mode flag the script only rebuilds; it does not default to svnupdate.
    echo "If neither svnupdate nor svndelete are provided, the collections are only rebuilt in collect and model-collect is not modified.";
    echo "*******************************************"
    exit "$exit_code";
}
341
342
[28069]343#*******************************MAIN PROGRAM***************************
[28027]344
[28037]345# process optional command line arguments
346# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
347# Execute getopt
# Parses the command line options with getopt(1).
#
# Arguments: the script's command line ("$@")
# Globals:   mode, debug_mode, commit_message (written)
#            remaining_args (written) - array of the non-option arguments,
#                                       i.e. the collection names
# Exits:     via usage on bad options, on --help, or when both
#            --svnupdate and --svndelete are given.
function parse_cmdline () {
    local parsed

    # Bad arguments
    if ! parsed=$(getopt -o m:uxdh -l "message:,svnupdate,svndelete,debug,help" -n "$0" -- "$@"); then
        usage
        exit 1
    fi

    eval set -- "$parsed"

    while [ $# -gt 0 ]; do
        case "$1" in
            -h|--help)
                shift
                # help was asked for: exit status 0 (an older usage that
                # ignores its argument still exits by itself)
                usage 0
                exit 0
                ;;
            -x|--svndelete)
                shift
                if [ "x$mode" == "xsvnupdate" ]; then
                    echo
                    echo "Can't use both svndelete and svnupdate"
                    usage
                    exit 1
                else
                    mode=svndelete
                fi
                ;;
            -u|--svnupdate)
                shift
                if [ "x$mode" == "xsvndelete" ]; then
                    echo
                    echo "Can't use both svndelete and svnupdate"
                    usage
                    exit 1
                else
                    mode=svnupdate
                fi
                ;;
            -d|--debug)
                shift
                debug_mode=1
                ;;
            -m|--message)
                # getopt always emits a value after -m, even an empty one.
                # Always consume it: leaving an empty value in place made
                # the loop spin forever on `--message ''`.
                commit_message=$2
                shift 2
                ;;
            --)
                shift
                break
                ;;
            *)
                # cannot happen with the getopt spec above, but never loop forever
                echo "Unexpected argument: $1" >&2
                usage
                exit 1
                ;;
        esac
    done

    remaining_args=("$@")
}

parse_cmdline "$@"
# the remaining arguments to the script are assumed to be collections
set -- "${remaining_args[@]}"
406
[28069]407#echo "commit message: $commit_message"
408#echo "Debug mode is: $debug_mode"
409#exit
410
[28070]411
[28037]412# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
[28048]413# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect
[28037]414
415# the remaining arguments to the script are assumed to be collections
416
417# debugging
418#for collection in "$@"; do
419# collection=collect/$collection
420# echo "Collection: $collection"
421#done
422
423# finished processing arguments
424
425
# report.txt receives the diff lines for files outside archives and index
# (written by svn_process_single_collection); start each run without a stale one
if [[ -f report.txt ]]; then
    rm -f -- report.txt
fi
430
[28027]431# Need pdfbox for the PDFBox tutorial
# Makes sure the pdf-box extension is present under ext/ (needed for the
# PDFBox tutorial collection): downloads the tarball into ext/ unless it is
# already there, then unpacks it.
#
# Returns: 0 if ext/pdf-box already exists or the tarball was unpacked,
#          non-zero otherwise.
#
# All work happens in a subshell, so the caller's working directory never
# changes. Previously a failed `cd ext` followed by `cd ..` moved the whole
# script out of the Greenstone directory.
function ensure_pdfbox_extension () {
    if [ -e ext/pdf-box ]; then
        return 0
    fi

    if [ ! -d ext ]; then
        echo "ensure_pdfbox_extension: no ext folder here, cannot install pdf-box" >&2
        return 1
    fi

    (
        cd ext || exit 1
        # we are inside ext now, so the tarball path is relative to it
        if [ ! -e pdf-box-java.tar.gz ]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
        fi
        tar -xvzf pdf-box-java.tar.gz
    )
}

ensure_pdfbox_extension
440
441
[28026]442# move the existing collect folder out of the way
# Keep the user's original collect folder safe as collect_orig (only on the
# first run; an existing collect_orig is never overwritten).
if [[ -e collect && ! -e collect_orig ]]; then
    # Stop if the folder cannot be set aside: everything below rebuilds
    # inside "collect" and must not run against the original collections.
    if ! mv collect collect_orig; then
        echo "Could not rename collect to collect_orig. Stopping." >&2
        exit 1
    fi
fi
446
[28048]447
448# get model-collect from svn
449# if we already have it, svn update the entire model-collect folder if processing all collections
450# or svn update just any collections specified in the model-collect folder
# Bring model-collect up to date: update the existing working copy (whole
# folder, or just the collections named on the command line), or check it
# out when it is not there yet.
if [[ -e model-collect ]]; then
    if [[ -z "$1" ]]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    # nothing below can work without the model collections
    if ! svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect; then
        echo "Could not check out model-collect from svn. Stopping." >&2
        exit 1
    fi
fi
462
463# Make a copy of the model-collect named as the new collect
464# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
465# Then remove the copy's .svn folders
# Deletes every .svn folder below directory $1.
# -prune stops find from descending into a folder it is about to delete.
function _strip_svn_dirs () {
    find "$1" -name ".svn" -type d -prune -exec rm -rf -- {} +
}

# Creates or refreshes "collect" as a copy of model-collect without .svn folders.
#
# Arguments: optional collection names. If collect already exists and names
#            are given, only those collections are replaced; otherwise the
#            whole of collect is recreated from model-collect.
# Returns:   0 when the copy was made or was deliberately skipped,
#            1 when model-collect is missing.
function refresh_collect_copy () {
    local col

    # A collect folder without collect_orig means the user's original
    # collect was never set aside: leave it alone.
    if [ -e collect ] && [ ! -e collect_orig ]; then
        return 0
    fi

    if [ ! -d model-collect ]; then
        echo "refresh_collect_copy: model-collect does not exist, nothing to copy" >&2
        return 1
    fi

    if [ ! -e collect ] || [ -z "$1" ]; then
        # no copy yet, or all collections requested: start from a clean copy
        rm -rf -- collect
        cp -r model-collect collect
        _strip_svn_dirs collect
    else
        for col in "$@"; do
            # an empty name would address collect itself
            [ -n "$col" ] || continue
            rm -rf -- "collect/$col"
            cp -r "model-collect/$col" "collect/$col"
            _strip_svn_dirs "collect/$col"
        done
    fi
}

echo "***********************************************"
echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
echo
refresh_collect_copy "$@"
echo "***********************************************"
[28026]490
491# Set up the Greenstone environment for building
source setup.bash

# Work out which collections to rebuild: everything in collect when no
# collection was named on the command line, otherwise the named ones.
build_targets=()
if [ "$1" == "" ]; then
    build_targets=(collect/*)
else
    for collection in "$@"; do
        build_targets+=("collect/$collection")
    done
fi

# For each collection: import, build, move building to index. In svnupdate
# mode the rebuilt archives and index are then copied into model-collect.
for build_target in "${build_targets[@]}"; do
    if [ -e "$build_target" ]; then
        build_single_collection "$build_target"

        #svn_process_single_collection "$build_target"

        if [ "x$mode" == "xsvnupdate" ]; then
            update_single_collection "$build_target"
        fi
    else
        echo "Can't find collection $build_target. Skipping."
    fi
done

# Having rebuilt the collections, just the processing for svndelete remains.
# "$@" is quoted so that names reach svn_delete unsplit; with no arguments it
# expands to nothing and svn_delete processes all collections.
if [ "x$mode" == "xsvndelete" ]; then
    svn_delete "$@"
fi
[28037]546
547
# Prints the closing summary: what happened to model-collect in the chosen
# mode and, when a mode was set, the differences that remain between
# model-collect and collect.
#
# Arguments: the collection names given on the command line (may be none)
# Globals:   mode, debug_mode (read)
function print_final_report () {
    local col

    echo
    echo "*****************************************"
    echo
    # (The old report.txt hint is no longer needed: the remaining
    # differences are printed directly below.)

    # if not svnupdating or svndeleting, then inform the user that model-collect is unchanged
    # if svnupdating, then warn the user that model-collect still needs committing
    # if svndeleting, then inform the user that model-collect has been changed and committed
    if [ "x$mode" == "x" ]; then
        echo "* The model-collect folder has not been altered. Changes have only been made to collect"
    elif [ "x$mode" == "xsvnupdate" ]; then
        echo "* TO DO: You still need to run svn status and then svn commit on the model-collect folder. Besides that:"
    elif [ "x$mode" == "xsvndelete" ]; then
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
    fi
    echo

    if [ "x$mode" != "x" ]; then
        echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
        echo
        # the start marker is printed in both cases so it always pairs with the end marker
        echo "---START DIFF---"
        if [ "$1" == "" ]; then
            diff -rq model-collect collect | grep -vF ".svn"
        else
            for col in "$@"; do
                echo "--COLLECTION: $col"
                diff -rq "model-collect/$col" "collect/$col" | grep -vF ".svn"
                echo "--"
            done
        fi
        echo "---END DIFF---"
        echo
    fi

    echo "* The original collect directory has been left renamed as collect_orig"
    echo

    if [ "${debug_mode:-0}" -eq 1 ]; then
        echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
    fi
    echo
    echo "*****************************************"
    echo
}

print_final_report "$@"
597
598
[28048]599# deletes empty dirs
600# find collect/$collection/archives/HASH* -type d -empty -delete
601# find collect/$collection/index/assoc/HASH* -type d -empty -delete
602
603# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
604# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
605#find collect -type d -empty -delete
606
607# The following when put in a separate script file will delete all folders from model-collect that are
[28046]608# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
[28049]609# ---------------------------------------------
[28048]610#!/bin/bash
[28046]611
612#for collection in collect/*; do
613 #escape the filename (in case of space)
614# collection=`echo $collection | sed 's@ @\\\ @g'`
615
616 #get just the basename
617# collection=`basename $collection`
618
619 # HASH dirs that are empty in local collect's archives and index/assoc,
620 # need to be removed from the svn in model-collect
621
622# for line in `find collect/$collection/archives/HASH* -type d -empty`; do
623# modelline="model-$line"
624# echo "LINE: $modelline"
625
626 # remove from svn of model collect
627# svn rm $modelline
628## rm -rf $modelline
629 # remove physically from local collect
630# rm -rf $line
631# done
632
633# for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
634# modelline="model-$line"
635# echo "LINE: $modelline"
636
637 # remove from svn of model collect
638# svn rm $modelline
639## rm -rf $modelline
640 # remove physically from local collect
641# rm -rf $line
642# done
643
[28048]644#done
[28049]645# ---------------------------------------------
Note: See TracBrowser for help on using the repository browser.