source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28070

Last change on this file since 28070 was 28070, checked in by ak19, 11 years ago

No support for adding a new collection yet. At present the script is only concerned with rebuilding collections already added to SVN

File size: 21.6 KB
Line 
1#!/bin/bash
2
# PURPOSE
# This is not a nightly script. You use it to regenerate the model-collections
# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
# to documents or something that changes the contents of the index and
# archives folders. This has happened now with the commits
# http://trac.greenstone.org/changeset/28022 and
# http://trac.greenstone.org/changeset/28021
# These commits generate new stable HASH OIDs for the existing documents.
11
12
# USAGE
# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
# that you want to generate the model collections with.
# You can provide a list of collection names or none, in which case all the collections
# are processed.

# Pass in --svnupdate to copy across the contents of archives and index in the
# rebuilt collection, overwriting their equivalents in the svn model collection,
# but not removing any extraneous HASH folders already present.
# !!!!! IMPORTANT: if you pass in --svnupdate, it leaves you to do the final commit on
# the (svn) model-collect folder!

# Pass in --svndelete to remove the archives and index from svn in the model-collect
# and replace them with the rebuilt archives and index.
# The --svndelete flag is useful for when the HASH directory naming has changed and everything
# in archives and index has to be wiped out and moved back in from the rebuilt col.
# Passing in --svndelete will do the final commits on the model-collect folder.

# If neither flag is passed in, then the collections are rebuilt but neither the svn
# model-collect folder nor the repository is updated.
33
# Examples of usage:
# ./gen-model-colls.sh
# ./gen-model-colls.sh --svndelete
# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced

# The first just rebuilds all the collections in a new folder called collect and stops there

# The second rebuilds all the collections in collect and svn removes the archives and the index
# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
# and svn adds them.

# The third example checks out all the model-collections again, but rebuilds only the 2 collections
# specified in the new collect folder. Then it copies across the *contents* of the archives and
# index folders of those 2 collections into their model-collect equivalents. You then still have to
# do the final svn commit on the model-collect folder after looking over the differences.

# Also valid examples:
# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
# ./gen-model-colls.sh --svnupdate
54
# PSEUDOCODE
# This script:
# Checks out the model-collections folder from SVN
# Makes a copy
# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
# If --svndelete was passed in: svn removes each model-collect collection's archives and index folders, copies over
# the rebuilt collect archives and index folders into model-collect and svn adds them. Then SVN COMMITS
# the model-collect archives and index folders.
# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.
66
# See the earlier version of this script for how
# to svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives.
# See also http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control

# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories
73
#*******************************GLOBAL VARIABLES***************************

# mode: what to do with the svn model-collect folder after rebuilding.
# Empty (rebuild only), "svndelete" or "svnupdate"; set from the command line.
mode=
# debug_mode: 1 when --debug was given, in which case the svn commands guarded
# by this flag (rm/add/commit/up in the svndelete path) are skipped; 0 otherwise.
debug_mode=0
# commit_message: optional text given with -m/--message, used in the automatic
# svn commits made in svndelete mode.
commit_message=
80
#*****************************FUNCTIONS*****************************

#######################################
# Print the collection name, i.e. the last path component of a collection
# path such as collect/Tudor-Basic.
#
# The result is handed back on stdout (bash functions cannot return strings),
# so DON'T ADD ANY FURTHER OUTPUT TO STDOUT IN THIS FUNCTION: anything printed
# here becomes part of the value the caller captures with $(...).
# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
#
# Arguments: $1 - collection name or path (may contain spaces)
# Outputs:   the basename of $1 followed by a newline (empty line if $1 is empty)
#######################################
function get_col_basename () {
  local col_path=$1

  # basename needs an operand; an empty argument simply yields an empty name
  if [[ -z "$col_path" ]]; then
    printf '\n'
    return 0
  fi

  # Quoting keeps names containing spaces or glob characters intact, so no
  # backslash-escaping of the name is needed.
  basename -- "$col_path"
}
101
102
#######################################
# Handles the --svndelete flag (mode) of this script.
#
# Steps:
#   1. svn remove archives and index in each collection (_del_col_archives_index)
#   2. commit all the removals in one go, then svn up model-collect
#   3. copy the newly rebuilt archives and index into each model collection
#      and svn add them (_add_col_archives_index)
#   4. commit all the additions in one go
# The svn commit/up commands are skipped when debug_mode is not 0.
#
# Globals:   debug_mode (read), commit_message (read)
# Arguments: collection names or paths; when the first argument is empty or
#            missing, every entry matching collect/* is processed
#######################################
function svn_delete () {
  local -a targets=()
  if [[ -z "$1" ]]; then
    targets=(collect/*)
  else
    targets=("$@")
  fi

  # Each commit gets its own default text. A user-supplied message (if any)
  # is used for both commits. Keeping these in locals ensures the default of
  # the first commit is not reused for the second one.
  local removal_msg=$commit_message
  local addition_msg=$commit_message
  if [[ -z "$removal_msg" ]]; then
    removal_msg="Clean rebuild of model collections 1/2. Clearing out deprecated archives and index."
  fi
  if [[ -z "$addition_msg" ]]; then
    addition_msg="Clean rebuild of model collections 2/2. Adding rebuilt archives and index."
  fi

  local target
  for target in "${targets[@]}"; do
    _del_col_archives_index "$target"
  done

  # Numerical comparisons: http://tldp.org/LDP/abs/html/comparison-ops.html
  if [ "$debug_mode" -eq 0 ]; then
    # svn commit all the svn rm statements done above in one go.
    # Don't do `svn up` before this commit, as doing so would retrieve all the
    # folders that were just svn-removed.
    svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $removal_msg" model-collect

    # Having committed the deletes, svn up to locally delete what was
    # svn-removed above, BEFORE copying from the rebuilt archives and index.
    svn up model-collect
  fi

  # copy from the rebuilt archives and index over into the svn model-collect and svn add them
  for target in "${targets[@]}"; do
    _add_col_archives_index "$target"
  done

  # commit all the svn add statements done just above in one go
  if [ "$debug_mode" -eq 0 ]; then
    svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $addition_msg" model-collect
  fi

  echo
  echo "*********************"
  # ${targets[*]##*/} strips the leading directories from every entry
  echo "Done svn-deleting rebuilt model-collection(s): ${targets[*]##*/}"
  echo "*********************"
  echo
}
167
# To undo the changes made by svndelete, run the following manually
# svn revert --depth infinity model-collect/$collection/archives/*
# svn revert --depth infinity model-collect/$collection/index/*
# then remove both the local archives and index, and do an svn up to get the original checkout back
172
#######################################
# svn delete one collection's archives and index folders in model-collect.
# (The commit is done in one step, by the caller, for all collections on which
# this function was called.)
#
# When debug_mode is 1 the two folders are only removed locally with rm -rf
# and svn is not touched.
#
# Globals:   debug_mode (read)
# Arguments: $1 - collection name or path (only its basename is used)
# Outputs:   a message on stdout when the collection is not in model-collect
#######################################
function _del_col_archives_index () {
  local col_name
  # get just the basename
  col_name=$(get_col_basename "$1")

  # An empty name would make the paths below point at model-collect itself,
  # so treat it the same as a collection that does not exist.
  if [[ -z "$col_name" || ! -e "model-collect/$col_name" ]]; then
    echo "del_col_archives_index: $col_name does not exist in model-collect"
    return
  fi

  # remove the entire archives and index folders
  if [ "$debug_mode" -eq 0 ]; then
    svn rm "model-collect/$col_name/archives"
    svn rm "model-collect/$col_name/index"
  elif [ "$debug_mode" -eq 1 ]; then
    rm -rf -- "model-collect/$col_name/archives"
    rm -rf -- "model-collect/$col_name/index"
  fi
}
196
197
#######################################
# Copy one collection's rebuilt archives and index folders from collect into
# model-collect and then svn add them (the svn add is skipped unless
# debug_mode is 0).
#
# Globals:   debug_mode (read)
# Arguments: $1 - collection name or path (only its basename is used)
# Outputs:   a message on stdout when the collection is not in model-collect;
#            a warning on stderr for a rebuilt folder that does not exist
#######################################
function _add_col_archives_index () {
  local col_name
  # get just the basename
  col_name=$(get_col_basename "$1")

  if [[ -z "$col_name" || ! -e "model-collect/$col_name" ]]; then
    echo "add_col_archives_index: $col_name does not exist in model-collect"
    return
  fi

  # copy the entire rebuilt archives and index folders into the model collection
  local subdir
  for subdir in archives index; do
    if [[ ! -d "collect/$col_name/$subdir" ]]; then
      # nothing was rebuilt for this folder, so there is nothing to copy or add
      echo "add_col_archives_index: collect/$col_name/$subdir does not exist, skipping" >&2
      continue
    fi

    cp -r "collect/$col_name/$subdir" "model-collect/$col_name/."

    if [ "$debug_mode" -eq 0 ]; then
      svn add "model-collect/$col_name/$subdir"
    fi
  done
}
219
220
#######################################
# UNUSED, but useful for spotting differences between collect and
# model-collect after a rebuild, before svn updating/deleting, as opposed to
# at the end of the script.
#
# Diffs model-collect/<collection> against collect/<collection> (ignoring
# lines that mention .svn) and appends to report.txt every diff line that
# mentions neither "archives" nor "index".
#
# Arguments: $1 - collection name or path (only its basename is used)
# Outputs:   status messages on stdout; diff lines appended to report.txt
#######################################
function svn_process_single_collection () {
  local col_name
  # get just the basename
  col_name=$(get_col_basename "$1")

  if [[ ! -e "model-collect/$col_name" ]]; then
    echo "svn_process_single_collection: $col_name does not exist in model-collect"
    return
  fi

  # diff the svn model and rebuilt model collections, once.
  # grep -F matches ".svn" literally instead of treating the dot as a wildcard.
  local diff_result
  diff_result=$(diff -rq "model-collect/$col_name" "collect/$col_name" | grep -vF ".svn")

  # if no differences in the current collection, then we're done
  if [[ -z "$diff_result" ]]; then
    echo "No differences in collection $col_name"
    return
  fi

  # Walk the diff output line by line. Reading with "IFS= read -r" splits on
  # newlines only and leaves the script-wide IFS untouched.
  # http://stackoverflow.com/questions/229551/string-contains-in-bash
  local line
  while IFS= read -r line; do
    if [[ "$line" != *archives* && "$line" != *index* ]]; then
      # the file that is different is neither in index nor in archives,
      # send this diff line to the report
      printf '%s\n' "$line" >> report.txt
    fi
  done <<< "$diff_result"
}
275
#######################################
# Takes care of the --svnupdate flag mode of this script for a single
# collection: copies the contents of the rebuilt archives and index folders
# over their model-collect equivalents and svn adds whatever is in them.
# Nothing is committed; that is left to the user.
#
# Arguments: $1 - collection name or path (only its basename is used)
# Outputs:   progress messages on stdout; a warning on stderr for a rebuilt
#            folder that does not exist
#######################################
function update_single_collection () {
  local col_name
  # get just the basename
  col_name=$(get_col_basename "$1")

  if [[ -z "$col_name" || ! -e "model-collect/$col_name" ]]; then
    echo "update_single_collection: $col_name does not exist in model-collect"
    return
  fi

  local subdir
  local -a entries
  for subdir in archives index; do
    if [[ ! -d "collect/$col_name/$subdir" ]]; then
      echo "update_single_collection: collect/$col_name/$subdir does not exist, skipping" >&2
      continue
    fi

    # copy across the contents of the rebuilt folder to the svn model-collect.
    # "src/." copies the folder's contents (hidden files included) and also
    # works when the folder is empty, unlike "src/*".
    mkdir -p "model-collect/$col_name/$subdir"
    cp -r "collect/$col_name/$subdir/." "model-collect/$col_name/$subdir/"

    # now svn add any and all the NEW items in model-collect's folder
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # (this is done in debug mode too)
    entries=("model-collect/$col_name/$subdir"/*)
    if [[ -e "${entries[0]}" ]]; then
      svn add --force "${entries[@]}" --auto-props --parents --depth infinity -q
    fi
  done

  echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

  # if etc/collect.cfg is different, copy it across too?

  echo
  echo "*********************"
  echo "Done updating the rebuilt LOCAL model-collection: model-collect/$col_name"
  echo "*********************"
  echo
}
309
310
#######################################
# Re-build a single collection in "collect" (which is a copy of
# model-collect): runs import.pl and buildcol.pl with -removeold and then
# replaces the collection's index folder with the freshly built building
# folder.
#
# Arguments: $1 - collection name or path (only its basename is used)
# Returns:   0 when the new index is in place; 1 when no name was given or
#            the build left no building folder (the old index is then kept)
#######################################
function build_single_collection () {
  local col_name
  col_name=$(get_col_basename "$1")

  if [[ -z "$col_name" ]]; then
    echo "build_single_collection: no collection name given" >&2
    return 1
  fi

  import.pl -removeold "$col_name"
  buildcol.pl -removeold "$col_name"

  # Only throw the old index away once there is something to replace it with.
  if [[ ! -d "collect/$col_name/building" ]]; then
    echo "build_single_collection: collect/$col_name/building was not created, leaving the existing index untouched" >&2
    return 1
  fi

  rm -rf -- "collect/$col_name/index"
  mv -- "collect/$col_name/building" "collect/$col_name/index" || return 1

  echo
  echo "*********************"
  echo "Done rebuilding model collection: $col_name"
  echo "*********************"
  echo
}
328
329
#######################################
# Print the command-line help and terminate the script with exit status 1.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
#
# Outputs: usage text on stdout
#######################################
function usage() {
  echo "*******************************************"
  echo "Usage: $0 [-u|--svnupdate | -x|--svndelete] [-d|--debug] [-m|--message <commit message>] [col1 col2 col3 ...]"
  echo "If no collections are provided, all collections will be processed."
  echo "If neither --svnupdate nor --svndelete is provided, the collections are only rebuilt in collect and model-collect is left untouched."
  echo "*******************************************"
  exit 1
}
341
342
#*******************************MAIN PROGRAM***************************

#######################################
# Process the optional command line arguments with getopt.
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
#
# Recognised options: -x|--svndelete, -u|--svnupdate (mutually exclusive),
# -d|--debug and -m|--message <text>.
#
# Globals:   mode, debug_mode, commit_message (written),
#            _remaining_cli_args (written: the non-option arguments)
# Arguments: the script's command line
# Exits with status 1 (after printing the usage) on bad or conflicting options.
#######################################
function _parse_cli_args () {
  local parsed
  # Bad arguments
  if ! parsed=$(getopt -o m:uxd -l "message:,svnupdate,svndelete,debug" -n "$0" -- "$@"); then
    usage
    exit 1
  fi

  # getopt emits a shell-quoted argument list; eval re-reads that quoting
  eval set -- "$parsed"

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -x|--svndelete)
        shift
        if [[ "$mode" == "svnupdate" ]]; then
          echo
          echo "Can't use both svndelete and svnupdate"
          usage
          exit 1
        fi
        mode=svndelete
        ;;
      -u|--svnupdate)
        shift
        if [[ "$mode" == "svndelete" ]]; then
          echo
          echo "Can't use both svndelete and svnupdate"
          usage
          exit 1
        fi
        mode=svnupdate
        ;;
      -d|--debug)
        shift
        debug_mode=1
        ;;
      -m|--message)
        # getopt always supplies the option's value as the next word, so both
        # words are consumed even when the value is empty (otherwise the
        # empty word would never be shifted away and the loop would not end)
        if [[ -n "$2" ]]; then
          commit_message=$2
        fi
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        # not expected from getopt; bail out rather than spin on it
        echo "Unexpected argument while parsing options: $1" >&2
        usage
        exit 1
        ;;
    esac
  done

  # the remaining arguments to the script are assumed to be collections
  _remaining_cli_args=("$@")
}

_parse_cli_args "$@"
# from here on the positional parameters are just the collection names
set -- "${_remaining_cli_args[@]}"
401
#echo "commit message: $commit_message"
#echo "Debug mode is: $debug_mode"
#exit


# If no mode (svndelete|svnupdate) is provided as a cmd line arg, then the svn model-collect
# folder is not modified: the script stops after rebuilding the model-copy in collect.

# the remaining arguments to the script are assumed to be collections

# debugging
#for collection in "$@"; do
# collection=collect/$collection
# echo "Collection: $collection"
#done

# finished processing arguments
420
# report.txt receives the diff lines for files that differ outside the
# archives and index folders (see svn_process_single_collection).
# Remove any report left over from a previous run; -f keeps rm from
# prompting on a write-protected file.
if [[ -f report.txt ]]; then
  rm -f -- report.txt
fi
425
# Need pdfbox for the PDFBox tutorial: download and unpack the pdf-box
# extension into ext/ unless ext/pdf-box is already there.
if [[ ! -e ext/pdf-box ]]; then
  # Work in a subshell so that the script's own working directory never
  # changes, not even when ext/ is missing and the cd fails.
  (
    cd ext || exit 1
    # After the cd, the tarball's path is relative to ext/. Download only if
    # it is not there yet, but unpack in either case.
    if [[ ! -e pdf-box-java.tar.gz ]]; then
      wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
    fi
    tar -xvzf pdf-box-java.tar.gz
  ) || echo "WARNING: could not set up the pdf-box extension in ext; the PDFBox tutorial collection may not build." >&2
fi
435
436
# Move the existing collect folder out of the way, unless a collect_orig
# already exists (in which case collect is left where it is).
if [[ -e collect && ! -e collect_orig ]]; then
  # If the rename fails, carrying on would rebuild inside the original
  # collect folder, so stop here.
  if ! mv -- collect collect_orig; then
    echo "ERROR: could not rename collect to collect_orig. Aborting so that the original collect folder is left alone." >&2
    exit 1
  fi
fi
441
442
# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [[ -e model-collect ]]; then
  if [[ -z "$1" ]]; then
    svn up model-collect
  else
    for collection in "$@"; do
      svn up "model-collect/$collection"
    done
  fi
else
  # Without a model-collect checkout there is nothing to rebuild, so a failed
  # checkout ends the script.
  if ! svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect; then
    echo "ERROR: could not check out model-collect from svn." >&2
    exit 1
  fi
fi
457
# Remove every .svn folder below the given directory.
# -prune stops find from descending into a folder that is about to be deleted.
function _strip_svn_dirs () {
  find "$1" -type d -name ".svn" -prune -exec rm -rf -- {} +
}

#######################################
# Make a copy of model-collect named collect (or, if collections are
# specified and collect already exists, re-copy just those collections from
# model-collect into collect). Then remove the copy's .svn folders.
#
# Arguments: the collection names given on the command line (may be none)
# Returns:   1 when model-collect is missing, or when collect exists without
#            a collect_orig (collect would then be the user's original folder)
#######################################
function _refresh_collect_copy () {
  local status=0
  local col

  echo "***********************************************"
  echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
  echo

  if [[ ! -d model-collect ]]; then
    echo "ERROR: there is no model-collect folder to copy from." >&2
    status=1
  elif [[ ! -e collect ]]; then
    # no working copy yet (also the case when there never was a collect
    # folder to move aside): copy everything
    cp -r model-collect collect
    _strip_svn_dirs collect
  elif [[ ! -e collect_orig ]]; then
    echo "ERROR: collect exists but collect_orig does not. Not overwriting what may be the original collect folder." >&2
    status=1
  elif [[ -z "$1" ]]; then
    # collect is a copy made by an earlier run: replace it entirely
    rm -rf collect
    cp -r model-collect collect
    _strip_svn_dirs collect
  else
    # collect is a copy made by an earlier run: replace only the named collections
    for col in "$@"; do
      if [[ -z "$col" || ! -e "model-collect/$col" ]]; then
        echo "Can't find model-collect/$col. Not copying it into collect." >&2
        continue
      fi
      rm -rf -- "collect/$col"
      cp -r "model-collect/$col" "collect/$col"
      _strip_svn_dirs "collect/$col"
    done
  fi

  echo "***********************************************"
  return "$status"
}

_refresh_collect_copy "$@"
485
# Set up the Greenstone environment for building.
# This script is meant to sit in the toplevel of the Greenstone installation,
# next to setup.bash.
if [[ ! -f setup.bash ]]; then
  echo "ERROR: setup.bash not found in $PWD. Run this script from the toplevel of the Greenstone installation." >&2
  exit 1
fi
source ./setup.bash

# The rebuild below calls import.pl and buildcol.pl by name. If they cannot be
# found, every build would fail, so stop before anything gets rebuilt or
# (in svndelete mode) committed.
for build_tool in import.pl buildcol.pl; do
  if ! command -v "$build_tool" > /dev/null 2>&1; then
    echo "ERROR: $build_tool is not available after sourcing setup.bash." >&2
    exit 1
  fi
done
488
#######################################
# Rebuild the requested collections and apply the chosen svn mode.
#
# For each collection: import, build, move building to index
# (build_single_collection) and, in svnupdate mode, copy the result into
# model-collect (update_single_collection). In svndelete mode, svn_delete is
# run once after all collections have been rebuilt.
#
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script
#
# Globals:   mode (read)
# Arguments: collection names; when the first argument is empty or missing,
#            every entry matching collect/* is processed
#######################################
function _rebuild_requested_collections () {
  local -a col_paths=()
  local name
  if [[ -z "$1" ]]; then
    # all collections
    col_paths=(collect/*)
  else
    # command-line args are a list of collections
    for name in "$@"; do
      col_paths+=("collect/$name")
    done
  fi

  local col_path
  for col_path in "${col_paths[@]}"; do
    # process each collection after confirming that it exists
    if [[ ! -e "$col_path" ]]; then
      echo "Can't find collection $col_path. Skipping."
      continue
    fi

    if ! build_single_collection "$col_path"; then
      echo "Rebuilding $col_path failed. Skipping any svn update for it." >&2
      continue
    fi

    if [[ "$mode" == "svnupdate" ]]; then
      update_single_collection "$col_path"
    fi
  done

  # having rebuilt the collections above, just the processing for svndelete remains
  if [[ "$mode" == "svndelete" ]]; then
    if [[ -z "$1" ]]; then
      svn_delete
    else
      svn_delete "$@"
    fi
  fi
}

_rebuild_requested_collections "$@"
541
542
#######################################
# Print the closing summary of the run:
# - if not svnupdating or svndeleting, inform the user that model-collect is unchanged
# - if svnupdating, warn the user that model-collect still needs committing
# - if svndeleting, inform the user that model-collect has been changed and
#   committed (or, in debug mode, only changed locally)
# and, when a mode was set, the differences remaining between model-collect
# and collect.
#
# Globals:   mode (read), debug_mode (read)
# Arguments: collection names; when the first argument is empty or missing,
#            the whole of model-collect is diffed against collect
#######################################
function _print_run_summary () {
  local col

  echo
  echo "*****************************************"
  echo

  case "$mode" in
    "")
      echo "* The model-collect folder has not been altered. Changes have only been made to collect"
      ;;
    svnupdate)
      echo "* TO DO: You still need to run svn status and then svn commit on the model-collect folder. Besides that:"
      ;;
    svndelete)
      if [[ "${debug_mode:-0}" -eq 1 ]]; then
        # in debug mode the svndelete path skips svn rm/add/commit
        echo "* The model-collect folder's archives and index subfolders have been replaced locally, but nothing was committed to svn."
      else
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
      fi
      ;;
  esac
  echo

  if [[ -n "$mode" ]]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # the start marker is printed in both cases, to pair with the end marker
    echo "---START DIFF---"
    if [[ -z "$1" ]]; then
      diff -rq model-collect collect | grep -vF ".svn"
    else
      for col in "$@"; do
        echo "--COLLECTION: $col"
        diff -rq "model-collect/$col" "collect/$col" | grep -vF ".svn"
        echo "--"
      done
    fi
    echo "---END DIFF---"
    echo
  fi

  echo "* The original collect directory has been left renamed as collect_orig"
  echo

  if [[ "${debug_mode:-0}" -eq 1 ]]; then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
  fi
  echo
  echo "*****************************************"
  echo
}

_print_run_summary "$@"
592
593
# The following deletes empty dirs:
# find collect/$collection/archives/HASH* -type d -empty -delete
# find collect/$collection/index/assoc/HASH* -type d -empty -delete

# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
#find collect -type d -empty -delete
601
# The following, when put in a separate script file, will delete all folders from model-collect that are
# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
# ---------------------------------------------
#!/bin/bash

#for collection in collect/*; do
 #escape the filename (in case of space)
# collection=`echo $collection | sed 's@ @\\\ @g'`

 #get just the basename
# collection=`basename $collection`

 # HASH dirs that are empty in local collect's archives and index/assoc,
 # need to be removed from the svn in model-collect

# for line in `find collect/$collection/archives/HASH* -type d -empty`; do
# modelline="model-$line"
# echo "LINE: $modelline"

 # remove from svn of model collect
# svn rm $modelline
## rm -rf $modelline
 # remove physically from local collect
# rm -rf $line
# done

# for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
# modelline="model-$line"
# echo "LINE: $modelline"

 # remove from svn of model collect
# svn rm $modelline
## rm -rf $modelline
 # remove physically from local collect
# rm -rf $line
# done

#done
# ---------------------------------------------
Note: See TracBrowser for help on using the repository browser.