source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28048

Last change on this file since 28048 was 28048, checked in by ak19, 11 years ago

Major overhaul to script generating the model-collections. This script works, but the version with updated comments and a diff rq at the end will be committed hereafter

File size: 17.7 KB
Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22
23# Pass in --svndelete to remove what's in archives and index from svn and replace
24# the contents of archives and index with the matching contents from the rebuild
25# collection. Useful for when the HASH directory naming has changed and everything
26# in archives and index has to be wiped out and moved back in from the rebuilt col.
27
28# If neither is passed in, then the collections are rebuilt but the svn model-collect
29# is not updated and the repository is not updated.
30
31# This program leaves you to do the final commit on the (svn) model-collect folder!
32
33# Examples of usage:
34# ./gen-model-colls.sh
35# ./gen-model-colls.sh --svndelete
36# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
37
38# The first just rebuilds all the collections in a new folder called collect and stops there
39# The second rebuilds all the collections and then removes all contents of the archives and the
40# index folders from the svn checked-out model-collect and removes them from svn. Then it copies
41# across all the contents of the rebuilt archives and index into model-collect and svn adds them.
42# The third example checks out all the model-collections again, but rebuilds only the 2 collections
43# specified in the new collect folder. Then it copies across the contents of the archives and index
44# folders of those 2 collections into their model-collect equivalents.
45
46# You then still have to do the final svn commit on the model-collect folder.
47
48# Also valid examples:
49# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
50# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
51# ./gen-model-colls.sh --svnupdate
52
53# PSEUDOCODE
54# This script:
55# Checks out the model-collections folder from SVN
56# Makes a copy
57# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
58# If --svndelete was passed in: svn removes model-collect/archives/* and model-collect/index/*, copies over index/*
59# and archives/* from collect into model-collect and svn adds model-collect/archives/* and model-collect/index/*
60# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
61# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
62# --svnupdate will leave untouched any files and folders unique to model-collect.
63
64# See earlier version of this script:
65# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
66# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
67
68# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
69# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories
70
71
72# DON'T ADD ANY FURTHER ECHO STATEMENTS IN FUNCTION get_col_basename
73# "you have to be really careful on what you have in this function, as having any code which will eventually echo will mean that you get incorrect return string."
74# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
# Prints the last path component of a collection path on stdout,
# e.g. "collect/Tudor-Basic" -> "Tudor-Basic".
#
# Arguments: $1 - collection name or path (may contain spaces)
# Outputs:   the basename followed by a newline
#
# Callers capture the result with $(...), so nothing else may be written to
# stdout from inside this function.
function get_col_basename () {
    local col_path=$1

    # Quoting keeps names with whitespace in one piece; no manual escaping is
    # needed (backslashes stored in a variable are never re-parsed by the shell).
    basename -- "$col_path"
}
90
91
92# Function that handles the --svndelete flag (mode) of this script
# Handles the --svndelete mode of this script.
#
# Phase 1: svn-remove archives and index of every selected collection and
#          commit the removals in one go.
# Phase 2: copy the freshly rebuilt archives and index into model-collect,
#          svn-add them and commit the additions in one go.
#
# Arguments: $@ - collection names; when none are given every entry of
#                 collect/* is processed
# Returns:   non-zero when committing/updating after phase 1 fails (phase 2 is
#            then skipped), otherwise the status of the final commit
function svn_delete () {
    local collection
    local -a targets

    if [ -z "${1-}" ]; then
        targets=(collect/*)
    else
        targets=("$@")
    fi

    for collection in "${targets[@]}"; do
        _del_col_archives_index "$collection"
    done

    # commit all the svn rm statements done above in one go:
    # don't do `svn up` before this, as that would retrieve all the folders that were svn-removed
    if ! svn commit -m "Clean rebuild of model collections 1/2. Clearing out deprecated archives and index." model-collect; then
        echo "svn_delete: committing the removals failed, not adding the rebuilt archives and index" >&2
        return 1
    fi

    # do an svn up to locally delete what was svn-removed above,
    # BEFORE copying from the rebuilt archives and index folders
    if ! svn up model-collect; then
        echo "svn_delete: svn up of model-collect failed, not adding the rebuilt archives and index" >&2
        return 1
    fi

    for collection in "${targets[@]}"; do
        _add_col_archives_index "$collection"
    done

    # commit all the svn add statements done just above in one go
    svn commit -m "Clean rebuild of model collections 2/2. Adding rebuilt archives and index." model-collect
}
132
133# To undo the changes made by svndelete (before they are committed), run the following manually
134# svn revert --depth infinity model-collect/$collection/archives/*
135# svn revert --depth infinity model-collect/$collection/index/*
136# then remove both the local archives and index, and do an svn up to get the original checkout back
137
138# svn delete this collection's archives and index folders
139# (The commit will be done when in one step for all collections on which this function was called)
# Schedules one collection's archives and index folders in model-collect for
# removal from svn. The commit is done by the caller, once for all collections.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Returns:   1 when no collection name could be derived; otherwise the status
#            of the last command run
function _del_col_archives_index () {
    local collection
    collection=$(get_col_basename "$1")

    if [ -z "$collection" ]; then
        echo "del_col_archives_index: no collection name given" >&2
        return 1
    fi

    # a collection that only exists in the rebuilt copy has nothing to remove
    if [ ! -e "model-collect/$collection" ]; then
        echo "del_col_archives_index: $collection does not exist in model-collect, will svn add this new collection shortly"
        return
    fi

    # remove the entire archives and index folders from svn
    svn rm "model-collect/$collection/archives"
    svn rm "model-collect/$collection/index"

    # for TESTING purposes, replace the two svn commands above with:
    # rm -rf "model-collect/$collection/archives"
    # rm -rf "model-collect/$collection/index"
}
160
161
162# copy and then svn add the collection's archives and index folders
# Copies one collection's rebuilt archives and index folders from collect into
# model-collect and schedules them for addition to svn. A collection that is
# not in model-collect yet is copied over and added as a whole.
# The commit is done by the caller, once for all collections.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Returns:   1 when no collection name could be derived or a new collection
#            could not be copied; otherwise the status of the last svn add
function _add_col_archives_index () {
    local collection
    collection=$(get_col_basename "$1")

    if [ -z "$collection" ]; then
        echo "add_col_archives_index: no collection name given" >&2
        return 1
    fi

    if [ ! -e "model-collect/$collection" ]; then
        echo "add_col_archives_index: Adding the new collection $collection to SVN"
        # the rebuilt copy carries no .svn folders, so it can be copied as-is
        cp -r "collect/$collection" "model-collect/$collection" || return 1
        svn add "model-collect/$collection"
        return
    fi

    # copy the rebuilt archives and index folders into the svn working copy
    cp -r "collect/$collection/archives" "model-collect/$collection/."
    cp -r "collect/$collection/index" "model-collect/$collection/."

    svn add "model-collect/$collection/archives"
    svn add "model-collect/$collection/index"
}
181
182
183# svn update/delete a single collection
# Compares the svn model of one collection with its rebuilt copy and appends
# every diff line that concerns neither archives nor index to report.txt in
# the current directory.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Globals:   collection (written) - set to the collection's basename; it is
#            deliberately NOT local because the calling code reads it after
#            this function returns
# Outputs:   progress messages on stdout, report lines appended to report.txt
function svn_process_single_collection () {
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "update_single_collection: Adding new collection $collection to SVN"
        return
    fi

    # diff the svn model and rebuilt model collections once, ignoring svn's
    # own bookkeeping folders (the dot is matched literally)
    local diff_result
    diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" | grep -v '\.svn')

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $collection"
        return
    fi

    # Walk the diff output line by line. Reading with 'IFS= read -r' splits on
    # newlines only, so the global IFS never has to be modified and restored.
    local line
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            # the file that is different is neither in index nor in archives,
            # send this diff line to the report
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"

    return 0
}
236
237# Function that takes care of the --svnupdate flag mode of this script
# Handles the --svnupdate mode for one collection: copies the contents of the
# rebuilt archives and index folders over their equivalents in model-collect.
# Files that only exist in model-collect are left untouched.
#
# Arguments: $1 - collection name or path (only the basename is used). When
#                 omitted, the global 'collection' is used instead, which is
#                 what this function relied on before it read its argument.
# Returns:   1 when no collection name is available, otherwise 0
function update_single_collection () {
    local name
    name=$(get_col_basename "${1:-${collection-}}")

    if [ -z "$name" ]; then
        echo "update_single_collection: no collection name given" >&2
        return 1
    fi

    # copy across the contents of the rebuilt model-collection's index and archives to the svn model-collect
    cp -r "collect/$name/archives/"* "model-collect/$name/archives/."
    cp -r "collect/$name/index/"* "model-collect/$name/index/."

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    # if etc/collect.cfg is different, copy it across too?
}
248
249
250# re-build a single collection in "collect" which is a copy of model-collect
# Re-builds a single collection in "collect" (the copy of model-collect):
# runs import.pl and buildcol.pl, then replaces the collection's index folder
# with the freshly produced building folder.
#
# Arguments: $1 - collection name or path (only the basename is used)
# Globals:   collection (written) - set to the collection's basename; it is
#            deliberately NOT local because the calling code reads it after
#            this function returns
# Returns:   1 when no name was given or the build left no building folder
#            (the existing index is then kept), otherwise 0
function build_single_collection () {
    collection=$(get_col_basename "$1")

    if [ -z "$collection" ]; then
        echo "build_single_collection: no collection name given" >&2
        return 1
    fi

    import.pl -removeold "$collection"
    buildcol.pl -removeold "$collection"

    # Only discard the old index once there is something to put in its place.
    if [ ! -d "collect/$collection/building" ]; then
        echo "build_single_collection: no building folder was produced for $collection, keeping the existing index" >&2
        return 1
    fi

    rm -rf "collect/$collection/index"
    mv "collect/$collection/building" "collect/$collection/index"

    echo
    echo "*********************"
    echo "Done processing $collection"
    echo "*********************"
    echo
}
267
268
269# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
# Prints the command line synopsis on stdout and terminates the script with
# exit status 1.
function usage() {
    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    # Without a mode the script only rebuilds the copy in collect.
    echo "If neither svnupdate nor svndelete are provided, the collections are rebuilt but model-collect is not modified."
    echo "*******************************************"
    exit 1
}
280
281
282# The program starts here
283
284# process optional command line arguments
285# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
286# Execute getopt
# Let getopt normalise the command line: recognised options come first, then
# "--", then the remaining arguments (the collection names).
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
if ! ARGS=$(getopt -o ud -l "svnupdate,svndelete" -n "$0" -- "$@"); then
    # Bad arguments
    usage
    exit 1
fi

eval set -- "$ARGS"

# mode can be svndelete or svnupdate; it stays empty when neither was given
mode=

while true; do
    case "$1" in
        -d|--svndelete)
            shift
            if [ "x$mode" == "xsvnupdate" ]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
                exit 1
            fi
            mode=svndelete
            ;;
        -u|--svnupdate)
            shift
            if [ "x$mode" == "xsvndelete" ]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
                exit 1
            fi
            mode=svnupdate
            ;;
        --)
            # end of options, what is left are the collection names
            shift
            break
            ;;
        *)
            # getopt always emits "--", so this is only reached if ARGS was
            # mangled; bail out instead of looping forever
            usage
            exit 1
            ;;
    esac
done
331
332# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
333# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect
334
335# the remaining arguments to the script are assumed to be collections
336
337# debugging
338#for collection in "$@"; do
339# collection=collect/$collection
340# echo "Collection: $collection"
341#done
342
343# finished processing arguments
344
345
# report.txt will contain the diff lines for files that differ outside of the
# archives and index folders; start every run with a fresh report
if [ -f report.txt ]; then
    rm report.txt
fi

# Need pdfbox for the PDFBox tutorial
if [ ! -e ext/pdf-box ]; then
    # Work in a subshell so that the script's own working directory cannot
    # change, whatever goes wrong in here.
    (
        mkdir -p ext && cd ext || exit 1
        # the path is relative to ext now; only download when the tarball is missing
        if [ ! -e pdf-box-java.tar.gz ]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
        fi
        tar -xvzf pdf-box-java.tar.gz
    ) || echo "Warning: could not set up the pdf-box extension in ext" >&2
fi

# move the existing collect folder out of the way, unless a backup from a previous run already exists
if [ -e collect ] && [ ! -e collect_orig ]; then
    mv collect collect_orig
fi
366
367
368# get model-collect from svn
369# if we already have it, svn update the entire model-collect folder if processing all collections
370# or svn update just any collections specified in the model-collect folder
# Get model-collect from svn. If it is already checked out, update the whole
# folder when all collections are processed, otherwise update just the
# collections named on the command line.
if [ -e model-collect ]; then
    if [ -z "${1-}" ]; then
        svn up model-collect
    else
        need_full_update=false
        for collection in "$@"; do
            if [ -e "model-collect/$collection" ]; then
                svn up "model-collect/$collection"
            else
                # not checked out yet: one update of the whole folder (below) fetches it
                need_full_update=true
            fi
        done
        if [ "$need_full_update" == "true" ]; then
            svn up model-collect
        fi
    fi
else
    if ! svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect; then
        echo "Could not check out model-collect from svn. Exiting." >&2
        exit 1
    fi
fi
386
387# Make a copy of the model-collect named as the new collect
388# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
389# Then remove the copy's .svn folders
# Make "collect" a copy of model-collect without its .svn folders.
# - No collect folder present: copy the whole of model-collect.
# - collect present and the user's original is safe in collect_orig: collect
#   is a copy made by an earlier run, so refresh it (entirely, or just the
#   collections named on the command line).
# - collect present without a collect_orig backup: leave it untouched.
if [ ! -e collect ]; then
    if ! cp -r model-collect collect; then
        echo "Could not copy model-collect to collect. Exiting." >&2
        exit 1
    fi
    # -prune stops find from descending into the folders it is about to delete
    find collect -name ".svn" -type d -prune -exec rm -rf {} +
elif [ -e collect_orig ]; then
    if [ -z "${1-}" ]; then
        rm -rf collect
        if ! cp -r model-collect collect; then
            echo "Could not copy model-collect to collect. Exiting." >&2
            exit 1
        fi
        find collect -name ".svn" -type d -prune -exec rm -rf {} +
    else
        for collection in "$@"; do
            if [ -z "$collection" ]; then
                continue
            fi
            # never throw away collect/<name> when there is nothing to replace it with
            if [ ! -e "model-collect/$collection" ]; then
                echo "Collection $collection is not in model-collect, not copying it into collect." >&2
                continue
            fi
            rm -rf "collect/$collection"
            cp -r "model-collect/$collection" "collect/$collection"
            find "collect/$collection" -name ".svn" -type d -prune -exec rm -rf {} +
        done
    fi
fi
410
411# Set up the Greenstone environment for building
# Set up the Greenstone environment for building
source setup.bash

# Rebuilds one collection and, when a mode was given on the command line,
# reports its differences with model-collect; in svnupdate mode the rebuilt
# archives and index are also copied into model-collect.
# Arguments: $1 - path of the collection inside collect
# Globals:   mode (read)
function _rebuild_and_sync () {
    local col_dir=$1

    if ! build_single_collection "$col_dir"; then
        echo "Building $col_dir failed. Skipping its svn processing." >&2
        return 1
    fi

    if [ "x$mode" != "x" ]; then
        svn_process_single_collection "$col_dir"

        # only svnupdate copies per collection; svndelete is handled for all
        # collections at once by svn_delete afterwards
        if [ "x$mode" == "xsvnupdate" ]; then
            update_single_collection "$col_dir"
        fi
    fi
}

if [ -z "${1-}" ]; then

    # all collections: for each one import, build, move building to index
    for col_dir in collect/*; do
        _rebuild_and_sync "$col_dir"
    done

    # having rebuilt all the collections, just the processing for svndelete remains
    if [ "x$mode" == "xsvndelete" ]; then
        svn_delete
    fi

else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists
    for col_name in "$@"; do
        col_dir=collect/$col_name
        if [ -e "$col_dir" ]; then
            _rebuild_and_sync "$col_dir"
        else
            echo "Can't find collection $col_dir. Skipping."
        fi
    done

    # having rebuilt the specified collections above, just the processing for svndelete remains
    if [ "x$mode" == "xsvndelete" ]; then
        svn_delete "$@"
    fi

fi
466
467
# Closing summary for the user.
echo
echo "*****************************************"
# A mode is only set when we were svn updating/deleting collections; if a
# report with additional differences was written in that case, point to it.
if [[ -n "$mode" && -f report.txt ]]; then
    echo "Some files outside of archives and index folders were different. See report.txt"
    echo
fi

echo "The original collect directory has been left renamed as collect_orig"
echo
case "$mode" in
    "")
        echo "The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    *)
        echo "You still need to run svn status and svn commit on the model-collect folder"
        ;;
esac
echo "*****************************************"
echo
486
487
488# deletes empty dirs
489# find collect/$collection/archives/HASH* -type d -empty -delete
490# find collect/$collection/index/assoc/HASH* -type d -empty -delete
491
492# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
493# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
494#find collect -type d -empty -delete
495
496# The following when put in a separate script file will delete all folders from model-collect that are
497# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
498#!/bin/bash
499
500#for collection in collect/*; do
501 #escape the filename (in case of space)
502# collection=`echo $collection | sed 's@ @\\\ @g'`
503
504 #get just the basename
505# collection=`basename $collection`
506
507 # HASH dirs that are empty in local collect's archives and index/assoc,
508 # need to be removed from the svn in model-collect
509
510# for line in `find collect/$collection/archives/HASH* -type d -empty`; do
511# modelline="model-$line"
512# echo "LINE: $modelline"
513
514 # remove from svn of model collect
515# svn rm $modelline
516## rm -rf $modelline
517 # remove physically from local collect
518# rm -rf $line
519# done
520
521# for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
522# modelline="model-$line"
523# echo "LINE: $modelline"
524
525 # remove from svn of model collect
526# svn rm $modelline
527## rm -rf $modelline
528 # remove physically from local collect
529# rm -rf $line
530# done
531
532#done
Note: See TracBrowser for help on using the repository browser.