source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28046

Last change on this file since 28046 was 28046, checked in by ak19, 11 years ago

Committing an intermediate version of the gen-model-colls script, which has some useful sections of code for future use, before committing the completely overhauled version of the same script file (currently being tested)

File size: 15.3 KB
Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22
23# Pass in --svndelete to remove what's in archives and index from svn and replace
24# the contents of archives and index with the matching contents from the rebuild
25# collection. Useful for when the HASH directory naming has changed and everything
26# in archives and index has to be wiped out and moved back in from the rebuilt col.
27
28# If neither is passed in, then the collections are rebuilt but the svn model-collect
29# is not updated and the repository is not updated.
30
31# This program leaves you to do the final commit on the (svn) model-collect folder!
32
33# Examples of usage:
34# ./gen-model-colls.sh
35# ./gen-model-colls.sh --svndelete
36# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
37
38# The first just rebuilds all the collections in a new folder called collect and stops there
39# The second rebuilds all the collections and then removes all contents of the archives and the
40# index folders from the svn checked-out model-collect and removes them from svn. Then it copies
41# across all the contents of the rebuilt archives and index into model-collect and svn adds them.
42# The third example checks out all the model-collections again, but rebuilds only the 2 collections
43# specified in the new collect folder. Then it copies across the contents of the archives and index
44# folders of those 2 collections into their model-collect equivalents.
45
46# You then still have to do the final svn commit on the model-collect folder.
47
48# Also valid examples:
49# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
50# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
51# ./gen-model-colls.sh --svnupdate
52
53# PSEUDOCODE
54# This script:
55# Checks out the model-collections folder from SVN
56# Makes a copy
57# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
58# If --svndelete was passed in: svn removes model-collect/archives/* and model-collect/index/*, copies over index/*
59# and archives/* from collect into model-collect and svn adds model-collect/archives/* and model-collect/index/*
60# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
61# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
62# --svnupdate will leave untouched any files and folders unique to model-collect.
63
# svn update/delete a single collection.
#
# Compares the rebuilt collection in collect/ with its svn counterpart in
# model-collect/ and, when they differ, copies the rebuilt contents of the
# archives and index folders into model-collect.
#
# Arguments:
#   $1 - collection name or path; only the last path component is used and it
#        may contain spaces
#   $2 - mode. "svndelete" additionally runs "svn rm" on the old contents of
#        archives/index before the copy and "svn add" on the new contents
#        after it. Any other value (e.g. "svnupdate") only copies.
# Outputs:
#   Progress messages on stdout. Every diff line that mentions neither
#   archives nor index is appended to report.txt in the current directory.
# Returns:
#   0 when there is nothing to do, 1 when no collection name was given,
#   otherwise the status of the last command run.
function update_single_collection () {
  local collection=${1%/}
  local mode=$2
  local diff_result line subdir
  local -a entries

  # Get just the basename. Names are quoted everywhere below, so no
  # backslash-escaping of spaces is needed (or wanted).
  collection=${collection##*/}

  if [[ -z "$collection" ]]; then
    echo "update_single_collection: no collection name given" >&2
    return 1
  fi

  if [[ ! -e "model-collect/$collection" ]]; then
    echo "$collection does not exist in model-collect, commit separately"
    return
  fi

  # diff the svn model and rebuilt model collections, ignoring svn metadata
  diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" \
    | grep -vF '.svn')

  # if no differences in the current collection, then we're done
  if [[ -z "$diff_result" ]]; then
    echo "No differences in collection $collection"
    return
  fi

  # Any diff line that mentions neither archives nor index concerns a file
  # this function will not copy across: send it to the report for manual
  # follow-up. Reading line by line avoids having to fiddle with IFS.
  while IFS= read -r line; do
    if [[ "$line" != *archives* && "$line" != *index* ]]; then
      printf '%s\n' "$line" >> report.txt
    fi
  done <<< "$diff_result"

  # The two folders are independent of each other, so each one is processed
  # completely (svn rm, copy, svn add) before moving on to the next.
  for subdir in archives index; do

    # svn remove what is currently in model-collect
    # see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
    if [[ "$mode" == "svndelete" ]]; then
      entries=("model-collect/$collection/$subdir"/*)
      # an unmatched glob stays literal; only act when something really matched
      if [[ -e "${entries[0]}" || -L "${entries[0]}" ]]; then
        svn rm "${entries[@]}"
      fi
    fi

    # copy across the contents of the rebuilt collection to the svn model-collect
    entries=("collect/$collection/$subdir"/*)
    if [[ -e "${entries[0]}" || -L "${entries[0]}" ]]; then
      cp -r "${entries[@]}" "model-collect/$collection/$subdir/."
    fi

    # svn add what has been rebuilt
    if [[ "$mode" == "svndelete" ]]; then
      entries=("model-collect/$collection/$subdir"/*)
      if [[ -e "${entries[0]}" || -L "${entries[0]}" ]]; then
        svn add "${entries[@]}"
      fi
    fi
  done

  # To undo the changes made by svndelete, run the following manually
  #   svn revert --depth infinity model-collect/$collection/archives/*
  #   svn revert --depth infinity model-collect/$collection/index/*
  # then remove both the local archives and index, and do an svn up to get
  # the original checkout back

  # if etc/collect.cfg is different, copy it across too?
}
153
154
# Helper for single_collection: clean up empty HASH directories.
#
# HASH dirs that are empty in the local (non-svn) collect need to be removed
# from svn in model-collect, where they are almost empty (they contain only a
# .svn folder).
#
# Arguments:
#   $1 - a folder inside collect/ whose HASH* children are to be searched
function _remove_empty_hash_dirs () {
  local parent=$1
  local coldir modeldir
  local -a roots empties=()

  roots=("$parent"/HASH*)
  # an unmatched glob stays literal: nothing to search in that case
  [[ -e "${roots[0]}" ]] || return 0

  # Collect all results before deleting anything, so that find never walks a
  # tree that is being modified underneath it. NUL delimiters keep names with
  # whitespace intact.
  while IFS= read -r -d '' coldir; do
    empties+=("$coldir")
  done < <(find "${roots[@]}" -type d -empty -print0)

  for coldir in "${empties[@]}"; do
    # collect/... -> model-collect/...
    modeldir="model-$coldir"
    echo "Removing empty dir from svn: $modeldir"

    # remove from svn of model collect
    svn rm "$modeldir"
    rm -rf -- "$modeldir"
    # remove physically from local collect (not necessary, as a rebuild will sort this out too)
    rm -rf -- "$coldir"
  done
}

# re-build a single collection in "collect" which is a copy of model-collect
#
# Arguments:
#   $1 - collection name or path; only the last path component is used and it
#        may contain spaces
# Returns:
#   0 on success; 1 if no name was given or if the build left no "building"
#   folder behind, in which case the existing index is kept as it was.
function single_collection () {
  local collection=${1%/}

  # get just the basename; quoting (not backslash-escaping) protects spaces
  collection=${collection##*/}

  if [[ -z "$collection" ]]; then
    echo "single_collection: no collection name given" >&2
    return 1
  fi

  _remove_empty_hash_dirs "collect/$collection/archives"
  _remove_empty_hash_dirs "collect/$collection/index/assoc"

  import.pl -removeold "$collection"
  buildcol.pl -removeold "$collection"

  # Only swap building into place when the build actually produced it;
  # otherwise the old index would be deleted with nothing to replace it.
  if [[ ! -d "collect/$collection/building" ]]; then
    echo "Build of $collection produced no building folder, index left as it was" >&2
    return 1
  fi
  rm -rf -- "collect/$collection/index"
  mv -- "collect/$collection/building" "collect/$collection/index"

  echo
  echo "*********************"
  echo "Done processing $collection"
  echo "*********************"
  echo
}
203
# unused
# Re-build all collections in "collect", which is a copy of model-collect:
# calls single_collection (import, build, move building to index) on every
# entry found directly inside collect/.
function all_collections() {
  local collection

  for collection in collect/*; do
    # an unmatched glob stays literal ("collect/*"); skip it
    [[ -e "$collection" ]] || continue
    single_collection "$collection"
    #update_single_collection "$collection"
  done
}
214
# Print the command-line synopsis on stdout and terminate the script with
# exit status 1.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
function usage() {
  echo "*******************************************"
  echo "Usage: $0 [--svnupdate|--svndelete] [col1 col2 col3 ...]"
  echo "If no collections are provided, all collections will be processed."
  # When no mode is given the script only rebuilds; it does not fall back to svnupdate.
  echo "If neither --svnupdate nor --svndelete is provided, the collections are only"
  echo "rebuilt in collect and the svn model-collect folder is left untouched."
  echo "*******************************************"
  exit 1
}
226
227
228# The program starts here
229
230# process optional command line arguments
231# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
232# Execute getopt
# Let getopt normalise the command line: options first, then "--", then the
# remaining words (the collection names).
if ! ARGS=$(getopt -o ud -l "svnupdate,svndelete" -n "$0" -- "$@"); then
  # Bad arguments
  usage
  exit 1
fi

# getopt emits a shell-quoted string; eval is the documented way to load it
# back into the positional parameters.
eval set -- "$ARGS"

# mode can be svndelete or svnupdate; empty means "rebuild only"
mode=

while (( $# > 0 )); do
  case "$1" in
    -d|--svndelete)
      shift
      if [[ "$mode" == "svnupdate" ]]; then
        echo
        echo "Can't use both svndelete and svnupdate"
        usage
        exit 1
      fi
      mode=svndelete
      ;;
    -u|--svnupdate)
      shift
      if [[ "$mode" == "svndelete" ]]; then
        echo
        echo "Can't use both svndelete and svnupdate"
        usage
        exit 1
      fi
      mode=svnupdate
      ;;
    --)
      # end of options: what is left are the collection names
      shift
      break
      ;;
    *)
      # Not expected from getopt, but never spin forever on an unknown word
      break
      ;;
  esac
done
277
278# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
279# the svn model-collect folder. We leave it at rebuilding its copy in collect
280
281#http://www.cyberciti.biz/faq/linux-unix-sleep-bash-scripting/
282#if [ "x$mode" == "x" ]; then
283# echo
284# echo "***Mode svndelete or svnupdate not provided. Defaulting to svnupdate... 3s to change to your mind"
285# echo
286# sleep 3s
287# mode=svnupdate
288#fi
289
290# the remaining arguments to the script are assumed to be collections
291
292# debugging
293#for collection in "$@"; do
294# collection=collect/$collection
295# echo "Collection: $collection"
296#done
297
298# finished processing arguments
299
300
# report.txt will contain the diff output for files that differ outside the
# archives and index folders. Start every run with a clean report.
rm -f report.txt

# Need pdfbox for the PDFBox tutorial
if [ ! -e ext/pdf-box ]; then
  if [ -d ext ]; then
    # Work in a subshell so the script's own working directory can never be
    # left pointing somewhere else if a step in here fails.
    (
      cd ext || exit 1
      # paths are relative to ext from here on
      if [ ! -e pdf-box-java.tar.gz ]; then
        wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
      fi
      # unpack whether the tarball was just downloaded or was already there
      tar -xvzf pdf-box-java.tar.gz
    ) || echo "Warning: could not install the pdf-box extension into ext" >&2
  else
    echo "Warning: no ext folder in $(pwd), cannot install the pdf-box extension" >&2
  fi
fi
315
316
# Get an up-to-date working copy of the svn model collections
if test -e model-collect; then
  svn up model-collect
else
  svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
fi

# Without model-collect there is nothing to copy or rebuild; stop before
# anything in collect gets touched.
if [ ! -d model-collect ]; then
  echo "No model-collect folder available (svn checkout failed?). Stopping." >&2
  exit 1
fi

# move the existing collect folder out of the way
if [ -e collect ] && [ ! -e collect_orig ]; then
  if ! mv collect collect_orig; then
    echo "Could not rename collect to collect_orig. Stopping." >&2
    exit 1
  fi
fi

# make a copy of the model-collect named as the new collect
# and remove the copy's .svn folders.
# Any collect folder still present at this point is a leftover copy from an
# earlier run (the original one is preserved as collect_orig), so it is
# safe to replace it.
rm -rf collect
cp -r model-collect collect

# -prune stops find from trying to descend into the .svn folders it has just
# handed to rm
find collect -name ".svn" -type d -prune -exec rm -rf {} +

# Empty dirs in the copy of model-collect (dirs that held nothing but a .svn
# folder) are deliberately not deleted here with
#   find collect -type d -empty -delete
# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories

# Set up the Greenstone environment for building
source setup.bash
349
350# parse arguments
351# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
352# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script
353
# colpath/colname are used instead of "collection" for the loop variables
# because the functions called below may assign to a variable of that name.
if [[ -z "$1" ]]; then
  # No collections named on the command line:
  # for each collection, import, build, move building to index
  for colpath in collect/*; do
    # an unmatched glob stays literal ("collect/*"); skip it
    [[ -e "$colpath" ]] || continue
    single_collection "$colpath"
    # mode is only set when --svnupdate or --svndelete was given
    if [[ -n "$mode" ]]; then
      update_single_collection "$colpath" "$mode"
    fi
  done
else
  # Command-line args are a list of collections,
  # process each command-line arg, after confirming such a collection exists
  for colname in "$@"; do
    colpath="collect/$colname"
    if [[ -n "$colname" && -e "$colpath" ]]; then
      single_collection "$colpath"
      if [[ -n "$mode" ]]; then
        update_single_collection "$colpath" "$mode"
      fi
    else
      echo "Can't find collection $colpath. Skipping."
    fi
  done
fi
385
386
# Closing summary for the user: where the results are and what is left to do.
printf '\n%s\n' "*****************************************"

# mode is only set when svn updating/deleting was requested; a report.txt at
# this point means differences outside archives and index were found
if [[ -n "$mode" && -f report.txt ]]; then
  printf '%s\n\n' "Some files outside of archives and index folders were different. See report.txt"
fi

printf '%s\n\n' "The original collect directory has been left renamed as collect_orig"

case "$mode" in
  "")
    printf '%s\n' "The model-collect folder has not been altered. Changes have only been made to collect"
    ;;
  *)
    printf '%s\n' "You still need to run svn status and svn commit on the model-collect folder"
    ;;
esac

printf '%s\n\n' "*****************************************"
405
406
407# The following in a separate script file will delete all folders from model-collect that are
408# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
409
410#for collection in collect/*; do
411 #escape the filename (in case of space)
412# collection=`echo $collection | sed 's@ @\\\ @g'`
413
414 #get just the basename
415# collection=`basename $collection`
416
417 # HASH dirs that are empty in local collect's archives and index/assoc,
418 # need to be removed from the svn in model-collect
419
420# for line in `find collect/$collection/archives/HASH* -type d -empty`; do
421# modelline="model-$line"
422# echo "LINE: $modelline"
423
424 # remove from svn of model collect
425# svn rm $modelline
426## rm -rf $modelline
427 # remove physically from local collect
428# rm -rf $line
429# done
430
431# for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
432# modelline="model-$line"
433# echo "LINE: $modelline"
434
435 # remove from svn of model collect
436# svn rm $modelline
437## rm -rf $modelline
438 # remove physically from local collect
439# rm -rf $line
440# done
441
442#done
Note: See TracBrowser for help on using the repository browser.