source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28037

Last change on this file since 28037 was 28037, checked in by ak19, 11 years ago

The script to generate the MODEL collections now also handles the svn rm and svn add if passed the svndelete flag, and just the copying over of the rebuilt archives and index into the model-collect (svn) folder if passed in the svnupdate flag.

File size: 11.9 KB
Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22
23# Pass in --svndelete to remove what's in archives and index from svn and replace
24# the contents of archives and index with the matching contents from the rebuild
25# collection. Useful for when the HASH directory naming has changed and everything
26# in archives and index has to be wiped out and moved back in from the rebuilt col.
27
28# If neither is passed in, then the collections are rebuilt but the svn model-collect
29# is not updated and the repository is not updated.
30
31# This program leaves you to do the final commit on the (svn) model-collect folder!
32
33# Examples of usage:
34# ./gen-model-colls.sh
35# ./gen-model-colls.sh --svndelete
36# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
37
38# The first just rebuilds all the collections in a new folder called collect and stops there
39# The second rebuilds all the collections and then removes all contents of the archives and the
40# index folders from the svn checked-out model-collect and removes them from svn. Then it copies
41# across all the contents of the rebuilt archives and index into model-collect and svn adds them.
# The third example checks out all the model-collections again, but rebuilds only the 2 collections
# specified (Tudor-Basic and Tudor-Enhanced) in the new collect folder. Then it copies across the
# contents of the archives and index folders of those 2 collections into their model-collect equivalents.
45
46# You then still have to do the final svn commit on the model-collect folder.
47
48# Also valid examples:
49# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
50# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
51# ./gen-model-colls.sh --svnupdate
52
53# PSEUDOCODE
54# This script:
55# Checks out the model-collections folder from SVN
56# Makes a copy
57# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
58# If --svndelete was passed in: svn removes model-collect/archives/* and model-collect/index/*, copies over index/*
59# and archives/* from collect into model-collect and svn adds model-collect/archives/* and model-collect/index/*
60# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
61# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
62# --svnupdate will leave untouched any files and folders unique to model-collect.
63
# svn update/delete a single collection
#
# Compares the rebuilt collection in collect/ with its svn counterpart in
# model-collect/ and, when they differ, copies the rebuilt archives and index
# contents into model-collect.
#
# Arguments:
#   $1 - collection name or path (only the basename is used)
#   $2 - "svndelete" to svn rm the old archives/index contents first and
#        svn add the new ones afterwards; any other value just copies
# Outputs:
#   Progress messages on stdout. Diff lines that mention neither "archives"
#   nor "index" are appended to report.txt in the current directory.
# Returns:
#   1 if no collection name was given, otherwise the status of the last command.
function update_single_collection () {
  local collection mode diff_result line

  # Get just the basename. Quoting every expansion (instead of backslash-
  # escaping spaces) is what keeps names containing spaces in one piece.
  collection=$(basename -- "$1")
  mode=$2

  if [ -z "$collection" ]; then
    echo "update_single_collection: no collection name given" >&2
    return 1
  fi

  if [ ! -e "model-collect/$collection" ]; then
    echo "$collection does not exist in model-collect, commit separately"
    return
  fi

  # diff the svn model and rebuilt model collections, ignoring svn metadata
  # (-F: match ".svn" literally rather than as a regular expression)
  diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn")

  # if no differences in the current collection, then we're done
  if [ -z "$diff_result" ]; then
    echo "No differences in collection $collection"
    return
  fi

  # Any diff line that mentions neither archives nor index concerns a file
  # outside those two folders: send it to the report for manual inspection.
  # NOTE: this is a plain substring match on the whole diff line, so a
  # collection whose own name contains "archives" or "index" is never reported.
  while IFS= read -r line; do
    if [[ "$line" != *archives* && "$line" != *index* ]]; then
      printf '%s\n' "$line" >> report.txt
    fi
  done <<< "$diff_result"

  # Now svn remove what's unique to model-collect and svn add what's been
  # rebuilt in index and archives, see
  # http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
  if [ "$mode" == "svndelete" ]; then
    svn rm "model-collect/$collection/archives/"*
    svn rm "model-collect/$collection/index/"*
  fi

  # copy across the contents of the rebuilt collection's index and archives
  # to the svn model-collect
  cp -r "collect/$collection/archives/"* "model-collect/$collection/archives/."
  cp -r "collect/$collection/index/"* "model-collect/$collection/index/."

  if [ "$mode" == "svndelete" ]; then
    svn add "model-collect/$collection/archives/"*
    svn add "model-collect/$collection/index/"*
  fi

  # To undo the changes made by svndelete, run the following manually
  #   svn revert --depth infinity model-collect/$collection/archives/*
  #   svn revert --depth infinity model-collect/$collection/index/*
  # then remove both the local archives and index, and do an svn up to get
  # the original checkout back

  # if etc/collect.cfg is different, copy it across too?
}
144
# re-build a single collection in "collect" which is a copy of model-collect
#
# Runs import.pl and buildcol.pl (both with -removeold) on the collection and
# then replaces collect/<collection>/index with the freshly built
# collect/<collection>/building folder.
#
# Arguments:
#   $1 - collection name or path (only the basename is used)
# Outputs:
#   A "Done processing" banner on stdout; warnings on stderr.
# Returns:
#   1 if no collection was given or the build left no building folder.
function single_collection () {
  local collection

  # Get just the basename. Quoting every expansion (instead of backslash-
  # escaping spaces) is what keeps names containing spaces in one piece.
  collection=$(basename -- "$1")

  if [ -z "$collection" ]; then
    echo "single_collection: no collection name given" >&2
    return 1
  fi

  import.pl -removeold "$collection"
  buildcol.pl -removeold "$collection"

  # Only throw away the old index once there is a new one to put in its place.
  if [ ! -d "collect/$collection/building" ]; then
    echo "Warning: no building folder for $collection, leaving its index untouched" >&2
    return 1
  fi
  rm -rf "collect/$collection/index"
  mv "collect/$collection/building" "collect/$collection/index"

  echo
  echo "*********************"
  echo "Done processing $collection"
  echo "*********************"
  echo
}
166
# unused
# re-build all collections in "collect" which is a copy of model-collect
#
# Calls single_collection once for every entry found directly under collect/.
# Takes no arguments.
function all_collections() {
  local collection

  # for each collection: import, build, move building to index
  for collection in collect/*; do
    # an unmatched glob stays as the literal "collect/*": skip it
    [ -e "$collection" ] || continue
    single_collection "$collection"
    #update_single_collection "$collection"
  done
}
177
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
#
# Prints the usage message to stderr and terminates the script with status 1.
# Takes no arguments and never returns.
function usage() {
  {
    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    echo "If neither --svnupdate nor --svndelete is provided, the collections are only"
    echo "rebuilt in collect and the svn model-collect folder is left untouched."
    echo "*******************************************"
  } >&2
  exit 1
}
189
190
191# The program starts here
192
193# process optional command line arguments
194# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
195# Execute getopt
# Let getopt(1) validate and normalise the command line: the recognised
# options come first, then "--", then the remaining (collection) arguments.
ARGS=$(getopt -o ud -l "svnupdate,svndelete" -n "$0" -- "$@")

# Bad arguments
if [ $? -ne 0 ]; then
  usage
  exit 1
fi

# Replace the positional parameters with getopt's normalised list
eval set -- "$ARGS"

# mode can be svndelete or svnupdate; empty means model-collect is left alone
mode=

while true; do
  case "$1" in
    -d|--svndelete)
      shift
      if [ "$mode" == "svnupdate" ]; then
        echo
        echo "Can't use both svndelete and svnupdate"
        usage
        exit 1
      fi
      mode=svndelete
      ;;
    -u|--svnupdate)
      shift
      if [ "$mode" == "svndelete" ]; then
        echo
        echo "Can't use both svndelete and svnupdate"
        usage
        exit 1
      fi
      mode=svnupdate
      ;;
    --)
      # end of options: whatever follows is the list of collections
      shift
      break
      ;;
    *)
      # Not expected after getopt's normalisation. Without this arm an
      # unrecognised or missing word would make the loop spin forever.
      echo "Unexpected argument while parsing options: '$1'" >&2
      usage
      exit 1
      ;;
  esac
done
240
241# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
242# the svn model-collect folder. We leave it at rebuilding its copy in collect
243
244#http://www.cyberciti.biz/faq/linux-unix-sleep-bash-scripting/
245#if [ "x$mode" == "x" ]; then
246# echo
247# echo "***Mode svndelete or svnupdate not provided. Defaulting to svnupdate... 3s to change to your mind"
248# echo
249# sleep 3s
250# mode=svnupdate
251#fi
252
253# the remaining arguments to the script are assumed to be collections
254
255# debugging
256#for collection in "$@"; do
257# collection=collect/$collection
258# echo "Collection: $collection"
259#done
260
261# finished processing arguments
262
263
# report.txt collects the diff lines for files that differ outside the
# archives and index folders; start every run without a stale one
if [ -f report.txt ]; then
  rm report.txt
fi

# Need pdfbox for the PDFBox tutorial
if [ ! -e ext/pdf-box ]; then
  # Work in a subshell: a failed cd then cannot leave the rest of the script
  # running in the wrong directory, and no "cd .." is needed afterwards.
  (
    cd ext || exit 1
    # paths are relative to ext from here on; download only if not already there
    if [ ! -e pdf-box-java.tar.gz ]; then
      wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
    fi
    tar -xvzf pdf-box-java.tar.gz
  ) || echo "Warning: could not set up the pdf-box extension in ext" >&2
fi
278
279
# Get an up to date svn working copy of the model collections
if test -e model-collect; then
  svn up model-collect
else
  svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
fi

# Nothing sensible can be built without the model collections
if [ ! -d model-collect ]; then
  echo "model-collect is not available (svn checkout failed?). Aborting." >&2
  exit 1
fi

# move the existing collect folder out of the way, unless an earlier run
# already preserved the original as collect_orig
if [ -e collect ] && [ ! -e collect_orig ]; then
  if ! mv collect collect_orig; then
    # never go on to delete a collect folder that could not be preserved
    echo "Could not rename collect to collect_orig. Aborting." >&2
    exit 1
  fi
fi

# make a copy of the model-collect named as the new collect; any collect still
# present here is the generated copy of an earlier run (the original is kept
# in collect_orig), so it can be replaced
rm -rf collect
cp -r model-collect collect

# remove the copy's .svn folders; -prune stops find from trying to descend
# into the directories it has just deleted
find collect -name ".svn" -type d -prune -exec rm -rf {} +
304
305
# Set up the Greenstone environment for building
source setup.bash

# The positional parameters left over after option parsing are the collections
# to process. The loops use their own variable names because the helper
# functions may assign to a global called "collection".
if [ "$#" -eq 0 ]; then
  # no collections given: for each collection in collect, import, build,
  # move building to index, and update model-collect if a mode was given
  for col_path in collect/*; do
    # an unmatched glob stays as the literal "collect/*": skip it
    [ -e "$col_path" ] || continue
    single_collection "$col_path"
    if [ -n "$mode" ]; then
      update_single_collection "$col_path" "$mode"
    fi
  done
else
  # Command-line args are a list of collections,
  # process each one after confirming such a collection exists
  for col_name in "$@"; do
    col_path="collect/$col_name"
    if [ -e "$col_path" ]; then
      single_collection "$col_path"
      if [ -n "$mode" ]; then
        update_single_collection "$col_path" "$mode"
      fi
    else
      echo "Can't find collection $col_path. Skipping."
    fi
  done
fi
338
339
# Closing summary for the user.
printf '\n%s\n' "*****************************************"

# A mode (svnupdate/svndelete) means model-collect was compared and updated;
# report.txt then exists only if files outside archives and index differed.
if [ -n "$mode" ] && [ -f report.txt ]; then
  printf '%s\n\n' "Some files outside of archives and index folders were different. See report.txt"
fi

printf '%s\n\n' "The original collect directory has been left renamed as collect_orig"

if [ -z "$mode" ]; then
  echo "The model-collect folder has not been altered. Changes have only been made to collect"
else
  echo "You still need to run svn status and svn commit on the model-collect folder"
fi
printf '%s\n\n' "*****************************************"
358
359
Note: See TracBrowser for help on using the repository browser.