1 | #!/bin/bash
|
---|
2 |
|
---|
3 | # PURPOSE
|
---|
4 | # This is not a nightly script. You use it to regenerate the model-collections
|
---|
5 | # if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
|
---|
6 | # to documents or something that changes the contents of the index and
|
---|
7 | # archives folders. This has happened now with the commits
|
---|
8 | # http://trac.greenstone.org/changeset/28022 and
|
---|
9 | # http://trac.greenstone.org/changeset/28021
|
---|
10 | # These commits generate new stable HASH OIDs for the existing documents.
|
---|
11 |
|
---|
12 |
|
---|
13 | # USAGE
|
---|
14 | # Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
|
---|
15 | # that you want to generate the model collections with.
|
---|
16 | # You can provide a list of collection names or none, in which case all the collections
|
---|
17 | # are processed.
|
---|
18 |
|
---|
19 | # Pass in --svnupdate to copy across the contents of archives and index in the
|
---|
20 | # rebuilt collection, overwriting their equivalents in the svn model collection,
|
---|
21 | # but not removing any extraneous HASH folders already present.
|
---|
22 |
|
---|
23 | # Pass in --svndelete to remove what's in archives and index from svn and replace
|
---|
24 | # the contents of archives and index with the matching contents from the rebuild
|
---|
25 | # collection. Useful for when the HASH directory naming has changed and everything
|
---|
26 | # in archives and index has to be wiped out and moved back in from the rebuilt col.
|
---|
27 |
|
---|
28 | # If neither is passed in, then the collections are rebuilt but the svn model-collect
|
---|
29 | # is not updated and the repository is not updated.
|
---|
30 |
|
---|
31 | # This program leaves you to do the final commit on the (svn) model-collect folder!
|
---|
32 |
|
---|
33 | # Examples of usage:
|
---|
34 | # ./gen-model-colls.sh
|
---|
35 | # ./gen-model-colls.sh --svndelete
|
---|
36 | # ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
|
---|
37 |
|
---|
38 | # The first just rebuilds all the collections in a new folder called collect and stops there
|
---|
39 | # The second rebuilds all the collections and then removes all contents of the archives and the
|
---|
40 | # index folders from the svn checked-out model-collect and removes them from svn. Then it copies
|
---|
41 | # across all the contents of the rebuilt archives and index into model-collect and svn adds them.
|
---|
42 | # The third example checks out all the model-collections again, but rebuilds only the 2 collections
|
---|
43 | # specified in the new collect folder. Then it copies across the contents of the archives and index
|
---|
44 | # folders of those 2 collections into their model-collect equivalents.
|
---|
45 |
|
---|
46 | # You then still have to do the final svn commit on the model-collect folder.
|
---|
47 |
|
---|
48 | # Also valid examples:
|
---|
49 | # ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
|
---|
50 | # ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
|
---|
51 | # ./gen-model-colls.sh --svnupdate
|
---|
52 |
|
---|
53 | # PSEUDOCODE
|
---|
54 | # This script:
|
---|
55 | # Checks out the model-collections folder from SVN
|
---|
56 | # Makes a copy
|
---|
57 | # In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
|
---|
58 | # If --svndelete was passed in: svn removes model-collect/archives/* and model-collect/index/*, copies over index/*
|
---|
59 | # and archives/* from collect into model-collect and svn adds model-collect/archives/* and model-collect/index/*
|
---|
60 | # If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
|
---|
61 | # into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
|
---|
62 | # --svnupdate will leave untouched any files and folders unique to model-collect.
|
---|
63 |
|
---|
# svn update/delete a single collection.
#
# Compares the freshly rebuilt collection in collect/ against its checked-out
# counterpart in model-collect/ and, if they differ, copies the rebuilt
# archives and index contents over the svn working copy.
#
# Arguments:
#   $1 - collection name, or a path whose last component is the name
#        (e.g. "collect/Tudor-Basic"). Names containing spaces are supported.
#   $2 - mode: "svndelete" additionally runs `svn rm` on the old contents of
#        archives/ and index/ before the copy and `svn add` on the new
#        contents afterwards; any other value (e.g. "svnupdate") only copies.
# Outputs:
#   Appends to report.txt (in the current directory) every diff line that
#   mentions neither "archives" nor "index".
#   Prints a message to stdout when the collection is missing from
#   model-collect or when there are no differences.
# Returns:
#   1 if no collection name was given; otherwise the status of the last
#   command that ran.
function update_single_collection () {
  local collection mode model_dir rebuilt_dir diff_result line

  # Only the last path component names the collection. Quoting (rather than
  # backslash-escaping spaces) keeps names with spaces intact.
  collection=$(basename -- "$1")
  mode=$2

  if [[ -z "$collection" ]]; then
    echo "update_single_collection: no collection name given" >&2
    return 1
  fi

  model_dir="model-collect/$collection"
  rebuilt_dir="collect/$collection"

  if [[ ! -e "$model_dir" ]]; then
    echo "$collection does not exist in model-collect, commit separately"
    return
  fi

  # diff the svn model and rebuilt model collections, ignoring svn metadata.
  # -F makes ".svn" a literal string instead of a regex with a wildcard dot.
  diff_result=$(diff -rq "$model_dir" "$rebuilt_dir" | grep -vF ".svn")

  # if no differences in the current collection, then we're done
  if [[ -z "$diff_result" ]]; then
    echo "No differences in collection $collection"
    return
  fi

  # Check that none of the diff lines mention files outside the archives or
  # index folders; any that do are sent to the report for manual follow-up.
  # http://stackoverflow.com/questions/229551/string-contains-in-bash
  while IFS= read -r line; do
    if [[ "$line" != *archives* && "$line" != *index* ]]; then
      printf '%s\n' "$line" >> report.txt
    fi
  done <<< "$diff_result"

  # Now svn remove what's in model-collect's archives and index, so that what
  # has been rebuilt can be svn added in its place.
  # see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
  # The globs skip dot-entries, so the .svn folder of the toplevel
  # archives/index is left in place.
  if [[ "$mode" == "svndelete" ]]; then
    svn rm "$model_dir"/archives/*
    svn rm "$model_dir"/index/*
  fi

  # copy across the contents of the rebuilt model-collection's index and
  # archives to the svn model-collect
  cp -r "$rebuilt_dir"/archives/* "$model_dir/archives/."
  cp -r "$rebuilt_dir"/index/* "$model_dir/index/."

  if [[ "$mode" == "svndelete" ]]; then
    svn add "$model_dir"/archives/*
    svn add "$model_dir"/index/*
  fi

  # To undo the changes made by svndelete, run the following manually:
  #   svn revert --depth infinity model-collect/<collection>/archives/*
  #   svn revert --depth infinity model-collect/<collection>/index/*
  # then remove both the local archives and index, and do an svn up to get
  # the original checkout back.

  # if etc/collect.cfg is different, copy it across too?
}
|
---|
153 |
|
---|
154 |
|
---|
# Re-build a single collection in "collect", which is a copy of model-collect.
#
# Steps:
#   1. For every empty directory found under collect/<name>/archives/HASH* and
#      collect/<name>/index/assoc/HASH*, `svn rm` and delete the matching
#      directory in model-collect (same path prefixed with "model-"), then
#      delete the empty directory from collect as well. In model-collect such
#      dirs are almost empty, except for the .svn folder they contain.
#   2. Run import.pl and buildcol.pl with -removeold on the collection.
#   3. Replace collect/<name>/index with the freshly built
#      collect/<name>/building.
#
# Arguments:
#   $1 - collection name, or a path whose last component is the name
#        (e.g. "collect/Tudor-Basic"). Names containing spaces are supported.
# Returns:
#   1 if no collection name was given; otherwise the status of the last
#   command that ran.
function single_collection () {
  local collection root coldir modeldir
  local -a hash_dirs empty_dirs

  # Only the last path component names the collection. Quoting (rather than
  # backslash-escaping spaces) keeps names with spaces intact.
  collection=$(basename -- "$1")

  if [[ -z "$collection" ]]; then
    echo "single_collection: no collection name given" >&2
    return 1
  fi

  # clean up empty directories:
  # HASH dirs that are empty in local (non-svn) collect's archives and
  # index/assoc need to be removed from the svn in model-collect.
  for root in "collect/$collection/archives" "collect/$collection/index/assoc"; do
    hash_dirs=("$root"/HASH*)
    # An unmatched glob stays as the literal pattern; nothing to clean then.
    [[ -e "${hash_dirs[0]}" ]] || continue

    # Gather the full list before deleting anything, so find never walks a
    # tree that is being modified underneath it. NUL delimiters keep paths
    # with whitespace in one piece.
    empty_dirs=()
    while IFS= read -r -d '' coldir; do
      empty_dirs+=("$coldir")
    done < <(find "${hash_dirs[@]}" -type d -empty -print0)

    for coldir in "${empty_dirs[@]}"; do
      modeldir="model-$coldir"
      echo "Removing empty dir from svn: $modeldir"

      # remove from svn of model collect
      svn rm "$modeldir"
      rm -rf -- "$modeldir"
      # remove physically from local collect (not necessary, as a rebuild
      # will sort this out too)
      rm -rf -- "$coldir"
    done
  done

  import.pl -removeold "$collection"
  buildcol.pl -removeold "$collection"

  # Only discard the old index once there is a new build to take its place;
  # otherwise a failed build would leave the collection with no index at all.
  if [[ -d "collect/$collection/building" ]]; then
    rm -rf -- "collect/$collection/index"
    mv -- "collect/$collection/building" "collect/$collection/index"
  else
    echo "Warning: no building folder was produced for $collection; keeping the existing index" >&2
  fi

  echo
  echo "*********************"
  echo "Done processing $collection"
  echo "*********************"
  echo
}
|
---|
203 |
|
---|
# Unused.
# Re-build all collections in "collect", which is a copy of model-collect:
# every entry directly under collect/ is handed to single_collection, which
# imports, builds and moves building to index.
# Takes no arguments.
function all_collections() {
  local collection

  for collection in collect/*; do
    # With no matches the glob stays as the literal "collect/*"; skip it.
    [[ -e "$collection" ]] || continue
    # Quoted so that collection names containing spaces stay one argument.
    single_collection "$collection"
    #update_single_collection "$collection"
  done
}
|
---|
214 |
|
---|
215 | # http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
|
---|
# Print the command-line help to stdout and terminate the script with exit
# status 1. Takes no arguments.
function usage() {
  echo "*******************************************"
  # Collections are separate command-line words, not a comma-separated list.
  echo "Usage: $0 [--svnupdate|--svndelete] [col1 col2 col3 ...]"
  echo "If no collections are provided, all collections will be processed."
  # Without a mode the script only rebuilds; it does not default to svnupdate.
  echo "If neither --svnupdate nor --svndelete is provided, the collections are"
  echo "only rebuilt in collect and the svn model-collect folder is left untouched."
  echo "*******************************************"
  exit 1
}
|
---|
226 |
|
---|
227 |
|
---|
228 | # The program starts here
|
---|
229 |
|
---|
# Process the optional command line arguments.
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
#
# getopt normalises the command line so that the recognised options
# (-u/--svnupdate, -d/--svndelete) come first, followed by "--" and then the
# remaining words, which are the collection names. On bad arguments getopt
# fails and the usage text is shown.
if ! ARGS=$(getopt -o ud -l "svnupdate,svndelete" -n "$0" -- "$@"); then
  usage
  exit 1
fi

# Replace the positional parameters with getopt's normalised list.
eval set -- "$ARGS"

# mode can be svndelete or svnupdate; empty means "rebuild only"
mode=

while true; do
  case "$1" in
    -d|--svndelete)
      requested=svndelete
      ;;
    -u|--svnupdate)
      requested=svnupdate
      ;;
    --)
      # end of options: what is left in "$@" are the collection names
      shift
      break
      ;;
    *)
      # getopt always emits "--", so this is not expected to happen; bail out
      # rather than spin forever on a word no arm consumes.
      echo
      echo "Unexpected argument: $1"
      usage
      exit 1
      ;;
  esac
  shift

  # The two modes are mutually exclusive; repeating the same one is harmless.
  if [[ -n "$mode" && "$mode" != "$requested" ]]; then
    echo
    echo "Can't use both svndelete and svnupdate"
    usage
    exit 1
  fi
  mode=$requested
done
unset requested
|
---|
277 |
|
---|
278 | # If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
|
---|
279 | # the svn model-collect folder. We leave it at rebuilding its copy in collect
|
---|
280 |
|
---|
281 | #http://www.cyberciti.biz/faq/linux-unix-sleep-bash-scripting/
|
---|
282 | #if [ "x$mode" == "x" ]; then
|
---|
283 | # echo
|
---|
284 | # echo "***Mode svndelete or svnupdate not provided. Defaulting to svnupdate... 3s to change to your mind"
|
---|
285 | # echo
|
---|
286 | # sleep 3s
|
---|
287 | # mode=svnupdate
|
---|
288 | #fi
|
---|
289 |
|
---|
290 | # the remaining arguments to the script are assumed to be collections
|
---|
291 |
|
---|
292 | # debugging
|
---|
293 | #for collection in "$@"; do
|
---|
294 | # collection=collect/$collection
|
---|
295 | # echo "Collection: $collection"
|
---|
296 | #done
|
---|
297 |
|
---|
298 | # finished processing arguments
|
---|
299 |
|
---|
300 |
|
---|
# report.txt collects the diff lines that concern files outside the archives
# and index folders; start every run with a fresh report.
rm -f report.txt

# Need pdfbox for the PDFBox tutorial.
# Done in a subshell so the script's working directory cannot end up somewhere
# else when ext/ is missing or a step fails.
if [[ ! -e ext/pdf-box ]]; then
  (
    cd ext || exit 1
    # The tarball path is relative to ext/ now that we are inside it.
    if [[ ! -e pdf-box-java.tar.gz ]]; then
      wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
    fi
    # Unpack even when the tarball was already present from an earlier run.
    tar -xvzf pdf-box-java.tar.gz
  ) || echo "Warning: could not set up the pdf-box extension in ext/" >&2
fi

# Get an up-to-date svn working copy of the model collections.
if [[ -e model-collect ]]; then
  svn up model-collect
else
  svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
fi

# Everything below works on a copy of model-collect, so it has to exist.
if [[ ! -d model-collect ]]; then
  echo "No model-collect folder available (svn checkout failed?). Aborting." >&2
  exit 1
fi

# move the existing collect folder out of the way
if [[ -e collect && ! -e collect_orig ]]; then
  if ! mv collect collect_orig; then
    echo "Could not rename collect to collect_orig. Aborting." >&2
    exit 1
  fi
fi

# Make a copy of the model-collect named as the new collect. Any collect
# folder still present at this point sits next to an existing collect_orig and
# is replaced, as before. The copy is also made when the installation had no
# collect folder to begin with.
rm -rf collect
if ! cp -r model-collect collect; then
  echo "Could not copy model-collect to collect. Aborting." >&2
  exit 1
fi

# Remove the copy's .svn folders. -prune stops find from trying to descend
# into a directory that has just been deleted.
find collect -name ".svn" -type d -prune -exec rm -rf {} +

# Empty dirs in the copy (the dirs no longer have .svn folders in them) are
# not deleted wholesale here; the following was left disabled:
# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
#find collect -type d -empty -delete

# Set up the Greenstone environment for building
source setup.bash
|
---|
349 |
|
---|
# Work out which collections to process, then rebuild each one and, when a
# mode (svndelete|svnupdate) was given, push the result into model-collect.
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script

collection_paths=()
if [[ -z "$1" ]]; then
  # No collections named on the command line: take everything in collect.
  for collection in collect/*; do
    # With no matches the glob stays as the literal "collect/*"; skip it.
    [[ -e "$collection" ]] || continue
    collection_paths+=("$collection")
  done
else
  # Command-line args are a list of collection names inside collect.
  for collection in "$@"; do
    collection_paths+=("collect/$collection")
  done
fi

# For each collection: import, build, move building to index, and then
# optionally update the svn model-collect. The path is kept in its own
# variable because the helper functions may reassign "collection".
for collection_path in "${collection_paths[@]}"; do
  if [[ ! -e "$collection_path" ]]; then
    echo "Can't find collection $collection_path. Skipping."
    continue
  fi

  single_collection "$collection_path"

  # Without a mode, model-collect is deliberately left untouched.
  if [[ -n "$mode" ]]; then
    update_single_collection "$collection_path" "$mode"
  fi
done
|
---|
385 |
|
---|
386 |
|
---|
# Final summary for the user.
printf '\n'
printf '%s\n' "*****************************************"

# A report only matters when a mode was set, i.e. when we were svn
# updating/deleting collections and extra differences were written out.
if [[ -n "$mode" && -f report.txt ]]; then
  printf '%s\n' "Some files outside of archives and index folders were different. See report.txt"
  printf '\n'
fi

printf '%s\n' "The original collect directory has been left renamed as collect_orig"
printf '\n'

# Tell the user whether model-collect was touched and what is left to do.
if [[ -n "$mode" ]]; then
  printf '%s\n' "You still need to run svn status and svn commit on the model-collect folder"
else
  printf '%s\n' "The model-collect folder has not been altered. Changes have only been made to collect"
fi

printf '%s\n' "*****************************************"
printf '\n'
|
---|
405 |
|
---|
406 |
|
---|
407 | # The following in a separate script file will delete all folders from model-collect that are
|
---|
408 | # empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
|
---|
409 |
|
---|
410 | #for collection in collect/*; do
|
---|
411 | #escape the filename (in case of space)
|
---|
412 | # collection=`echo $collection | sed 's@ @\\\ @g'`
|
---|
413 |
|
---|
414 | #get just the basename
|
---|
415 | # collection=`basename $collection`
|
---|
416 |
|
---|
417 | # HASH dirs that are empty in local collect's archives and index/assoc,
|
---|
418 | # need to be removed from the svn in model-collect
|
---|
419 |
|
---|
420 | # for line in `find collect/$collection/archives/HASH* -type d -empty`; do
|
---|
421 | # modelline="model-$line"
|
---|
422 | # echo "LINE: $modelline"
|
---|
423 |
|
---|
424 | # remove from svn of model collect
|
---|
425 | # svn rm $modelline
|
---|
426 | ## rm -rf $modelline
|
---|
427 | # remove physically from local collect
|
---|
428 | # rm -rf $line
|
---|
429 | # done
|
---|
430 |
|
---|
431 | # for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
|
---|
432 | # modelline="model-$line"
|
---|
433 | # echo "LINE: $modelline"
|
---|
434 |
|
---|
435 | # remove from svn of model collect
|
---|
436 | # svn rm $modelline
|
---|
437 | ## rm -rf $modelline
|
---|
438 | # remove physically from local collect
|
---|
439 | # rm -rf $line
|
---|
440 | # done
|
---|
441 |
|
---|
442 | #done |
---|