source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28660

Last change on this file since 28660 was 28660, checked in by ak19, 10 years ago

Adding a force to the svn remove statements.

File size: 29.9 KB
RevLine 
#!/bin/bash

# PURPOSE
# This is not a nightly script. You use it to regenerate the model-collections
# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
# to documents, or anything else that changes the contents of the index and
# archives folders. This happened with the commits
# http://trac.greenstone.org/changeset/28022 and
# http://trac.greenstone.org/changeset/28021
# These commits generate new stable HASH OIDs for the existing documents.


# USAGE
# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
# that you want to generate the model collections with.
# You can provide a list of collection names or none, in which case all the collections
# are processed.

# Pass in --svnupdate to copy across the contents of archives and index in the
# rebuilt collection, overwriting their equivalents in the svn model collection,
# but not removing any extraneous HASH folders already present.
# !!!!! IMPORTANT: if you pass in --svnupdate, it leaves you to do the final commit on
# the (svn) model-collect folder!

# Pass in --svndelete to remove the archives and index from svn in the model-collect
# and replace them with the rebuilt archives and index.
# The --svndelete flag is useful for when the HASH directory naming has changed and everything
# in archives and index has to be wiped out and moved back in from the rebuilt collection.
# Passing in --svndelete will do the final commits on the model-collect folder.

# If neither flag is passed in, then the collections are rebuilt but the svn model-collect
# is not updated and the repository is not updated.

# Examples of usage:
#   ./gen-model-colls.sh
#   ./gen-model-colls.sh --svndelete
#   ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced

# The first just rebuilds all the collections in a new folder called collect and stops there.

# The second rebuilds all the collections in collect and svn removes the archives and the index
# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
# and svn adds them.

# The third example checks out all the model-collections again, but rebuilds only the 2 collections
# specified in the new collect folder. Then it copies across the *contents* of the archives and
# index folders of those 2 collections into their model-collect equivalents. You then still have to
# do the final svn commit on the model-collect folder after looking over the differences.

# Also valid examples:
#   ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
#   ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
#   ./gen-model-colls.sh --svnupdate

# PSEUDOCODE
# This script:
# Checks out the model-collections folder from SVN
# Makes a copy
# In the copy: gets rid of the .svn folders, and builds each collection in turn, moving building to index once done
# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
# model-collect/archives and model-collect/index.
# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.

# See the earlier version of this script for how
# to svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives.
# See also http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control

# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories

# To check out just this file and other files at this level from trac, see
# http://stackoverflow.com/questions/11650156/svn-checkout-depth
# http://svnbook.red-bean.com/en/1.7/svn.advanced.sparsedirs.html
# So you would do:
# svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk diffcol --depth files

#*******************************GLOBAL VARIABLES***************************

# mode selects what is done to the svn model-collect folder after rebuilding.
# It can be svndelete, svnupdate or svnaddnew; empty means "rebuild only".
mode=
# debug_mode is set to 1 by --debug; svn commits are skipped when it is 1
debug_mode=0
# custom commit message given with --message; when empty, default messages are used
commit_message=

#*****************************FUNCTIONS*****************************

# get_col_basename PATH
# Prints the last path component of PATH (the collection's folder name) on
# stdout, e.g. "collect/Tudor-Basic" -> "Tudor-Basic".
#
# DON'T ADD ANY FURTHER OUTPUT STATEMENTS TO THIS FUNCTION.
# Returning a string does not work in bash, so callers capture this function's
# stdout with $(get_col_basename ...); anything else printed here would end up
# in the returned string.
# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
function get_col_basename () {
    # Quoting the argument keeps names containing spaces in one piece, so no
    # backslash-escaping of the name is needed.
    basename -- "$1"
}

# svn_add_new_collection COLLECTION
# Copies a collection from collect/ into model-collect/ (leaving out files that
# should not be versioned) and schedules it for addition with "svn add".
# Nothing is committed here. If model-collect/COLLECTION already exists, only a
# message is printed.
#   $1 - collection name, or a path whose last component is the collection name
#
# Background on svn:ignore for model-collect:
#   model-collect>svn -R propset svn:ignore -F .customignore .
# where .customignore is a file containing:
#   log
#   earliestDatestamp
#   cache
#   model-collect>svn proplist -v
# shows the svn properties, including the svn:ignore property. So it shows what files svn will ignore
function svn_add_new_collection () {
    local collection
    # reduce the argument to just the collection's folder name
    collection=$(get_col_basename "$1")

    if [ -e "model-collect/$collection" ]; then
        echo "svn_add_new_collection: $collection already exists in model-collect, can't add it to svn."
        return
    fi

    # Using rsync to copy folders while excluding files/subfolders, BUT rsync is not available on lsb
    # http://www.linuxquestions.org/questions/linux-software-2/copy-svn-working-dir-without-svn-hidden-dirs-and-files-620586/
    # See also http://www.thegeekstuff.com/2011/01/rsync-exclude-files-and-folders/,
    # section "Exclude multiple files and directories at the same time" (can also use a file to blacklist folders/files)

    # for GS3 we have a custom ignore file
#    if [ "x$GSDL3SRCHOME" != "x" ]; then
#        mkdir model-collect/$collection
#        svn add --force model-collect/$collection
#        svn propset -R svn:ignore -F model-collect/.customignore model-collect/$collection
#
#        if [ "x$commit_message" == "x" ]; then
#            commit_message="Adding new model collections 1/2: new empty collection dir with svn-ignore set."
#        fi
#        if [ "$debug_mode" -eq "0" ]; then
#            svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $commit_message" model-collect/$collection
#        fi
#    fi

    # need slash on end of src dir collect/$collection/ !
    # Each --exclude must be its own word: a missing space between two of them
    # turns both into one meaningless pattern and neither folder is excluded.
    rsync -r --exclude=.svn/ --exclude=log/ --exclude=cached/ --exclude=cache/ \
        --exclude=earliestDatestamp --exclude=fail.log --exclude=collectionConfig.bak \
        "collect/$collection/" "model-collect/$collection"

    # Alternative without rsync:
#    find collect/$collection -name ".svn" -type d -exec rm -rf {} \;
#    cp -r collect/$collection model-collect/$collection

    # http://www.thegeekstuff.com/2010/06/bash-array-tutorial/
#    ignorelist=('log' 'cache' 'archives/earliestDatestamp');
#    for ignored in "${ignorelist[@]}"; do
#        if [ -f model-collect/$collection/$ignorelist ]; then
#            rm model-collect/$collection/$ignorelist
#        elif [ -d model-collect/$collection/$ignorelist ]; then
#            rm -rf model-collect/$collection/$ignorelist
#        fi
#    done

    svn add --force "model-collect/$collection"

    # http://stackoverflow.com/questions/15880249/subclipse-svn-first-commit-ignore-certain-directories
    # http://wolfram.kriesing.de/blog/index.php/2005/svnignore-and-svnkeywords
    # http://boblet.tumblr.com/post/35755799/setting-up-and-using-svn-ignore-with-subversion
    # http://www.petefreitag.com/item/662.cfm
    # http://svnbook.red-bean.com/en/1.7/svn.advanced.props.special.ignore.html
    # http://stackoverflow.com/questions/116074/how-to-ignore-a-directory-with-svn

    # Dr Bainbridge's way of doing an svn ignore is better and involves fewer steps:
    # create the empty collection folder (-p for subcollections), svn add it,
    # svn:ignore all the files to be ignored
    # copy the contents of the collection across,
    # do an svn add --force on the collection folder

    #mkdir -p model-collect/$collection
    #svn add model-collect/$collection
    #ignorelist=('log' 'cache' 'archives/earliestDatestamp');
    #for ignored in "${ignorelist[@]}"; do
    #    svn propset svn:ignore $ignorelist model-collect/$collection/.
    #done
    # cp -r collect/$collection/* model-collect/$collection/*
    #svn add --force model-collect/$collection
}

# Function that handles the --svndelete flag (mode) of this script.
#   $@ - collection names to process; when the first argument is empty or
#        missing, every entry in collect/ is processed
# Steps:
#   1. svn remove archives and index in each collection
#   2. commit all the removals in one go, then svn up
#   3. copy over newly rebuilt archives and index into each model-collection
#      and svn add them
#   4. commit all the additions in one go
# The commits and the svn up are skipped when debug_mode is not 0.
# A custom commit_message (--message) is used for both commits; otherwise each
# commit gets its own default message.
function svn_delete () {
    local collection
    local -a targets
    if [ -z "$1" ]; then
        targets=(collect/*)
    else
        targets=("$@")
    fi

    # Work out both messages up front, without touching commit_message, so the
    # default of the first commit cannot leak into the second commit.
    local delete_msg="${commit_message:-Clean rebuild of model collections 1/2. Clearing out deprecated archives and index.}"
    local add_msg="${commit_message:-Clean rebuild of model collections 2/2. Adding rebuilt archives and index.}"

    for collection in "${targets[@]}"; do
        _del_col_archives_index "$collection"
    done

    # svn commit all the svn rm statements done above in one go:
    # don't do `svn up` before this commit, as doing so would retrieve all the folders that just were svn-removed
    # Numerical comparisons: http://tldp.org/LDP/abs/html/comparison-ops.html
    if [ "$debug_mode" -eq 0 ]; then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $delete_msg" model-collect

        # Having svn committed the deletes, do an svn up to locally delete what was svn-removed above,
        # BEFORE copying from the rebuilt archives and index folders
        svn up model-collect
    fi

    # copy from the rebuilt archives and index over into the svn model-collect and svn add them
    for collection in "${targets[@]}"; do
        _add_col_archives_index "$collection"
    done

    # commit all the svn add statements done just above in one go
    if [ "$debug_mode" -eq 0 ]; then
        svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $add_msg" model-collect
    fi

    # Only the last collection processed is named in this message.
    echo
    echo "*********************"
    echo "Done svn-deleting rebuilt model-collection: ${collection##*/}"
    echo "*********************"
    echo
}

# To undo the changes made by svndelete (before they are committed), run the following manually
# svn revert --depth infinity model-collect/$collection/archives/*
# svn revert --depth infinity model-collect/$collection/index/*
# then remove both the local archives and index, and do an svn up to get the original checkout back

# _del_col_archives_index COLLECTION
# svn deletes this collection's archives and index folders in model-collect.
# (The commit is done in one step by the caller, for all collections on which
# this function was called.)
#   $1 - collection name, or a path whose last component is the collection name
# When debug_mode is 1 the folders are only removed locally with rm, so svn is
# not touched. If the collection is not in model-collect, only a message is printed.
function _del_col_archives_index () {
    local collection
    # reduce the argument to just the collection's folder name
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "del_col_archives_index: $collection does not exist in model-collect"
        return
    fi

    # remove the entire archives and index folders from svn
    if [ "$debug_mode" -eq 0 ]; then
        svn rm --force "model-collect/$collection/archives"
        svn rm --force "model-collect/$collection/index"
    elif [ "$debug_mode" -eq 1 ]; then
        rm -rf "model-collect/$collection/archives"
        rm -rf "model-collect/$collection/index"
    fi
}


# _add_col_archives_index COLLECTION
# Copies the rebuilt archives and index folders of a collection from collect/
# into model-collect/ and then svn adds them (the svn add is skipped unless
# debug_mode is 0). etc/collect.cfg is copied across too when it differs.
#   $1 - collection name, or a path whose last component is the collection name
# If the collection is not in model-collect, only a message is printed.
function _add_col_archives_index () {
    local collection
    # reduce the argument to just the collection's folder name
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "add_col_archives_index: $collection does not exist in model-collect"
        return
    fi

    # copy across collect.cfg file if it has been modified
    # diff exits with 1 when the files differ and with 2 on trouble (such as a
    # missing file), so only status 1 triggers the copy. diff must be run as a
    # plain command: wrapping it in backticks would execute its output.
    local src_cfg="collect/$collection/etc/collect.cfg"
    local dst_cfg="model-collect/$collection/etc/collect.cfg"
    local cfg_status=0
    diff -q "$src_cfg" "$dst_cfg" > /dev/null || cfg_status=$?
    if [ "$cfg_status" -eq 1 ]; then
        cp "$src_cfg" "$dst_cfg"
    fi

    # copy across the entire rebuilt index and archives folders to the svn model-collect
    rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ --exclude=earliestDatestamp \
        "collect/$collection/archives" "model-collect/$collection"
    rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ \
        "collect/$collection/index" "model-collect/$collection"

    # need a --force to skip all the svn:ignored files (archives/earliestDatestamp)
    # when doing the recursive svn add on the archives and index directories
    if [ "$debug_mode" -eq 0 ]; then
        svn add --force "model-collect/$collection/archives"
        svn add --force "model-collect/$collection/index"
    fi
}


# UNUSED, but useful for spotting differences between the collect and model-collect
# after rebuild, before svn updating/deleting, as opposed to at the end of the script
#
# svn_process_single_collection COLLECTION
# Diffs model-collect/COLLECTION against collect/COLLECTION (ignoring lines
# that mention .svn) and appends to report.txt every diff line that mentions
# neither "archives" nor "index".
#   $1 - collection name, or a path whose last component is the collection name
# Prints a message and returns if the collection is not in model-collect or
# if there are no differences.
function svn_process_single_collection () {
    local collection diff_result line
    # reduce the argument to just the collection's folder name
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "svn_process_single_collection: $collection does not exist in model-collect"
        return
    fi

    # diff the svn model and rebuilt model collections (once; the result is reused below)
    diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn")
#    echo "Diff result for collection $collection: $diff_result"

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $collection"
        return
    fi

    # check that none of the lines mention files outside the archives or index folders
    # Reading line by line avoids having to change the global IFS:
    # http://unix.stackexchange.com/questions/39473/command-substitution-splitting-on-newline-but-not-space
    # String-contains test: http://stackoverflow.com/questions/229551/string-contains-in-bash
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            # the file that is different is neither in index nor in archives, send this diffline to the report
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"
}

# Function that takes care of the --svnupdate flag mode of this script for a single collection
#
# update_single_collection COLLECTION
# Copies the *contents* of the rebuilt archives and index folders from collect/
# over their equivalents in model-collect/, copies etc/collect.cfg when it
# differs, and svn adds whatever is new. Nothing is committed: that is left to
# the user.
#   $1 - collection name, or a path whose last component is the collection name
# If the collection is not in model-collect, only a message is printed.
function update_single_collection () {
    local collection
    # reduce the argument to just the collection's folder name
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "update_single_collection: $collection does not exist in model-collect"
        return
    fi

    # copy across collect.cfg file if it has been modified
    # diff exits with 1 when the files differ and with 2 on trouble (such as a
    # missing file), so only status 1 triggers the copy. diff must be run as a
    # plain command: wrapping it in backticks would execute its output.
    local src_cfg="collect/$collection/etc/collect.cfg"
    local dst_cfg="model-collect/$collection/etc/collect.cfg"
    local cfg_status=0
    diff -q "$src_cfg" "$dst_cfg" > /dev/null || cfg_status=$?
    if [ "$cfg_status" -eq 1 ]; then
        cp "$src_cfg" "$dst_cfg"
    fi

    # copy across the contents of the rebuilt model-collection's index and archives to the svn model-collect
    # (the * stays outside the quotes so that the shell still expands it)
    rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ --exclude=earliestDatestamp \
        "collect/$collection/archives/"* "model-collect/$collection/archives"
    rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ \
        "collect/$collection/index/"* "model-collect/$collection/index"

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # see also http://stackoverflow.com/questions/116074/how-to-ignore-a-directory-with-svn
    # NOTE(review): the debug_mode guard below is commented out, so these svn
    # adds also run in debug mode - confirm that this is intended.
#    if [ "$debug_mode" -eq "0" ]; then
    svn add --force "model-collect/$collection/archives/"* --auto-props --parents --depth infinity -q
    svn add --force "model-collect/$collection/index/"* --auto-props --parents --depth infinity -q
#    fi

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$collection"
    echo "*********************"
    echo
}


# build_single_collection COLLECTION
# Re-builds a single collection in "collect", which is a copy of model-collect:
# runs import.pl and buildcol.pl with -removeold, then replaces the collection's
# index folder with the freshly produced building folder.
#   $1 - collection name, or a path whose last component is the collection name
# When GSDL3SRCHOME is set, "-site localsite" is passed to both build scripts.
function build_single_collection () {
    local collection
    # reduce the argument to just the collection's folder name
    collection=$(get_col_basename "$1")

    # GS2 or GS3 building
    if [ -z "$GSDL3SRCHOME" ]; then
        import.pl -removeold "$collection"
        buildcol.pl -removeold "$collection"
    else
        import.pl -site localsite -removeold "$collection"
        buildcol.pl -site localsite -removeold "$collection"
    fi

    # move building to index
    rm -rf "collect/$collection/index"
    mv "collect/$collection/building" "collect/$collection/index"

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $collection"
    echo "*********************"
    echo
}


# usage [EXIT_STATUS]
# Prints the usage message on stdout and exits the script.
#   $1 - optional exit status, 1 by default. Pass 0 when the usage message was
#        explicitly asked for (--help) rather than shown because of an error.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
function usage() {
    local status=${1:-1}

    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete|--svnaddnew] [--debug] [--message 'custom commit message'] [col1, col2, col3,...]";
    echo "If no collections are provided, all collections will be processed.";
    echo "If none of svnupdate, svndelete or svnaddnew are provided,"
    echo "the specified collections are just rebuilt in the collect folder.";
    echo "*******************************************"
    exit "$status"
}


#*******************************MAIN PROGRAM***************************

# _set_mode NEW_MODE OTHER_MODES
# Sets the global mode to NEW_MODE. If a different mode was already chosen,
# prints an error naming NEW_MODE and OTHER_MODES, shows the usage and exits 1.
function _set_mode () {
    if [ -n "$mode" ] && [ "$mode" != "$1" ]; then
        echo
        echo "Can't use both $1 and $2"
        usage
        exit 1
    fi
    mode=$1
}

# parse_options ARGS...
# Processes the optional command line arguments with getopt and sets the
# globals mode, debug_mode and commit_message accordingly. The arguments left
# over after the options (the collection names) are stored in the global array
# remaining_args. Exits via usage on bad arguments or --help.
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
function parse_options () {
    local parsed
    parsed=$(getopt -o m:uxadh -l "message:,svnupdate,svndelete,svnaddnew,debug,help" -n "$0" -- "$@")

    # Bad arguments
    if [ $? -ne 0 ]; then
        usage
        exit 1
    fi

    eval set -- "$parsed"

    while [ "$#" -gt 0 ]; do
        case "$1" in
            -h|--help)
                # the 0 asks usage for a successful exit status
                usage 0
                exit 0
                ;;
            -a|--svnaddnew)
                shift
                _set_mode svnaddnew "svndelete/svnupdate"
                ;;
            -x|--svndelete)
                shift
                _set_mode svndelete "svnupdate/svnaddnew"
                ;;
            -u|--svnupdate)
                shift
                _set_mode svnupdate "svndelete/svnaddnew"
                ;;
            -d|--debug)
                shift
                debug_mode=1
                ;;
            -m|--message)
                # getopt always supplies the option's argument as the next
                # word, even when it is empty, so always consume both words.
                # (Leaving an empty message unshifted makes this loop spin forever.)
                if [ -n "$2" ]; then
                    commit_message=$2
                fi
                shift 2
                ;;
            --)
                shift
                break
                ;;
            *)
                # not expected from getopt output, but never loop on it
                echo "Unexpected argument: $1" >&2
                usage
                exit 1
                ;;
        esac
    done

    remaining_args=("$@")
}

parse_options "$@"
# from here on the positional parameters are just the collection names
set -- "${remaining_args[@]}"

#echo "commit message: $commit_message"
#echo "Debug mode is: $debug_mode"
#exit


# Locate the Greenstone installation and cd into the folder that contains the
# collect folder.
# If GSDL3SRCHOME is set (GS3), cd into its web/sites/localsite folder.
# Else if GSDLHOME is set (GS2), cd into it.
# Else try to find gs3-setup.sh or setup.bash in the current location and
# source it to set up the Greenstone environment (needed for building).
# Else print a message saying that no Greenstone home is set, and exit.
if [ -n "$GSDL3SRCHOME" ]; then
    echo "cd-ing into Greenstone 3 home directory: $GSDL3SRCHOME"
    cd "$GSDL3SRCHOME/web/sites/localsite" || {
        echo "Cannot cd into $GSDL3SRCHOME/web/sites/localsite. Exiting." >&2
        exit 1
    }
elif [ -n "$GSDLHOME" ]; then
    echo "cd-ing into Greenstone home directory: $GSDLHOME"
    cd "$GSDLHOME" || {
        echo "Cannot cd into $GSDLHOME. Exiting." >&2
        exit 1
    }
else
    echo "** No GS envvars set. Attempting to source the Greenstone setup script"
    if [ -e gs3-setup.sh ]; then
        source ./gs3-setup.sh
        cd "$GSDL3SRCHOME/web/sites/localsite" || {
            echo "Cannot cd into $GSDL3SRCHOME/web/sites/localsite. Exiting." >&2
            exit 1
        }
    elif [ -e setup.bash ]; then
        # no cd here: the script stays in the current folder
        source ./setup.bash
    else
        echo "No Greenstone Home set and no setup script found in current folder."
        echo "You need to source the setup script in a Greenstone installation. Exiting."
        # exit statuses are 0-255, so use 1 rather than -1
        exit 1
    fi
fi


# If no mode (svndelete|svnupdate|svnaddnew) was provided as cmd line arg, then don't modify
# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect

# the remaining arguments to the script are assumed to be collections

# debugging
#for collection in "$@"; do
#    collection=collect/$collection
#    echo "Collection: $collection"
#done

# finished processing arguments


# report.txt receives diff lines for files that differ outside the archives and
# index folders (it is written by svn_process_single_collection).
# Start with a clean slate.
if [ -f report.txt ]; then
    rm report.txt
fi

# Need the pdfbox extension for the PDFBox tutorial
# The PDFBox ext has to be placed in the *GSDLHOME*/ext,
# also in GS3's case where GSDLHOME is GS3/gs2build/ext
# Go into ext and at end popd to get back into the collect folder's parent for the GS installation
if [ ! -e "$GSDLHOME/ext/pdf-box" ]; then
    echo "** Getting PDFBox"
    # only download, unpack and popd if the pushd worked, so that nothing is
    # unpacked into the wrong folder and the directory stack stays balanced
    if pushd "$GSDLHOME/ext"; then
        if [ ! -e "$GSDLHOME/ext/pdf-box-java.tar.gz" ]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz \
                || echo "** Failed to download pdf-box-java.tar.gz" >&2
        fi
        if [ -e pdf-box-java.tar.gz ]; then
            tar -xvzf pdf-box-java.tar.gz
        fi
        popd
    else
        echo "** Cannot enter $GSDLHOME/ext, not getting PDFBox" >&2
    fi
fi


# move the existing collect folder out of the way
# unless we are adding a new collection to svn, in which case, we'll grab them from whatever collect folder exists
if [ "$mode" != "svnaddnew" ] && [ -e collect ] && [ ! -e collect_orig ]; then
    echo "** Moving collect out of the way"
    mv collect collect_orig
fi


# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [ -e model-collect ]; then
    echo "** SVN updating model-collect"
    if [ -z "$1" ]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    echo "** Getting the model-collect folder from SVN"
    # both checkouts land in a local folder called model-collect
    if [ -n "$GSDL3SRCHOME" ]; then
        svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/gs3-model-collect model-collect
    else
        svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect model-collect
    fi
fi


# Not using rsync to copy folders while excluding files/subfolders, since rsync is not available on lsb
# http://www.linuxquestions.org/questions/linux-software-2/copy-svn-working-dir-without-svn-hidden-dirs-and-files-620586/
# rsync -r --exclude=.svn/ model-collect/ collect

# _strip_svn_dirs DIR
# Removes every .svn folder found underneath DIR.
# -prune stops find from trying to descend into a folder it is about to
# delete, which would otherwise produce "No such file or directory" errors.
function _strip_svn_dirs () {
    find "$1" -name ".svn" -type d -prune -exec rm -rf {} +
}

# Make a copy of the model-collect named as the new collect
# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
# Then remove the copy's .svn folders
if [ "$mode" != "svnaddnew" ] && [ -e collect_orig ]; then

    echo "***********************************************"
    echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
    echo

    if [ ! -e collect ] || [ -z "$1" ]; then
        # no collect folder yet, or all collections are to be processed:
        # start from a complete fresh copy
        rm -rf collect
        cp -r model-collect collect
        _strip_svn_dirs collect
    else
        # refresh only the collections that were asked for
        for collection in "$@"; do
            rm -rf "collect/$collection"
            cp -r "model-collect/$collection" "collect/$collection"
            _strip_svn_dirs "collect/$collection"
        done
    fi
    echo "***********************************************"
fi


# Rebuild the collections and, depending on the mode, update model-collect.
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script

# _rebuild_and_sync COLLECTION_PATH
# Imports and builds one collection (moving building to index) and then does
# the per-collection work for the svnupdate and svnaddnew modes. The svndelete
# mode is handled afterwards, for all collections in one go.
function _rebuild_and_sync () {
    build_single_collection "$1"

    #svn_process_single_collection "$1"

    case "$mode" in
        svnupdate) update_single_collection "$1" ;;
        svnaddnew) svn_add_new_collection "$1" ;;
    esac
}

if [ -z "$1" ]; then

    # all_collections
    for collection in collect/*; do
        # skip the unexpanded pattern when collect is empty or missing
        [ -e "$collection" ] || continue
        _rebuild_and_sync "$collection"
    done

    # having rebuilt all the collections, just the processing for svndelete remains:
    if [ "$mode" == "svndelete" ]; then
        svn_delete
    fi

else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists

    for collection in "$@"; do
        if [ -e "collect/$collection" ]; then
            _rebuild_and_sync "collect/$collection"
        else
            echo
            echo "Can't find collection collect/$collection. Skipping."
            echo
        fi
    done

    # having rebuilt the specified collections above, just the processing for svndelete remains
    if [ "$mode" == "svndelete" ]; then
        svn_delete "$@"
    fi
fi


# Final summary for the user.
echo
echo "*****************************************"
echo
# NO LONGER NECESSARY: WE'RE DOING A DIFF BETWEEN collect AND model-collect AT THIS SCRIPT'S END
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point the user to it
#if [ -f report.txt ] && [ "x$mode" != "x" ]; then
#    echo "Some files or folders outside of archives and index directories were different. See report.txt"
#    echo
#fi

# if not svnupdating or svndeleting, then inform the user that model-collect is unchanged
# if svnupdating, then warn the user that model-collect still needs committing
# if svndeleting, then inform the user that model-collect has been changed and committed
# if svnaddnew, then warn the user that the new collections still need committing
case "$mode" in
    "")
        echo "* The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    svnupdate)
        echo "* TO DO: You still need to run svn status and svn commit on the model-collect folder. Besides that:"
        ;;
    svndelete)
        echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
        ;;
    svnaddnew)
        echo "* The new collection(s) have been built, copied to model-collect and added to svn."
        echo "* TO DO: You still need to run svn status and svn commit on the model-collect folder. Besides that:"
        ;;
esac
echo

if [ -n "$mode" ]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # the START marker is printed for both branches so that it always pairs with the END marker
    echo "---START DIFF---"
    if [ -z "$1" ]; then
        # -F: filter on the literal string .svn rather than on a regex
        diff -rq model-collect collect | grep -vF ".svn"
    else
        for collection in "$@"; do
            echo "--COLLECTION: $collection"
            diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn"
            echo "--"
        done
    fi
    echo "---END DIFF---"
    echo
fi

if [ -e collect_orig ]; then
    echo "* The original collect directory has been left renamed as collect_orig"
    echo
fi

if [ "${debug_mode:-0}" -eq 1 ]; then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
fi
echo
echo "*****************************************"
echo


# deletes empty dirs
# find collect/$collection/archives/HASH* -type d -empty -delete
# find collect/$collection/index/assoc/HASH* -type d -empty -delete

# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
#find collect -type d -empty -delete

# The following, when put in a separate script file, will delete all folders from model-collect that are
# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
# ---------------------------------------------
#!/bin/bash

#for collection in collect/*; do
    #escape the filename (in case of space)
#    collection=`echo $collection | sed 's@ @\\\ @g'`

    #get just the basename
#    collection=`basename $collection`

    # HASH dirs that are empty in local collect's archives and index/assoc,
    # need to be removed from the svn in model-collect

#    for line in `find collect/$collection/archives/HASH* -type d -empty`; do
#        modelline="model-$line"
#        echo "LINE: $modelline"

        # remove from svn of model collect
#        svn rm $modelline
##        rm -rf $modelline
        # remove physically from local collect
#        rm -rf $line
#    done

#    for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
#        modelline="model-$line"
#        echo "LINE: $modelline"

        # remove from svn of model collect
#        svn rm $modelline
##        rm -rf $modelline
        # remove physically from local collect
#        rm -rf $line
#    done

#done
# ---------------------------------------------
Note: See TracBrowser for help on using the repository browser.