source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26751

Last change on this file since 26751 was 26751, checked in by davidb, 11 years ago

Fixed bug with first CL page not being written out to a frame.

File size: 15.2 KB
Line 
#!/cygdrive/c/strawberry/perl/bin/perl -w

# Need to specify the full path of Perl above, e.g. for Windows something like
#!C:\\Perl32\\bin\\perl -w

use strict;

# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;

# IIS 6 runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, which makes lots of stuff fail,
# so in that mode change into the cgi-bin directory first.
chdir("cgi-bin") if ($iis6_mode);

# gsdlCGI is loaded at run time with require inside an eval (instead of "use")
# so that a failure to load the module can be caught and reported to the
# client as plain text (needed for IIS).
eval { require "gsdlCGI.pm"; };
if ($@)
{
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $@\n";
    exit 0;
}
29
# Escape the characters that are significant inside a double-quoted HTML
# attribute value.  An undefined value is returned as the empty string so the
# caller can interpolate the result without triggering "uninitialized" warnings.
sub _escape_html_attr
{
    my ($text) = @_;

    return '' unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

# Print the HTTP header and the HTML/JavaScript page that drives the
# HTML-to-Expeditee conversion in the browser.
#
# Parameters:
#   $isGSDL2 - true when running under Greenstone 2; emitted into the page as
#              the JavaScript value 1 or 0
#   $site    - site name (may be undef, e.g. for Greenstone 2)
#   $collect - collection name used to pre-fill the form (may be undef)
#   $cl      - classifier id used to pre-fill the form (may be undef)
#
# The page is written with print to the currently selected output handle.
# Nothing meaningful is returned.
sub generate_html_form
{
    my($isGSDL2,$site,$collect,$cl) = @_;

    # These values end up inside HTML attribute values, so escape them and
    # turn undef into '' before they are interpolated below.
    my $collect_attr = _escape_html_attr($collect);
    my $cl_attr      = _escape_html_attr($cl);
    my $site_attr    = _escape_html_attr($site);

    # Always emit a valid JavaScript literal, even if $isGSDL2 is undef.
    my $gs2_flag = ($isGSDL2) ? 1 : 0;

    # The generated page first generates the document frames and
    # then generates the classifier browsing frames.

    my $html_form = <<EOT;
<!DOCTYPE html>
<html>
  <head>
    <meta content="text/html;charset=utf-8" http-equiv="Content-Type">
    <meta content="utf-8" http-equiv="encoding">
    <title>HTML To Expeditee Frames</title>
    <base href=".."/>

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>
  </head>
  <body>
    <form class="ui-widget">
      Convert the collection <input type="text" class="ui-corner-all" style="padding: 4px;" name="collect" value="$collect_attr" id="collect" /> to Expeditee frames by traversing the classifier <input type="text" class="ui-corner-all" style="padding: 4px;" name="cl" value="$cl_attr" id="cl" />

      <input value="$site_attr" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Collection Space<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>
    </form>

    <script type="text/javascript">
    var collect;
    var site;
    var cl;
    var gs2;

    var docOIDs = [];
    var clPages = [];

    /* maps a document OID to the frame number its document frame was saved under */
    var hashMapDocFrames = new Array();

    var numDocOIDs = 0;
    var numClPages = 0;

    /* OID of the document currently loaded in the iframe */
    var currDocOID = null;

    var currDocFrameNum;
    var currClFrameNum;

    var compute_font = false;
    var compute_width = false;
    var generate_browsing = false; //generate a matching collection space frameset

    \$(function(){

        \$('#progressbar').progressbar();

        \$('#go').button().click(function(){

            site = document.getElementById("site").value;

            collect = document.getElementById("collect").value;

            if(collect.match(/^\\s*\$/)){
                alert("No collection specified");
                return false;
            }

            cl = document.getElementById("cl").value;
            if(cl.match(/^\\s*\$/)){
                alert("No classifier specified");
                return false;
            }

            compute_font = document.getElementById("checkBoxFont").checked;
            compute_width = document.getElementById("checkBoxWidth").checked;

            generate_browsing = document.getElementById("checkBoxBrowsing").checked;

            gs2=$gs2_flag;

            var url;

            /*obtain url for classifier/browse page and grab all links (doc and CL links) from this page.*/
            if(gs2){
                url = "library.cgi";
                url += "?c="+collect +"&a=d&cl=" + cl;
            }else{
                url = "library";
                url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
                url += "&excerptid=gs_content";
            }

            docOIDs = [];
            clPages = [];

            var outstandingURLs = [];
            var visitedURLs = {};

            outstandingURLs.push(url);
            visitedURLs[url] = 1;

            while(outstandingURLs.length > 0){
                url = outstandingURLs.shift();

                var clHtml = urlGetSync(url);

                var workingTrav = document.getElementById("workingTraverse");
                workingTrav.innerHTML = clHtml;

                var aElems = workingTrav.getElementsByTagName("a");

                /* any links with (document|browse)=> outstandingURLS */
                /* any links with (document|browse)/CL[0-9]/[0-9] => clPages */
                /* any links with (document) => docOIDs */

                var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
                var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
                var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

                for(var i = 0; i < aElems.length; i++){
                    var aElem = aElems[i];
                    var href = aElem.href;

                    if(href && href.match(actionRE)){
                        if(href.match(clRE)){
                            if(!visitedURLs[href]){
                                //console.log("found a new CL line: " + href);
                                outstandingURLs.push(href);
                                visitedURLs[href] = 1;
                                clPages.push(href);
                            }
                        }else if(href.match(docRE)){
                            if(!visitedURLs[href]){
                                var docMatch = docRE.exec(href);
                                var docOID = docMatch[1];

                                //console.log("found a new doc line: " + docOID);
                                visitedURLs[href] = 1;
                                docOIDs.push(docOID);
                            }
                        }
                    }
                }
            }

            numDocOIDs = docOIDs.length;
            numClPages = clPages.length;
            var iframe = document.getElementById('iframe');

            startProcessing(iframe); //Process documents

            return false;
        });

    });

    /* Kick off processing: load the first document (or, when there are no
       documents, the first CL page) into the iframe. Each load triggers
       pageLoaded(), which writes the frame and moves on to the next page. */
    function startProcessing(iframe){

        var progressbar = document.getElementById('progressbar');
        progressbar.style.display = 'block';

        if(docOIDs.length > 0){
            currDocOID = docOIDs.shift();
            iframe.src = getDocumentUrl(currDocOID);
        }else if(generate_browsing && clPages.length > 0){
            iframe.src = clPages.shift();
        }else{
            //Nothing was found to convert
            finish(iframe);
        }
    }

    /* onload handler of the iframe: dispatch on the kind of page just loaded */
    function pageLoaded(){
        var iframe = document.getElementById("iframe");

        if(iframe.src){

            if(iframe.style.display != 'block'){
                iframe.height = '90%';
                iframe.style.display = 'block';
            }

            var iframeDoc = getIframeDoc(iframe);

            /*Check url - if it's a doc Url, call "writeDocument", otherwise call "writeClPage"*/
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");

            if(iframe.src.match(clRE)){
                writeClPage(iframe,iframeDoc);
            }else{
                writeDocument(iframe,iframeDoc);
            }
        }
    }

    /* Convert the document currently shown in the iframe into an Expeditee
       frame, post it to the server, then load the next page. */
    function writeDocument(iframe){

        console.log("now processing document: " + iframe.src + " ***");
        var xmlUrl = iframe.src + "&o=xml";

        var iter = (numDocOIDs - docOIDs.length);
        var progressPercent = iter/numDocOIDs * 100;

        var frameNum = getMetadata(xmlUrl,'frameID');

        if(frameNum == null){
            frameNum = iter;
        }

        //Remember which frame THIS document is written to, so that CL pages
        //can later link to it (see writeClPage).
        hashMapDocFrames[currDocOID] = frameNum;

        var iframeDoc = getIframeDoc(iframe);

        \$(function(){
            \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");

        var assocElem = iframeDoc.getElementById('assocfilepath');
        var assoc = null;

        if(assocElem === undefined || assocElem === null){
            assoc = getMetadata(xmlUrl,'assocfilepath');
        }

        var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
            params += "&site=" + site;
        }

        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);

        //Add an assocfilepath but only if it is defined
        if(assoc !== null){
            params += "&assoc=" + assoc;
        }

        params += "&compute-font=" + compute_font;

        params += "&page-type=" + "document";

        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
            alert("ERROR PROCESSING URL: " + url);
        }

        if(docOIDs.length > 0){
            //Move on to the next document
            currDocOID = docOIDs.shift();
            iframe.src = getDocumentUrl(currDocOID);
        }else if(generate_browsing && clPages.length > 0){
            //start writing CL pages.
            iframe.src = clPages.shift();
        }else{
            //We are finished
            finish(iframe);
        }
    }

    /* Return the document object of the given iframe element (null if unavailable) */
    function getIframeDoc(iframe){

        var iframeDoc = null;

        if(iframe.contentDocument){ /* FF and Chrome */
            iframeDoc = iframe.contentDocument;
        }else if(iframe.contentWindow){ /* IE */
            iframeDoc = iframe.contentWindow.document;
        }

        return iframeDoc;
    }

    /* Build the URL used to display the document with the given OID */
    function getDocumentUrl(docOID){
        var url;

        if(gs2){
            url = "library.cgi";
        }else{
            url = "library";
        }

        url += "?c=" + collect + "&a=d&d=" + docOID;

        //NOTE(review): the spaces around "=" are sent as part of the parameter;
        //confirm whether "p.showAssocFilePath=1" was intended.
        url += "&p.showAssocFilePath = 1";

        return url;
    }

    /* Convert the classifier page currently shown in the iframe into an
       Expeditee frame, post it to the server, then load the next CL page. */
    function writeClPage(iframe){

        var frameNum = numClPages - clPages.length;
        var progressPercent = frameNum / numClPages * 100;
        var iframeDoc = getIframeDoc(iframe);
        console.log("Processing cl page: " + iframe.src + " ****");

        \$(function(){
            \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");
        var gsContentChildren = gsContent.getElementsByTagName('*');

        var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

        for(var i = 0; i < gsContentChildren.length; i++){
            var child = gsContentChildren[i];

            //get rid of rectangles around nodes.
            if(child.tagName !== "IMG"){
                child.setAttribute('rect','norect');
            }

            if(child.tagName === "A"){
                var aElem = child;
                var aElemSrc = aElem.href;

                //if aElemSrc is a document url, then extract docOID then access hash map and get matching frame number.
                if(aElemSrc.match(docRE)){
                    var docMatch = docRE.exec(aElemSrc);
                    var docOID = docMatch[1];

                    var fn = hashMapDocFrames[docOID];
                    var aElemChildren = aElem.getElementsByTagName('*');

                    for(var j = 0; j < aElemChildren.length; j++){
                        var aElemChild = aElemChildren[j];
                        var frameName = collect + fn;

                        aElemChild.setAttribute("link",frameName);
                    }

                }else{
                    //TODO: Check if the link is a CL link.
                    //TODO: Make a hashmap for storing CL page frame numbers?
                }
            }
        }

        //TODO: Need to change htmlToExpeditee code to account for links

        var expFrameTree = htmlToExpeditee(gsContent);
        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
            params += "&site=" + site;
        }

        params += "&cl=" + cl;
        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);
        params += "&page-type=" + "clPage";

        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
            alert("Error processing url: " + url);
        }

        if(clPages.length > 0){
            iframe.src = clPages.shift();
        }else{
            finish(iframe);
        }
    }

    /* Hide the progress bar and the iframe once everything has been written */
    function finish(iframe){
        var progressbar = document.getElementById("progressbar");
        progressbar.style.display = "none";

        iframe.style.display = "none";
        delete iframe.src;
    }

    /**
     * This method is used at this stage to retrieve assocfilepath and
     * frame number metadata values from the document's xml.
     * Returns the text of the first metadata element whose name attribute
     * equals nameValue, or null when there is none.
     **/
    function getMetadata(xmlUrl,nameValue){
        var metadata = null;

        \$.ajax({
            type: "GET",
            async: false,
            url: xmlUrl,
            dataType: "xml",
            success: function(xml){
                \$(xml).find('metadata').each(function(){
                    var name = \$(this).attr('name');

                    if(name === nameValue){
                        if(metadata === null){
                            metadata = \$(this).text();
                        }
                    }
                });
            }
        });

        return metadata;
    }

    </script>

    <div id="progressbar" width="100%" style="display: none; margin: 10px; height: 10px;"></div>

    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display:none;" onload="pageLoaded()"></iframe>
  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
481
# Entry point of the CGI script.
#
# When the "fn" CGI parameter is present, the request is a frame-generation
# request: the "page-type" parameter ("document" or "clPage") selects the
# action object whose do_action() is run.  Otherwise the HTML form that
# drives the conversion is generated from the "collect", "cl" and (for
# Greenstone 3) "site" parameters.
#
# Every error path returns right after generate_error(), so the code stays
# correct whether or not generate_error() itself returns to the caller.
sub main
{
    my $gsdl_cgi = gsdlCGI->new();

    #Load GS modules
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    #TODO: Refactor so we only need to use HtmlToExpediteeAction
    require cgiactions::HtmlToExpediteeAction;
    require cgiactions::CollectionSpaceAction;

    $gsdl_cgi->parse_cgi_args();
    # NOTE(review): presumably selects non-XML output for messages; confirm in gsdlCGI
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn");

    if(defined $fn){
        #page_type can have two values: "document" or "clPage"
        my $page_type = $gsdl_cgi->clean_param("page-type");

        if(!defined $page_type){
            $gsdl_cgi->generate_error("No page type specified. Must be 'document' or 'clPage'");
            return;
        }

        my $action;

        if($page_type eq "document"){
            $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);
        }elsif($page_type eq "clPage"){
            $action = CollectionSpaceAction->new($gsdl_cgi,$iis6_mode);
        }else{
            $gsdl_cgi->generate_error("Invalid page type specified. Must be 'document' or 'clPage'");
            # Without this return, do_action() would be called on undef below
            return;
        }

        $action->do_action();
        return;
    }

    # generate html form
    my $collect = $gsdl_cgi->clean_param("collect");
    my $cl = $gsdl_cgi->clean_param("cl");

    #Establish collect_dir using defining 'site' along the way if GS3
    my $site = undef;
    my $isGSDL2 = undef;

    if($gsdl_cgi->greenstone_version() == 2){
        $isGSDL2 = 1;
    }else{
        $isGSDL2 = 0;

        #GS3 (and possible future versions) make use of 'site'
        $site = $gsdl_cgi->clean_param("site");

        if(!defined $site){
            $gsdl_cgi->generate_error("No site specified.");
            # Do not go on to emit the form after reporting the error
            return;
        }
    }

    generate_html_form($isGSDL2,$site,$collect,$cl);
    return;
}
546
# Run the CGI request handler.
main();
Note: See TracBrowser for help on using the repository browser.