source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26767

Last change on this file since 26767 was 26767, checked in by davidb, 11 years ago

Work done on linking collection browsing frames as well as linking to matching collection item frames.

File size: 15.3 KB
Line 
1#!/cygdrive/c/strawberry/perl/bin/perl -w
2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6use strict;
7
# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.
    # NOTE(review): the result of chdir is not checked here; whether a failure
    # should be reported depends on how IIS is configured -- confirm before
    # making it fatal.
    chdir("cgi-bin");
}


# We load the module with require inside an eval (instead of "use") so that any
# error raised while loading it can be caught and reported (for IIS).
# A block eval is used rather than a string eval: the require still runs at
# run time and still sets $@ on failure, but the code is compiled only once.
eval { require "gsdlCGI.pm"; };
if ($@)
{
    # Report the load failure as a plain-text CGI response and stop.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $@\n";
    exit 0;
}
29
# generate_html_form($isGSDL2, $site, $collect, $cl)
#
# Prints (to the currently selected output handle) a CGI response consisting of
# a "Content-type:text/html" header followed by an HTML page.  The page holds a
# form asking for a collection and a classifier, plus the JavaScript that
# walks the classifier pages, loads each document into an iframe, converts it
# with htmlToExpeditee() and posts the result back to
# cgi-bin/html-to-expeditee.pl.
#
# Parameters:
#   $isGSDL2 - true when running under Greenstone 2; emitted into the page as
#              the JavaScript value 1 or 0 (variable gs2).
#   $site    - site name placed in the hidden "site" field (may be undef).
#   $collect - initial value of the "collect" text field (may be undef).
#   $cl      - initial value of the "cl" text field (may be undef).
#
# Returns nothing useful; the page is written with print.
sub generate_html_form
{
    my($isGSDL2,$site,$collect,$cl) = @_;

    # The three string parameters end up inside double-quoted HTML attribute
    # values, so undefined values are turned into empty strings (no
    # "uninitialized" warnings under -w) and markup characters are escaped.
    my $escape_attr = sub {
        my ($value) = @_;
        $value = '' unless defined $value;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $value =~ s/'/&#39;/g;
        return $value;
    };

    $site    = $escape_attr->($site);
    $collect = $escape_attr->($collect);
    $cl      = $escape_attr->($cl);

    # Always emit a valid JavaScript literal for "gs2=...;" below.
    $isGSDL2 = $isGSDL2 ? 1 : 0;

    # first generate the document frames
    # then generate the classifier browsing frames.

    # NOTE: this is an interpolating here-document.  A literal dollar sign is
    # written as \$ and a literal backslash as \\ so that the JavaScript that
    # reaches the browser is correct.
    my $html_form = <<EOT;
<!DOCTYPE html>
<html>
  <head>
    <meta content="text/html;charset=utf-8" http-equiv="Content-Type">
    <meta content="utf-8" http-equiv="encoding">
    <title>HTML To Expeditee Frames</title>
    <base href=".."/>

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>
  </head>
  <body>
    <form class="ui-widget">
      Convert the collection <input type="text" class="ui-corner-all" style="padding: 4px;" name="collect" value="$collect" id="collect" /> to Expeditee frames by traversing the classifier <input type="text" class="ui-corner-all" style="padding: 4px;" name="cl" value="$cl" id="cl" />

      <input value="$site" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Collection Space<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>
    </form>

    <script type="text/javascript">
      var collect;
      var site;
      var cl;
      var gs2;

      var docOIDs = [];
      var clPages = [];

      /* maps a document OID to the frame number written for that document */
      var hashMapDocFrames = {};

      var numDocOIDs = 0;
      var numClPages = 0;

      /* OID of the document currently loaded in the iframe */
      var currDocOID = null;

      var currDocFrameNum;
      var currClFrameNum;

      var compute_font = false;
      var compute_width = false;
      var generate_browsing = false; //generate a matching collection space frameset

      \$(function(){

        \$('#progressbar').progressbar();

        \$('#go').button().click(function(){

          site = document.getElementById("site").value;

          collect = document.getElementById("collect").value;

          if(collect.match(/^\\s*\$/)){
            alert("No collection specified");
            return false;
          }

          cl = document.getElementById("cl").value;
          if(cl.match(/^\\s*\$/)){
            alert("No classifier specified");
            return false;
          }

          compute_font = document.getElementById("checkBoxFont").checked;
          compute_width = document.getElementById("checkBoxWidth").checked;

          generate_browsing = document.getElementById("checkBoxBrowsing").checked;

          gs2=$isGSDL2;

          var url;

          /*obtain url for classifier/browse page and grab all links (doc and CL links) from this page.*/
          if(gs2){
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }else{
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          /* start every run from a clean state */
          docOIDs = [];
          clPages = [];
          hashMapDocFrames = {};
          currDocOID = null;

          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          /* the patterns do not change while traversing, so build them once */
          /* any links with (document|browse)=> outstandingURLS */
          /* any links with (document|browse)/CL[0-9]/[0-9] => clPages */
          /* any links with (document) => docOIDs */
          var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
          var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
          var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

          var workingTrav = document.getElementById("workingTraverse");

          while(outstandingURLs.length > 0){
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            workingTrav.innerHTML = clHtml;

            var aElems = workingTrav.getElementsByTagName("a");

            for(var i = 0; i < aElems.length; i++){
              var aElem = aElems[i];
              var href = aElem.href;

              if(href && href.match(actionRE)){
                if(href.match(clRE)){
                  if(!visitedURLs[href]){
                    //console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                    clPages.push(href);
                  }
                }else if(href.match(docRE)){
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    //console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;
          numClPages = clPages.length;
          var iframe = document.getElementById('iframe');

          startProcessing(iframe); //Process documents

          return false;
        });

      });

      /* Load the first document into the iframe; the onload handler
         (pageLoaded) then drives the rest of the conversion. */
      function startProcessing(iframe){

        var progressbar = document.getElementById('progressbar');
        progressbar.style.display = 'block';

        if(docOIDs.length > 0){
          currDocOID = docOIDs.shift();
          iframe.src = getDocumentUrl(currDocOID);
        }else{
          /* no documents were found: go straight to the browsing pages (or stop) */
          startClPagesOrFinish(iframe);
        }
      }

      /* Called once all documents are written: start on the classifier pages
         when requested and available, otherwise finish. */
      function startClPagesOrFinish(iframe){
        if(generate_browsing && clPages.length > 0){
          iframe.src = clPages.shift();
        }else{
          //We are finished
          finish(iframe);
        }
      }

      function pageLoaded(){
        var iframe = document.getElementById("iframe");

        if(iframe.src){

          if(iframe.style.display != 'block'){
            iframe.height = '90%';
            iframe.style.display = 'block';
          }

          var iframeDoc = getIframeDoc(iframe);


          /*Check url - if it's a doc Url, call "writeDocument", otherwise call "writeClPage"*/
          var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");


          if(iframe.src.match(clRE)){
            writeClPage(iframe,iframeDoc);
          }else{
            writeDocument(iframe,iframeDoc);
          }
        }
      }

      function writeDocument(iframe){

        var xmlUrl = iframe.src + "&o=xml";
        console.log("xml url: " + xmlUrl);
        var iter = (numDocOIDs - docOIDs.length);
        var progressPercent = iter/numDocOIDs * 100;

        var frameNum = getMetadata(xmlUrl,'frameID');

        if(frameNum === null){
          frameNum = iter;
        }

        //Remember which frame number belongs to the document that is loaded
        //right now, so classifier pages can link to it later.
        if(currDocOID !== null && currDocOID !== undefined){
          hashMapDocFrames[currDocOID] = frameNum;
        }

        var iframeDoc = getIframeDoc(iframe);

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");

        var assocElem = iframeDoc.getElementById("assocfilepath");

        var assoc = null;

        if(assocElem === undefined || assocElem === null){
          assoc = getMetadata(xmlUrl,'assocfilepath');

        }

        var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);

        //Add an assocfilepath but only if it is defined
        if(assoc !== null){
          params += "&assoc=" + assoc;
        }

        params += "&compute-font=" + compute_font;

        params += "&page-type=" + "document";


        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("ERROR PROCESSING URL: " + url);
        }

        if(docOIDs.length > 0){
          //move on to the next document
          currDocOID = docOIDs.shift();

          iframe.src = getDocumentUrl(currDocOID);

        }else{

          //start writing CL pages (or finish).
          currDocOID = null;
          startClPagesOrFinish(iframe);
        }
      }

      /* Return the document object inside the given iframe element. */
      function getIframeDoc(iframe){

        var iframeDoc = null;

        if(iframe.contentDocument){ /* FF and Chrome */
          iframeDoc = iframe.contentDocument;
        }else if(iframe.contentWindow){ /* IE */
          iframeDoc = iframe.contentWindow.document;
        }


        return iframeDoc;
      }

      function getDocumentUrl(docOID){
        var url;

        if(gs2){
          url = "library.cgi";
        }else{
          url = "library";
        }

        url += "?c=" + collect + "&a=d&d=" + docOID;

        url += "&p.showAssocFilePath=1";

        return url;
      }

      function writeClPage(iframe){

        var frameNum = numClPages - clPages.length;
        var progressPercent = frameNum / numClPages * 100;
        var iframeDoc = getIframeDoc(iframe);
        //console.log("Processing cl page: " + iframe.src + " ****");

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");
        var gsContentChildren = gsContent.getElementsByTagName('*');

        var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");
        var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");

        for(var i = 0; i < gsContentChildren.length; i++){
          var child = gsContentChildren[i];

          //get rid of rectangles around nodes.
          if(child.tagName !== "IMG"){
            child.setAttribute('rect','norect');
          }

          if(child.tagName === "A"){
            var aElem = child;
            var aElemSrc = aElem.href;

            //if aElemSrc is a document url, then extract docOID then access hash map and get matching frame number.
            if(aElemSrc.match(docRE)){
              var docMatch = docRE.exec(aElemSrc);
              var docOID = docMatch[1];

              var fn = hashMapDocFrames[docOID];

              var aElemChildren = aElem.getElementsByTagName('*');

              for(var j = 0; j < aElemChildren.length; j++){
                var aElemChild = aElemChildren[j];

                if(fn !== null && fn!== undefined){
                  var frameName = collect + fn;
                  aElemChild.setAttribute("link",frameName);
                }
              }

            }else if(aElemSrc.match(clRE)){ //link the item to one of the other CL browsing frameset pages

              var clMatch = clRE.exec(aElemSrc);
              var clPage = clMatch[2].substring(1);
              console.log(clPage);
              aElem.setAttribute("link",clPage);
            }
          }
        }

        var expFrameTree = htmlToExpeditee(gsContent);
        var expFrame = JSON.stringify(expFrameTree);
        console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&cl=" + cl;
        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);
        params += "&page-type=" + "clPage";

        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("Error processing url: " + url);
        }

        if(clPages.length > 0){
          iframe.src = clPages.shift();
        }else{
          finish(iframe);
        }
      }

      function finish(iframe){
        var progressbar = document.getElementById("progressbar");
        progressbar.style.display = "none";

        iframe.style.display = "none";
        delete iframe.src;
      }

      /**
       * This method is used at this stage to retrieve assocfilepath and
       * frame number metadata values from the document's xml.
       * Returns the text of the first metadata element whose name matches
       * nameValue, or null when there is none.
       **/
      function getMetadata(xmlUrl,nameValue){
        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){
            \$(xml).find('metadata').each(function(){
              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                }
              }
            });
          }
        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%" style="display: none; margin: 10px; height: 10px;"></div>

    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display:none;" onload="pageLoaded()"></iframe>
  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
480
# main()
#
# CGI entry point.  Sets up the Greenstone CGI helper, then either
#   * runs a frame-generating action when the "fn" parameter is present
#     ("page-type" selects HtmlToExpediteeAction for "document" or
#     CollectionSpaceAction for "clPage"), or
#   * prints the HTML form via generate_html_form() when "fn" is absent.
#
# Takes no arguments.  Errors are reported through $gsdl_cgi->generate_error().
sub main
{
    my $gsdl_cgi = gsdlCGI->new();

    #Load GS modules
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    #TODO: Refactor so we only need to use HtmlToExpediteeAction
    require cgiactions::HtmlToExpediteeAction;
    require cgiactions::CollectionSpaceAction;

    $gsdl_cgi->parse_cgi_args();
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn");

    if(defined $fn){
        #page_type can have two values: "document" or "clPage"
        my $page_type = $gsdl_cgi->clean_param("page-type");

        if(!defined $page_type){
            $gsdl_cgi->generate_error("No page type specified. Must be 'document' or 'clPage'");
            return;
        }

        my $action;

        if($page_type eq "document"){
            $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);
        }elsif($page_type eq "clPage"){
            $action = CollectionSpaceAction->new($gsdl_cgi,$iis6_mode);
        }else{
            $gsdl_cgi->generate_error("Invalid page type specified. Must be 'document' or 'clPage'");
            # Stop here in case generate_error returns rather than exiting:
            # $action is undefined and calling do_action on it would die.
            return;
        }

        $action->do_action();

    }else{
        # generate html form
        my $collect = $gsdl_cgi->clean_param("collect");
        my $cl = $gsdl_cgi->clean_param("cl");

        #Establish collect_dir using defining 'site' along the way if GS3
        my $site = undef;
        my $isGSDL2 = undef;

        if($gsdl_cgi->greenstone_version() == 2){
            $isGSDL2 = 1;
        }else{
            $isGSDL2 = 0;

            #GS3 (and possible future versions) make use of 'site'
            $site = $gsdl_cgi->clean_param("site");

            if(!defined $site){
                $gsdl_cgi->generate_error("No site specified.");
                # Do not go on to print the form after reporting the error
                # (matters only if generate_error returns).
                return;
            }
        }

        generate_html_form($isGSDL2,$site,$collect,$cl);
    }

    return;
}

main();
Note: See TracBrowser for help on using the repository browser.