source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26596

Last change on this file since 26596 was 26596, checked in by davidb, 11 years ago

The html to expeditee feature will now obtain a matching assocfilepath for each frame and write this value out to the frame as a piece of text.

File size: 10.8 KB
Line 
#!/cygdrive/c/strawberry/perl/bin/perl -w

# Need to specify the full path of Perl above, e.g. for Windows something like
#!C:\\Perl32\\bin\\perl -w


use strict;
use warnings;

# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.  A failure is reported on STDERR but is
    # not fatal: the module load below will produce the user-visible error
    # if the working directory really is unusable.
    chdir("cgi-bin")
        or warn "html-to-expeditee: unable to chdir to cgi-bin: $!\n";
}


# We load gsdlCGI at run time inside an eval (instead of "use") to catch any
# errors loading the module (for IIS).  The block form of eval is used because
# the file name is a constant; the trailing "1" lets us test the eval result
# itself rather than relying on $@ alone.
my $gsdlcgi_loaded = eval { require "gsdlCGI.pm"; 1 };
if (!$gsdlcgi_loaded)
{
    my $load_error = $@ || "unknown error loading gsdlCGI.pm";

    # Report the problem as a plain-text CGI response so it is visible in
    # the browser, then stop.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
30
31
# generate_html_form($isGSDL2, $site, $collect, $cl)
#
# Prints (to the currently selected output handle) a complete CGI response:
# a "Content-type:text/html" header followed by the HTML/JavaScript page that
# drives the HTML-to-Expeditee conversion in the browser.
#
# Parameters:
#   $isGSDL2  - true when talking to Greenstone 2; interpolated into the
#               page's JavaScript as the literal 1 or 0.
#   $site     - value for the hidden "site" form field (may be undef).
#   $collect  - initial value of the "collect" text field (may be undef).
#   $cl       - initial value of the "cl" (classifier) text field (may be undef).
#
# Returns: the return value of the final print.
#
# The page's JavaScript, when "Go" is pressed:
#   1. walks the classifier pages (fetched with urlGetSync) collecting
#      document OIDs from the links it finds;
#   2. loads each document in turn into the iframe; and
#   3. on every iframe load (pageLoaded) converts the "gs_content" element
#      with htmlToExpeditee, looks up the document's assocfilepath, and
#      POSTs the result back to cgi-bin/html-to-expeditee.pl.
# urlGetSync, urlPostSync and htmlToExpeditee are not defined in this page;
# presumably they come from the included gsajax-min.js / html-to-expeditee.js.
sub generate_html_form
{
    my ($isGSDL2,$site,$collect,$cl) = @_;

    # The flag is written verbatim into JavaScript ("var gs2=...;"), so make
    # sure it is always a valid literal even if the caller passed undef.
    $isGSDL2 = $isGSDL2 ? 1 : 0;

    # These values originate from CGI parameters and are placed inside
    # double-quoted HTML attribute values: default undef to the empty string
    # (avoids "uninitialized" warnings) and HTML-escape them so they cannot
    # break out of the attribute.  The loop variable aliases the lexicals,
    # so the substitutions modify $site, $collect and $cl in place.
    for my $value ($site, $collect, $cl) {
        $value = '' unless defined $value;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $value =~ s/'/&#39;/g;
    }

    # NOTE: this is an interpolating heredoc.  "\$" yields a literal "$" for
    # jQuery / regex end-anchors, and each "\\" yields a single backslash in
    # the emitted JavaScript.
    my $html_form = <<EOT;
<html>
  <head>
    <title>HTML to Expeditee Frames</title>

    <base href=".." />

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>



  </head>
  <body>
    <form class="ui-widget">
      Convert the collection
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="collect" value="$collect" id="collect" />
      to Expeditee frames by traversing the classifier
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="cl" value="$cl" id="cl" />

      <input value="$site" name="site" id="site" type="hidden">
      <input value="Go" id="go"
             class="ui-button ui-widget ui-state-default ui-corner-all"
             type="submit">

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;

      \$(function() {
        \$('#progressbar').progressbar();

        \$('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\\s*\$/)) {
            alert("No collection specified");
            return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\\s*\$/)) {
            alert("No classifier specified");
            return false;
          }
          if (cl.match(/^\\d+\$/)) {
            cl = "CL" + cl;
          }

          var gs2=$isGSDL2;

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          /* processing animation */

          docOIDs = [];
          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while (outstandingURLs.length>0) {
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var aElems;
            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;
            aElems = workingTrav.getElementsByTagName("a");


            /* any links with a=(b|d) ... cl=??? => outstandingURLS */
            /* any links with a=d ... d=??? => docOIDS */

/*
            var actionRE = new RegExp("(\\\\?|&)a=(?:d|b)(&|\$)");
            var clRE = new RegExp("(\\\\?|&)cl=" + cl + "(\\\\.\\\\d+)+(&|\$)");
            var docRE = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");
*/
            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
                if (href.match(clRE)) {
                  if (!visitedURLs[href]) {
                    // console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                  }
                }
                else if (href.match(docRE)) {
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    // console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;

          if (numDocOIDs === 0) {
            alert("No documents found under classifier " + cl);
            return false;
          }

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          //console.log("doc oid = " + docOID);

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&d=" + docOID;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=d&d=" + docOID;

            /** The line below has been commented out. This is because the incorrect page is being loaded up in the iframe (i.e. just the document image and heading is being displayed but not any html customizations made in the collectionConfig.xml file**/
            //url += "&ed=1&dt=hierarchy";
            // url += "&excerptid=gs_content";
          }

          //url += "&p.showExpediteeAttributes=1";


          /* load iframe with document drawn from Greenstone collection */
          iframe.src = url;

          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";


          return false; });
      });

      function pageLoaded()
      {
        var iframe = document.getElementById("iframe");

        if (iframe.src) {
          if (iframe.style.display != "block") {
            iframe.height = "90%";
            iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var frameID = (numDocOIDs - docOIDs.length);
          var progressPercent = frameID/numDocOIDs * 100;

          \$(function() {
            \$('#progressbar').progressbar({ value: progressPercent })
          });

          var gsContent = iframeDoc.getElementById("gs_content");

          /*
          var gsContentPos = getElementPosition(gsContent);
          var pxl = gsContentPos.xl;
          var pxr = gsContentPos.xr;
          */

          //get assocfilepath from xml
          var xmlUrl = iframe.src + "&o=xml";

          var assoc = getAssocFilePath(xmlUrl);

          var expFrameTree = htmlToExpeditee(gsContent);

          var expFrame = JSON.stringify(expFrameTree);
          //alert(expFrame);

          var collect = document.getElementById("collect").value;
          var site = document.getElementById("site").value;

          var url = "cgi-bin/html-to-expeditee.pl";
          var params = "c=" + collect;
          if (site.match(/\\w/)) {
            params += "&site=" + site;
          }

          params += "&a=generate-frame&fn=" + frameID;
          params += "&json=" + escape(expFrame);

          //add an assocfilepath parameter

          if(assoc !== null && assoc !== undefined){
            params += "&assoc=" + assoc;
          }

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
            alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
            var docOID = docOIDs.shift();

            // console.log("doc oid = " + docOID);

            // is the following line used ????
            var cl = document.getElementById("cl").value;

            var gs2 = $isGSDL2;
            var url;

            if (gs2) {
              url = "library.cgi";
              url += "?c="+collect +"&a=d&d=" + docOID;
            }
            else {
              url = "library";
              url += "?c="+collect +"&a=d&d=" + docOID;

              /** Refer to comment earlier in this code in regards to the commented out line below.**/
              //url += "&ed=1&dt=hierarchy";
              // url += "&excerptid=gs_content";
            }

            //url += "&p.showExpediteeAttributes=1";

            iframe.src = url;
          }
          else {
            var progressbar = document.getElementById("progressbar");
            progressbar.style.display = "none";

            iframe.style.display = "none";
            delete iframe.src;
          }
        }

      }

      function getAssocFilePath(xmlUrl){
        var assoc = "";

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){


            \$(xml).find('metadata').each(function(){

              var name = \$(this).attr('name');

              if(name === 'assocfilepath'){
                if(assoc === ""){
                  assoc = \$(this).text();
                  //console.log(assoc);
                }

              }
            });

          }

        });

        return assoc;
      }

    </script>

    <div id="progressbar" width="100%"
         style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
353
# main()
#
# CGI entry point.  Sets up the Greenstone environment through gsdlCGI,
# parses the CGI arguments and then dispatches on the "fn" parameter:
#
#   * "fn" (frame number) present - hand the request to
#     HtmlToExpediteeAction and run its do_action().
#   * "fn" absent - print the HTML form, pre-filled with the "collect" and
#     "cl" parameters (plus "site" when not running against Greenstone 2).
#
# Takes no arguments.  Relies on the file-scoped $iis6_mode flag.
sub main
{
    # To exercise the script outside a web server, fake the CGI environment
    # here before the gsdlCGI object is created, e.g.:
#   $ENV{'QUERY_STRING'} = "collect=...&cl=...&site=...";
#   $ENV{'REQUEST_METHOD'} = "GET";

    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    # Loaded at run time: presumably it can only be located once
    # setup_gsdl() has run - TODO confirm against gsdlCGI.pm.
    require cgiactions::HtmlToExpediteeAction;

    # Useful debug statement for seeing what packages have been included
####    printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn"); # frame number

    if (defined $fn) {
        # A frame number means the browser is posting a converted frame.
        my $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);

        $action->do_action();
    }
    else {
        # generate form, pre-filled out with any useful values such
        # as the collection and classifier value

        my $collect = $gsdl_cgi->clean_param("collect");
        my $cl = $gsdl_cgi->clean_param("cl");

        # Establish collect_dir using defining 'site' along the way if GS3

        my $site = undef;
        my $isGSDL2 = undef;

        if ($gsdl_cgi->greenstone_version() == 2) {
            $isGSDL2 = 1;
        }
        else {
            $isGSDL2 = 0;

            # GS3 (and possible future versions) make use of 'site'
            $site = $gsdl_cgi->clean_param("site");
            if (!defined $site) {
                $gsdl_cgi->generate_error("No site specified.");

                # generate_error presumably terminates the request itself;
                # in case it returns, do not go on to emit the form on top
                # of the error response.
                return;
            }
        }

        generate_html_form($isGSDL2,$site,$collect,$cl);
    }
}
414
415
416
# Run the CGI request.  Plain call syntax is used; the legacy "&" sigil form
# is unnecessary here.
main();
Note: See TracBrowser for help on using the repository browser.