source: other-projects/meddle/trunk/meddle-fixup.js

Last change on this file was 31684, checked in by davidb, 7 years ago

Code tidyup

File size: 14.6 KB
Line 
1/**
2 * MEDDLE version 0.9
3 * Copyright (C) 2017 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
/**
 * Remove whitespace from both ends of a string.
 *
 * @param {string} str - text to tidy up
 * @returns {string} str with leading and trailing whitespace removed
 */
function trim(str)
{
  return str.trim();
}
23
24
25// Ligature lookup info:
26// https://en.wikipedia.org/wiki/List_of_precomposed_Latin_characters_in_Unicode
27// https://en.wikipedia.org/wiki/Typographic_ligature#Ligatures_in_Unicode_.28Latin_alphabets.29
28
29
// Maps one ligature character to the plain letters it is expanded to.
// All non-ASCII text is written with \u escapes so the table does not depend
// on the encoding this file is served in.
var ligature_map = {

  "\uA732": "AA",
  "\uA733": "aa",
  "\u00C6": "AE",
  "\u00E6": "ae",
  "\uA734": "AO",
  "\uA735": "ao",
  "\uA736": "AU",
  "\uA737": "au",
  "\uA738": "AV",
  "\uA739": "av",
  // as above but with bar
  "\uA73A": "AV",
  "\uA73B": "av",
  "\uA73C": "AY",
  "\uA73D": "ay",
  // U+1F670 lies outside the Basic Multilingual Plane, so it is spelled as a
  // surrogate pair: "\u1F670" would be read as U+1F67 followed by the digit 0.
  "\uD83D\uDE70": "et",
  "\uFB00": "ff",
  "\uFB03": "ffi",
  "\uFB04": "ffl",
  "\uFB01": "fi",
  "\uFB02": "fl",
  "\u0152": "OE",
  "\u0153": "oe",
  "\uA74E": "OO",
  "\uA74F": "oo",
  // U+00DF used to be listed twice (long-s + "s", then long-s + "z"); a later
  // duplicate key silently overrides the earlier one, so only the entry that
  // actually took effect is kept.
  // NOTE(review): long-s + "s" may be the more useful expansion -- confirm.
  "\u00DF": "\u017Fz",
  "\uFB06": "st",
  "\uFB05": "\u017Ft",
  "\uA728": "TZ",
  "\uA729": "tz",
  "\u1D6B": "ue",
  "\uAB50": "ui",
  "\uA760": "VY",
  "\uA761": "vy"
};

// RegExp matching any key of ligature_map; built lazily on first use by
// opt_ligature_replacement().
var ligature_re = null;
70
// Accent-folding table used by accent_fold().
// Each key is a plain lower-case letter; its value is the source text of a
// regular-expression character class listing the characters that are folded
// down to that letter (the plain upper- and lower-case letter come first).
var accented = {
  a: "[Aa\xaa\xc0-\xc5\xe0-\xe5\u0100-\u0105\u01cd\u01ce\u0200-\u0203\u0226\u0227\u1d2c\u1d43\u1e00\u1e01\u1e9a\u1ea0-\u1ea3\u2090\u2100\u2101\u213b\u249c\u24b6\u24d0\u3371-\u3374\u3380-\u3384\u3388\u3389\u33a9-\u33af\u33c2\u33ca\u33df\u33ff\uff21\uff41]",
  b: "[Bb\u1d2e\u1d47\u1e02-\u1e07\u212c\u249d\u24b7\u24d1\u3374\u3385-\u3387\u33c3\u33c8\u33d4\u33dd\uff22\uff42]",
  c: "[Cc\xc7\xe7\u0106-\u010d\u1d9c\u2100\u2102\u2103\u2105\u2106\u212d\u216d\u217d\u249e\u24b8\u24d2\u3376\u3388\u3389\u339d\u33a0\u33a4\u33c4-\u33c7\uff23\uff43]",
  d: "[Dd\u010e\u010f\u01c4-\u01c6\u01f1-\u01f3\u1d30\u1d48\u1e0a-\u1e13\u2145\u2146\u216e\u217e\u249f\u24b9\u24d3\u32cf\u3372\u3377-\u3379\u3397\u33ad-\u33af\u33c5\u33c8\uff24\uff44]",
  e: "[Ee\xc8-\xcb\xe8-\xeb\u0112-\u011b\u0204-\u0207\u0228\u0229\u1d31\u1d49\u1e18-\u1e1b\u1eb8-\u1ebd\u2091\u2121\u212f\u2130\u2147\u24a0\u24ba\u24d4\u3250\u32cd\u32ce\uff25\uff45]",
  f: "[Ff\u1da0\u1e1e\u1e1f\u2109\u2131\u213b\u24a1\u24bb\u24d5\u338a-\u338c\u3399\ufb00-\ufb04\uff26\uff46]",
  g: "[Gg\u011c-\u0123\u01e6\u01e7\u01f4\u01f5\u1d33\u1d4d\u1e20\u1e21\u210a\u24a2\u24bc\u24d6\u32cc\u32cd\u3387\u338d-\u338f\u3393\u33ac\u33c6\u33c9\u33d2\u33ff\uff27\uff47]",
  h: "[Hh\u0124\u0125\u021e\u021f\u02b0\u1d34\u1e22-\u1e2b\u1e96\u210b-\u210e\u24a3\u24bd\u24d7\u32cc\u3371\u3390-\u3394\u33ca\u33cb\u33d7\uff28\uff48]",
  i: "[Ii\xcc-\xcf\xec-\xef\u0128-\u0130\u0132\u0133\u01cf\u01d0\u0208-\u020b\u1d35\u1d62\u1e2c\u1e2d\u1ec8-\u1ecb\u2071\u2110\u2111\u2139\u2148\u2160-\u2163\u2165-\u2168\u216a\u216b\u2170-\u2173\u2175-\u2178\u217a\u217b\u24a4\u24be\u24d8\u337a\u33cc\u33d5\ufb01\ufb03\uff29\uff49]",
  j: "[Jj\u0132-\u0135\u01c7-\u01cc\u01f0\u02b2\u1d36\u2149\u24a5\u24bf\u24d9\u2c7c\uff2a\uff4a]",
  k: "[Kk\u0136\u0137\u01e8\u01e9\u1d37\u1d4f\u1e30-\u1e35\u212a\u24a6\u24c0\u24da\u3384\u3385\u3389\u338f\u3391\u3398\u339e\u33a2\u33a6\u33aa\u33b8\u33be\u33c0\u33c6\u33cd-\u33cf\uff2b\uff4b]",
  l: "[Ll\u0139-\u0140\u01c7-\u01c9\u02e1\u1d38\u1e36\u1e37\u1e3a-\u1e3d\u2112\u2113\u2121\u216c\u217c\u24a7\u24c1\u24db\u32cf\u3388\u3389\u33d0-\u33d3\u33d5\u33d6\u33ff\ufb02\ufb04\uff2c\uff4c]",
  m: "[Mm\u1d39\u1d50\u1e3e-\u1e43\u2120\u2122\u2133\u216f\u217f\u24a8\u24c2\u24dc\u3377-\u3379\u3383\u3386\u338e\u3392\u3396\u3399-\u33a8\u33ab\u33b3\u33b7\u33b9\u33bd\u33bf\u33c1\u33c2\u33ce\u33d0\u33d4-\u33d6\u33d8\u33d9\u33de\u33df\uff2d\uff4d]",
  n: "[Nn\xd1\xf1\u0143-\u0149\u01ca-\u01cc\u01f8\u01f9\u1d3a\u1e44-\u1e4b\u207f\u2115\u2116\u24a9\u24c3\u24dd\u3381\u338b\u339a\u33b1\u33b5\u33bb\u33cc\u33d1\uff2e\uff4e]",
  o: "[Oo\xba\xd2-\xd6\xf2-\xf6\u014c-\u0151\u01a0\u01a1\u01d1\u01d2\u01ea\u01eb\u020c-\u020f\u022e\u022f\u1d3c\u1d52\u1ecc-\u1ecf\u2092\u2105\u2116\u2134\u24aa\u24c4\u24de\u3375\u33c7\u33d2\u33d6\uff2f\uff4f]",
  p: "[Pp\u1d3e\u1d56\u1e54-\u1e57\u2119\u24ab\u24c5\u24df\u3250\u3371\u3376\u3380\u338a\u33a9-\u33ac\u33b0\u33b4\u33ba\u33cb\u33d7-\u33da\uff30\uff50]",
  q: "[Qq\u211a\u24ac\u24c6\u24e0\u33c3\uff31\uff51]",
  r: "[Rr\u0154-\u0159\u0210-\u0213\u02b3\u1d3f\u1d63\u1e58-\u1e5b\u1e5e\u1e5f\u20a8\u211b-\u211d\u24ad\u24c7\u24e1\u32cd\u3374\u33ad-\u33af\u33da\u33db\uff32\uff52]",
  s: "[Ss\u015a-\u0161\u017f\u0218\u0219\u02e2\u1e60-\u1e63\u20a8\u2101\u2120\u24ae\u24c8\u24e2\u33a7\u33a8\u33ae-\u33b3\u33db\u33dc\ufb06\uff33\uff53]",
  t: "[Tt\u0162-\u0165\u021a\u021b\u1d40\u1d57\u1e6a-\u1e71\u1e97\u2121\u2122\u24af\u24c9\u24e3\u3250\u32cf\u3394\u33cf\ufb05\ufb06\uff34\uff54]",
  u: "[Uu\xd9-\xdc\xf9-\xfc\u0168-\u0173\u01af\u01b0\u01d3\u01d4\u0214-\u0217\u1d41\u1d58\u1d64\u1e72-\u1e77\u1ee4-\u1ee7\u2106\u24b0\u24ca\u24e4\u3373\u337a\uff35\uff55]",
  v: "[Vv\u1d5b\u1d65\u1e7c-\u1e7f\u2163-\u2167\u2173-\u2177\u24b1\u24cb\u24e5\u2c7d\u32ce\u3375\u33b4-\u33b9\u33dc\u33de\uff36\uff56]",
  w: "[Ww\u0174\u0175\u02b7\u1d42\u1e80-\u1e89\u1e98\u24b2\u24cc\u24e6\u33ba-\u33bf\u33dd\uff37\uff57]",
  x: "[Xx\u02e3\u1e8a-\u1e8d\u2093\u213b\u2168-\u216b\u2178-\u217b\u24b3\u24cd\u24e7\u33d3\uff38\uff58]",
  y: "[Yy\xdd\xfd\xff\u0176-\u0178\u0232\u0233\u02b8\u1e8e\u1e8f\u1e99\u1ef2-\u1ef9\u24b4\u24ce\u24e8\u33c9\uff39\uff59]",
  z: "[Zz\u0179-\u017e\u01f1-\u01f3\u1dbb\u1e90-\u1e95\u2124\u2128\u24b5\u24cf\u24e9\u3390-\u3394\uff3a\uff5a]"
};
99
/**
 * Fold accented / decorated characters in a search string down to plain
 * letters and normalise its whitespace.
 *
 * Every non-whitespace character whose char code is above 128 is tested
 * (case-insensitively) against the character classes in `accented`, in table
 * order; on the first class that matches, the character is replaced by that
 * class's key.  Characters that match no class are left alone.
 *
 * @param {string} search_string - raw query text
 * @returns {string} the folded text, with leading/trailing whitespace removed
 *                   and every run of whitespace collapsed to a single space
 */
function accent_fold(search_string)
{
  // tidy up any pre- or post- whitespace
  search_string = trim(search_string);

  // Compile each character class once per call, instead of once per letter
  // for every non-ASCII character in the string.
  var folders = [];
  for (var base_c in accented) {
    folders.push({ base: base_c, re: new RegExp(accented[base_c], 'i') });
  }

  // replace characters by their compositors
  var accent_replacer = function(chr) {
    if (chr.charCodeAt(0) > 128) {
      for (var i = 0; i < folders.length; i++) {
        if (folders[i].re.test(chr)) {
          // Got accented char
          //   => downgrade to base character
          // Once folded to a plain letter, no later class can match it,
          // so stop at the first hit.
          return folders[i].base;
        }
      }
      // otherwise, leave it alone
    }
    return chr;
  };

  // No escaping of RE special characters is needed here: the replacer only
  // ever rewrites characters above code 128, so escaping before and
  // unescaping after would cancel out exactly.
  search_string = search_string.replace(/\S/g, accent_replacer);

  search_string = search_string.replace(/\s+/g, " ");

  return search_string;
}
131
132
/**
 * Optionally expand ligature characters in a query into the individual
 * letters given by `ligature_map`.
 *
 * Expansion happens when the '#meddle-lig' checkbox is checked.  If it is not
 * checked but the query contains a ligature, the user is asked (via
 * confirm()) whether to expand anyway.
 *
 * @param {string} query_box_val - query text as typed
 * @returns {string} the query, with ligatures expanded if expansion was
 *                   enabled or confirmed; otherwise unchanged
 */
function opt_ligature_replacement(query_box_val)
{
  // Build the detection RegExp from the map's keys on first use only
  if (ligature_re === null) {
    var ligature_str = Object.keys(ligature_map).join("|");
    ligature_re = new RegExp(ligature_str);
  }

  var do_replacement = $('#meddle-lig').is(":checked");

  if (!do_replacement && ligature_re.test(query_box_val)) {
    // option not on, but maybe the user hasn't realized their query contains ligatures
    do_replacement = confirm("Detected ligatures in query. Replace with individual characters?");
  }

  if (do_replacement) {
    // Walk the string one code point at a time (a surrogate pair counts as a
    // single character), so that keys outside the Basic Multilingual Plane
    // can be looked up too; splitting on "" would cut them in half.
    query_box_val = query_box_val.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g, function(chr) {
      if (Object.prototype.hasOwnProperty.call(ligature_map, chr)) {
        return ligature_map[chr];
      }
      return chr;
    });
  }

  return query_box_val;
}
167
/**
 * Apply accent_fold() to the query, but only when the '#meddle-acc'
 * checkbox is checked.
 *
 * @param {string} query_box_val - query text
 * @returns {string} the folded query if the option is on, else the query as given
 */
function opt_accent_fold(query_box_val)
{
  if ($('#meddle-acc').is(":checked")) {
    return accent_fold(query_box_val);
  }

  return query_box_val;
}
179
180
/**
 * Submit hook for the quick-search form: rewrite the 'query' field into a
 * 'content.ftsec:(...)' query, then hand over to encodeInput().
 *
 * The query first goes through the optional ligature expansion and accent
 * folding.  Each word is then prefixed with "+", and every adjacent pair of
 * words is additionally appended joined together without a space, e.g.
 *   "digital library" => content.ftsec:(+digital +library digitallibrary)
 * (the joined pairs are presumably the "word-wrap protection" advertised by
 * meddleInfoBox -- confirm).
 *
 * A query that is empty or only whitespace is left as it is, rather than
 * being turned into an empty "+" term.
 *
 * @param {HTMLFormElement} form - form holding an element named 'query'
 * @returns whatever encodeInput(form) returns
 */
function fixupEncodeInput(form) {
  console.log( "amcm-dl-fixup fixupEncodeInput(form) called" );

  var query_box = form.elements['query'];

  var query_box_val = opt_ligature_replacement(query_box.value);
  query_box_val = opt_accent_fold(query_box_val);

  // Strip whitespace at BOTH ends before splitting: whitespace left at the
  // end would produce an empty last word, i.e. a bare "+" term and a
  // repeated pair term.
  query_box_val = query_box_val.replace(/^\s+|\s+$/g, "");

  if (query_box_val !== "") {
    var query_box_words = query_box_val.split(/\s+/);

    // e.g., content.ftsec:(+capisco)
    var query_box_val_plus = query_box_words.map(function(word) {
      return "+" + word;
    }).join(" ");

    var query_box_val_double = "";
    for (var i = 1; i < query_box_words.length; i++) {
      query_box_val_double += " " + query_box_words[i-1] + query_box_words[i];
    }

    query_box.value = "content.ftsec:(" + query_box_val_plus + query_box_val_double + ")";
  }

  return encodeInput(form);
}
215
/**
 * Submit hook for the advanced-search form.
 *
 * Walks the field selectors '#fld0', '#fld1', ... until one has no selected
 * option text.  Every selector whose selected option reads "Any field" has
 * its value set to 'content.ftsec:'.  Then buildQuery() is called and the
 * result of submitIt() is returned.
 *
 * @returns whatever submitIt() returns
 */
function fixupSubmitIt() {
  for (var x = 0; ; x++) {
    var search_where = $("#fld" + x + " option:selected").text();
    if (!search_where) {
      // no such selector (or nothing selected): end of the field list
      break;
    }

    if (search_where === "Any field") {
      $("#fld" + x).val('content.ftsec:');
    }
  }

  buildQuery();

  return submitIt();
}
235
/**
 * Fill in the MEDDLE info box and report which digital library the proxied
 * page belongs to.
 *
 * The library is worked out from window.location.pathname, which is expected
 * to look like '...nph-proxy-meddle.cgi/.../http(s)/<real domain and path>'.
 * When a known library is recognised its name is shown in '#dlcontext'.
 * The ligature / accent option checkboxes are always written into
 * '#dlcontextoptions'.
 *
 * @returns {string} a label starting with "ACM DL", "Google Scholar",
 *                   "Springer" or "IEEE Xplore", or "" when nothing was
 *                   recognised
 */
function meddleInfoBox() {

  // Icon image
  // By VistaICO.com (VistaICO Toolbar Icons) [CC BY 3.0 ]

  var dlcontextoptions = '<input id="meddle-lig" name="meddle-lig" type="checkbox"><label for="meddle-lig" style="display: inline">&nbsp;Ligature&nbsp;expansion</label>&nbsp;&nbsp;<input id="meddle-acc" name="meddle-acc" type="checkbox"><label for="meddle-acc" style="display: inline">&nbsp;Accent&nbsp;folding</label>';

  //console.log("*** location = " + window.location.pathname);
  var detected_dl = "";

  var domain_match = window.location.pathname.match(/nph-proxy-meddle\.cgi\/.*?https?\/(.*)$/);

  // match() returns null when the page was not reached through the proxy
  // URL, so test the result itself before looking inside it
  if (domain_match) {
    var real_domain = domain_match[1];

    if (real_domain.match(/^dl\.acm\.org\//)) {
      detected_dl = "ACM DL (Meddled with to provide: full-text quicksearch, word-wrap protection)";
    }
    else if (real_domain.match(/^scholar\.google\.[a-z]+/)) {
      detected_dl = "Google Scholar";
    }
    else if (real_domain.match(/^www\.springer\.com/)) {
      detected_dl = "Springer";
    }
    else if (real_domain.match(/^ieeexplore\.ieee\.org\//)) {
      detected_dl = "IEEE Xplore";
    }
  }

  if (detected_dl !== "") {
    $('#dlcontext').html("<i>Detected:" + detected_dl + "</i>");
  }

  $('#dlcontextoptions').html('<font size="-1">'+dlcontextoptions+'</font>');

  return detected_dl;
}
275
276
/**
 * Hook the MEDDLE query fix-ups into an ACM DL page.
 *
 *  - the form named 'qiksearch' gets an onsubmit attribute that runs
 *    fixupEncodeInput() followed by _proxy_jslib_flush_write_buffers()
 *  - the form with id 'theform' gets its action pointed at fixupSubmitIt()
 *  - the 'content.ftsec:(+...)' wrapper is stripped from the 'value'
 *    attribute of the first element named 'query', when it has one
 *
 * Each step is skipped when its element is not present on the page.
 */
function acmdlFixupInit() {

  console.log("amc-dl-fixup initiated");

  var query_form = document.getElementsByName("qiksearch")[0];
  if (query_form) {
    query_form.setAttribute("onsubmit", "fixupEncodeInput(this) ; _proxy_jslib_flush_write_buffers();");
  }

  var adv_query_form = document.getElementById("theform");
  if (adv_query_form) {
    // javascript:submitIt();
    // javascript:; _proxy_jslib_proxify_html(submitIt())[0]

    adv_query_form.setAttribute("action","javascript:fixupSubmitIt();");
  }

  var query_box = document.getElementsByName("query")[0];
  if (query_box) {
    // getAttribute() gives null when the element carries no value attribute
    var query_box_val = query_box.getAttribute("value");
    if (query_box_val !== null) {
      // NOTE(review): for a multi-word query this only removes the wrapper
      // and the first "+"; the remaining "+word" and joined-pair terms
      // written by fixupEncodeInput() stay in the box -- confirm intended.
      query_box.setAttribute("value", query_box_val.replace(/content\.ftsec:\(\+(.*)\)/g,"$1"));
    }
  }

  console.log("acm-dl-fixup DOM tweaks done");
}
302
303
/**
 * onsubmit handler for the Google Scholar search form.
 *
 * Cancels the browser's own submission, runs the text of the 'gs_hp_tsi'
 * input through the optional ligature expansion and accent folding, writes
 * the result back, flushes the proxy's write buffers and then submits the
 * form named 'f' itself.
 *
 * @param {Event} [evt] - the submit event; window.event is used when absent
 */
function fixupGScholarSubmit(evt) {
  console.log( "gscholar-dl-fixup fixupGScholarSubmit(form) called" );

  var event = evt || window.event;

  event.preventDefault(); // to stop the form from submitting

  var query_box = document.getElementById("gs_hp_tsi");
  var query_box_val = query_box.value;

  query_box_val = opt_ligature_replacement(query_box_val);

  query_box_val = opt_accent_fold(query_box_val);

  query_box.value = query_box_val;

  _proxy_jslib_flush_write_buffers();

  var query_form = document.getElementsByName("f")[0];
  query_form.submit();
}
326
/**
 * Install fixupGScholarSubmit() as the onsubmit handler of the first form
 * named 'f'.  Does nothing beyond logging when there is no such form.
 */
function gscholarFixupInit() {
  console.log("gscholar-fixup initiated");

  var search_form = document.getElementsByName("f")[0];
  if (!search_form) {
    return;
  }

  search_form.onsubmit = fixupGScholarSubmit;
}
338
339
/**
 * onsubmit handler for the IEEE Xplore search form.
 *
 * Cancels the browser's own submission, rewrites the text of the
 * 'input-basic' box using the optional ligature expansion followed by the
 * optional accent folding, then flushes the proxy's write buffers and
 * submits the 'search_form' form itself.
 *
 * @param {Event} [evt] - the submit event; window.event is used when absent
 */
function fixupXploreSubmit(evt) {
  console.log( "xplore-dl-fixup fixupXploreSubmit(evt) called" );

  var submit_event = evt || window.event;
  submit_event.preventDefault(); // to stop the form from submitting

  var input_box = document.getElementById("input-basic");
  input_box.value = opt_accent_fold(opt_ligature_replacement(input_box.value));

  var search_form = document.getElementById("search_form");

  _proxy_jslib_flush_write_buffers(); // is this needed?
  search_form.submit();
}
369
370
/**
 * Install fixupXploreSubmit() as the onsubmit handler of the form with id
 * 'search_form', if the page has one.
 */
function xploreFixupInit() {
  console.log("IEEE-Xpore DOM tweaks initiated");

  var search_form = document.getElementById("search_form");
  if (search_form) {
    search_form.onsubmit = fixupXploreSubmit;
  }

  console.log("IEEE-Xplore DOM tweaks done");
}
382
383
384
/**
 * onsubmit handler for the Springer search form.
 *
 * Cancels the browser's own submission, rewrites the text of the 'query'
 * box using the optional ligature expansion followed by the optional accent
 * folding, then flushes the proxy's write buffers and submits the
 * 'global-search' form itself.
 *
 * @param {Event} [evt] - the submit event; window.event is used when absent
 */
function fixupSpringerSubmit(evt) {
  console.log( "spring-dl-fixup fixupSpringerSubmit(evt) called" );

  var submit_event = evt || window.event;
  submit_event.preventDefault(); // to stop the form from submitting

  var input_box = document.getElementById("query");
  input_box.value = opt_accent_fold(opt_ligature_replacement(input_box.value));

  var search_form = document.getElementById("global-search");

  _proxy_jslib_flush_write_buffers(); // is this needed?

  // The submit() of a freshly created form element is borrowed and applied
  // to the real form -- presumably because something on the page shadows the
  // form's own 'submit' property; TODO confirm.
  document.createElement('form').submit.call(search_form);
}
406
407
/**
 * Install fixupSpringerSubmit() as the onsubmit handler of the form with id
 * 'global-search', if the page has one.
 */
function fixupSpringerInit() {
  console.log("Springer DOM tweaks initiated");

  var search_form = document.getElementById("global-search");
  if (search_form) {
    search_form.onsubmit = fixupSpringerSubmit;
  }

  console.log("Springer DOM tweaks done");
}
419
420
421
// Once the DOM is ready: fill in the info box, then run the initialiser that
// belongs to whichever digital library meddleInfoBox() recognised.
$(document).ready(function() {
  console.log( "jquery DOM ready" );

  var library_label = meddleInfoBox();

  if (/^ACM DL/.test(library_label)) {
    acmdlFixupInit();
  }
  else if (/^Google Scholar/.test(library_label)) {
    gscholarFixupInit();
  }
  // NOTE: no initialiser is run for IEEE Xplore; the xploreFixupInit() call
  // that used to sit here is disabled.
  else if (/^Springer/.test(library_label)) {
    fixupSpringerInit();
  }
});
438
Note: See TracBrowser for help on using the repository browser.