source: gs3-extensions/seaweed-debug/trunk/src/WhitespaceUtil.js@ 28098

Last change on this file since 28098 was 25160, checked in by sjm84, 12 years ago

Initial cut at a version of seaweed for debugging purposes. Check it out live into the web/ext folder

File size: 31.9 KB
Line 
1/*
2 * file: WhitespaceUtil.js
3 *
4 * @BEGINLICENSE
5 * Copyright 2010 Brook Novak (email : [email protected])
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 * @ENDLICENSE
18 */
19
// Announce the "WhitespaceUtil" module through bootstrap.provides.
bootstrap.provides("WhitespaceUtil");

// Shared entry points. Each one is assigned inside the closure that follows.
var _consolidateWSSeqs;
var _normalizeNBSP;
var _convertWSToNBSP;
23
24/*
25 * @see http://www.w3.org/TR/html401/struct/text.html
26 */
27(function() {
28
/* Lookup of the element names that white space can physically separate. */
var breakableElements = $createLookupMap("button,img,iframe,map,object");

/* Lookup of the inline element names that cannot be regarded as part of a whitespace sequence. */
var nonWSInlineElements = $createLookupMap("br,button,img,iframe,map,object,select,textarea,applet");
34
/**
 * Replaces every tab, line feed, carriage return and space with _NBSP in the text
 * nodes visited from the target node. A text node is only rewritten when it supports
 * non-whitespace content and its white-space style is "normal".
 *
 * This does not create any undoable operations.
 *
 * @param {Node} targetNode The node whose text nodes are converted.
 */
_convertWSToNBSP = function(targetNode) {

    function replaceWhitespace(textNode) {

        // Leave alone text nodes that cannot hold non-whitespace content
        if (!_doesTextSupportNonWS(textNode)) return;

        // Only text with normal whitespace breaking is converted
        if (getWSStyle(textNode) != "normal") return;

        textNode.nodeValue = textNode.nodeValue.replace(/[\t\n\r ]/g, _NBSP);
    }

    _visitTextNodes(targetNode, targetNode, true, replaceWhitespace);
};
47
/**
 * Consolidates white space. This creates undoable operations.
 *
 * Every whitespace sequence reported by nextWSSequence whose white-space style is not
 * "pre" is cut out of the document as a fragment. When isBreaker reports that the
 * sequence separates two words/breakable elements, its first whitespace character is
 * left behind (and a sequence of exactly one character is left untouched).
 *
 * @param {Node} targetNode The DOM node and all its descendants to consolidate.
 *
 * @param {Boolean} extendRange If the first text node begins with whitespace, then the first
 *                              whitespace sequence may start before the target node. If the last text
 *                              node in the range ends with whitespace, then the last whitespace sequence
 *                              may end after the target node. Set to true to allow consolidation outside of
 *                              the target node, false will truncate whitespace sequences within the target node.
 */
_consolidateWSSeqs = function (targetNode, extendRange) {

    // Get the first non-empty text node within the target node that supports non-whitespace
    var ftn;
    _visitTextNodes(targetNode, targetNode, true, function(textNode) {
        if (_doesTextSupportNonWS(textNode) && _nodeLength(textNode) > 0) {
            ftn = textNode;
            return false; // Stop visiting: found it
        }
    });

    // If there are no such text nodes then there is nothing to consolidate
    if (!ftn) return;

    // If the first text node starts with whitespace, extend the range backward - possibly
    // before the target node - so that all preceding whitespace belonging to the same
    // sequence is included. May over estimate but that is ok.
    var currentNode = targetNode;
    var ignorePreceedingWS = false;

    if (extendRange && _isAllWhiteSpace(ftn.nodeValue.charAt(0))) {

        _visitAllNodes(null, ftn, false, function(domNode) {

            // Skip start node
            if (domNode == ftn) return;

            if (domNode.nodeType == Node.TEXT_NODE) {

                // Text nodes that do not support non-whitespace shouldn't be consolidated
                if (!_doesTextSupportNonWS(domNode))
                    return false;

                // Adjust new node to start consolidating from
                currentNode = domNode;

                // If the text node contains a non-whitespace character then the range has been extended enough
                if (!_isAllWhiteSpace(domNode.nodeValue)) {
                    // Ignore the whitespace before the first non-whitespace character of this
                    // node on the first nextWSSequence call (the flag is reset after that call)
                    ignorePreceedingWS = true;
                    return false;
                }

            }

            // If the node is not inline, then whitespace sequences can't spill over it
            else if (!_isInlineLevel(domNode)) return false;

        });
    }

    var seenTargetNode = _isAncestor(targetNode, currentNode),
        currentIndex = 0;

    // Keep traversing until nextWSSequence reports no node to resume from
    while (currentNode) {

        // Get the next whitespace sequence
        var seq = nextWSSequence(currentNode, currentIndex, ignorePreceedingWS, targetNode, seenTargetNode, false, false, extendRange);
        ignorePreceedingWS = false;
        seenTargetNode = seq.seenTargetNode;
        currentNode = seq.resumeNode;
        currentIndex = seq.resumeIndex;

        // Was there a whitespace sequence? If so, and the sequence is not using "pre" wrapping,
        // then there might be something to consolidate
        if (seq.startNode && getWSStyle(seq.startNode) != "pre") {

            // If the whitespace sequence breaks two inline/text elements apart, then adjust the
            // range so that it leaves one whitespace behind
            if (isBreaker(seq.startNode, seq.startIndex, seq.endNode, seq.endIndex)) {

                // If the whitespace sequence is just one in length, then there is nothing to consolidate
                if (seq.startNode == seq.endNode && seq.startIndex == (seq.endIndex - 1)) {
                    seq.startNode = null;
                } else {

                    // Increment start index by one whitespace to leave one white space behind.
                    // If the start index is larger/equal to the start node's text length,
                    // the fragment range will include the start node, but exclude it from removal.
                    seq.startIndex++;
                }
            }

            // Is there anything to consolidate?
            if (seq.startNode) {

                // Create the fragment and disconnect it from the document
                var seqFrag = _buildFragment(_getCommonAncestor(seq.startNode, seq.endNode, false), seq.startNode, seq.startIndex, seq.endNode, seq.endIndex);
                seqFrag.disconnect();

                // Keep the current node/index pointer updated
                var updateTargetNode = currentNode == targetNode;
                if (currentNode) {

                    var startFrag = seqFrag.getStartFragment(),
                        endFrag = seqFrag.getEndFragment(),
                        updated = false,
                        wasStartSplit = seqFrag.wasStartSplit(),
                        wasEndSplit = seqFrag.wasEndSplit();

                    // Is this node the same as the start node of the fragment, and was the start node split?
                    if (currentNode == seq.startNode && wasStartSplit) {

                        debug.assert(startFrag.getPreSplitNode() == seq.startNode);
                        debug.assert(_nodeLength(seq.startNode) == seq.startIndex);

                        // Does the index need updating?
                        if (currentIndex >= _nodeLength(seq.startNode)) {

                            var remTextLen = _nodeLength(startFrag.node);

                            // Does the index fall in the removed range?
                            if (currentIndex < (_nodeLength(seq.startNode) + remTextLen)) {

                                // Pull the index back to the last position of what remains of the start node
                                currentIndex = _nodeLength(seq.startNode) - 1;

                            // Was both the end node AND start node split at the same node, and the
                            // node/index is pointing in the remaining (right most) text?
                            } else if (currentNode == seq.endNode && wasEndSplit) {

                                // Adjust the node to become the remaining text
                                currentNode = endFrag.getPostSplitNode();

                                // Set the index to become relative to the split end node
                                currentIndex -= (_nodeLength(seq.startNode) + remTextLen);

                            } else {
                                // Not expected to be reachable. Reported through debug.assert,
                                // like every other assertion in this file.
                                debug.assert(false);
                            }
                        }

                        updated = true;

                    // Otherwise is this node the same as the end node of the fragment, and was the end node split?
                    } else if (currentNode == seq.endNode && wasEndSplit) {

                        var remTextLen = _nodeLength(endFrag.node);

                        // Does the index fall outside the removed range?
                        if (currentIndex >= remTextLen) {

                            // Adjust the node to become the remaining text
                            currentNode = endFrag.getPostSplitNode();

                            // Set the index to become relative to the split end node
                            currentIndex -= remTextLen;

                            updated = true;

                        // If not, the pointer is re-derived from the start bound below
                        } else currentNode = null;

                    }

                    if (!updated) {

                        // Determine if the disconnection of the fragment removed this dom node
                        var wasRemoved;
                        if (currentNode) {
                            wasRemoved = false;
                            seqFrag.visit(function(frag) {
                                if (!frag.isShared && frag.node == currentNode) {
                                    wasRemoved = true;
                                    return false;
                                }
                            });
                        } else
                            wasRemoved = true;

                        if (wasRemoved) {

                            // If the node was removed, then set the node/index to the starting bounds:
                            // climb from the start fragment to the first shared fragment
                            var frag = startFrag;
                            while (!frag.isShared) {
                                frag = frag.parent;
                            }

                            // Set the node to become the first shared node on the starting bound
                            currentNode = frag.node;

                            // The index should be at the end of the start bound if the very end of the start bound
                            // still remains in the document, otherwise at the beginning of the start bound.
                            // The very end of the start fragment can still be present because, if the sequence is
                            // a breaker, the start index was incremented and may exclude the start node.
                            // (The meaning of the second _nodeLength argument is defined by that helper.)
                            currentIndex = frag == startFrag ? _nodeLength(currentNode, 1) : 0;

                            // If the shared node contains child nodes, then set the current node to become the
                            // child which precedes the one the start bound proceeded from
                            if (currentNode.childNodes.length > 0 && frag.children.length > 0 && frag.children[0].pos > 0) {
                                currentNode = currentNode.childNodes[frag.children[0].pos - 1];
                                // Set index to the end of selected node
                                currentIndex = _nodeLength(currentNode, 1);
                            }
                        }
                    }

                    // If the current node was the same as the target node, the target node
                    // is a text node that has been split - so update this as well
                    if (currentNode && updateTargetNode) targetNode = currentNode;
                }

            }

        }

    } // End loop: consolidating whitespaces in target node

};
271
272
/**
 * Turns NBSP characters found within a given node (and in some cases just outside of
 * the node) back into plain spaces, but only where the conversion won't collapse the
 * whitespace.
 *
 * An NBSP is kept when:
 *  - it is the first or last character of its whitespace sequence and that sequence
 *    does not break two words/breakable elements apart, or
 *  - the character directly before it is whitespace, or
 *  - the character directly after it is whitespace.
 *
 * This will not create any undoable operations.
 *
 * @param {Node} targetNode The node whose non breaking spaces are normalized.
 */
_normalizeNBSP = function(targetNode) {

    var resumeNode = targetNode,
        resumeIndex = 0;

    while (resumeNode) {

        // Fetch the next whitespace sequence, counting NBSPs as whitespace
        var seq = nextWSSequence(resumeNode, resumeIndex, false, targetNode, true, true, true, true);
        resumeNode = seq.resumeNode;
        resumeIndex = seq.resumeIndex;

        // Nothing found on this pass
        if (!seq.startNode) continue;

        var breaksWords = isBreaker(seq.startNode, seq.startIndex, seq.endNode, seq.endIndex),
            runs = [];

        // Collect the text nodes spanned by the sequence, from its start node to its end node
        _visitTextNodes(_getCommonAncestor(seq.startNode, seq.endNode, false), seq.startNode, true, function(visited) {
            runs.push(visited);
            if (visited == seq.endNode) return false;
        });

        var lastRun = runs.length - 1;

        for (var r = 0; r < runs.length; r++) {

            var run = runs[r],
                pos = (r == 0) ? seq.startIndex : 0;

            // Walk the characters of this text node that belong to the sequence
            for (; pos < (r == lastRun ? seq.endIndex : _nodeLength(run)); pos++) {

                if (run.nodeValue.charAt(pos) != _NBSP) continue;

                // An NBSP on the edge of a sequence that is not a word breaker stays
                var atSeqStart = (r == 0 && pos == seq.startIndex),
                    atSeqEnd = (r == lastRun && pos == (seq.endIndex - 1));
                if (!breaksWords && (atSeqStart || atSeqEnd)) continue;

                // Character before: may live at the end of the previous text node
                var neighbour;
                if (pos == 0) {
                    neighbour = (r > 0) ? runs[r - 1].nodeValue.charAt(_nodeLength(runs[r - 1]) - 1) : null;
                } else {
                    neighbour = run.nodeValue.charAt(pos - 1);
                }
                if (neighbour && _isAllWhiteSpace(neighbour)) continue;

                // Character after: may live at the start of the next text node
                if (pos == (_nodeLength(run) - 1)) {
                    neighbour = (r < lastRun) ? runs[r + 1].nodeValue.charAt(0) : null;
                } else {
                    neighbour = run.nodeValue.charAt(pos + 1);
                }
                if (neighbour && _isAllWhiteSpace(neighbour)) continue;

                // Neither neighbour is whitespace: swap the NBSP for a plain space
                run.nodeValue = run.nodeValue.slice(0, pos) + " " + run.nodeValue.slice(pos + 1);

            } // End loop: characters of the sequence within this text node
        } // End loop: text nodes of the sequence

    } // End loop: searching for whitespace sequences in target node

};
365
/**
 * Discovers the start and end points of the next whitespace sequence from a given point (inclusive).
 *
 * @param {Node} initNode The node to search from (towards the right)
 * @param {Number} initIndex The index to search from.
 * @param {Boolean} ignorePreceedingWS True to ignore the initial whitespaces encountered
 * @param {Node} targetNode The node within which the search should reside.
 * @param {Boolean} seenTargetNode Flag as true if the target node has been visited.
 * @param {Boolean} includeNBSP True to include non breaking spaces as whitespace, false to only count whitespace.
 * @param {Boolean} ignoreInternalSingleWS When true, a sequence is reported as soon as a non-whitespace
 *                  character follows it, whatever its length and position. When false, a
 *                  single-character sequence that does not start at index 0 is dropped.
 *                  NOTE(review): the parameter name suggests the opposite - confirm intent with callers.
 * @param {Boolean} extendRange True to allow the sequences to go past the target node for boundary cases.
 *
 * @return {Object} An object with the following members:
 *              seenTargetNode - true if the target node was encountered.
 *              resumeNode - The node to resume the search for remaining ws sequences in the target node
 *                           (null when the traversal ran to completion)
 *              resumeIndex - The index to resume the search for remaining ws sequences in the target node
 *              startNode - The start node of the sequence. Null/undefined if there was none.
 *              startIndex - The start index of the sequence (if there was one)
 *              endNode - The end node of the sequence, if there was one.
 *              endIndex - The end index (exclusive) of the sequence, if there was one.
 */
function nextWSSequence(initNode, initIndex, ignorePreceedingWS, targetNode, seenTargetNode, includeNBSP, ignoreInternalSingleWS, extendRange) {

    var resumeNode = null,
        resumeIndex = initIndex,
        startNode, startIndex, endNode, endIndex, startWSStyle, curWSStyle;

    /* True if the given element may sit inside a whitespace sequence. */
    function canJoinSequence(node) {
        return _isInlineLevel(node) && !nonWSInlineElements[_nodeName(node)];
    }

    // Locate the next whitespace sequence from the current node onwards (if any).
    _visitAllNodes(null, initNode, true, function(domNode) {

        // Has the search space been exhausted? I.E: Has the traversal gone past the target node's
        // descendants - and at this point isn't looking for any whitespace to consolidate?
        // (domNode != targetNode covers the case where the target is a text node that was split.)
        if (seenTargetNode && domNode != targetNode && (!startNode || !extendRange) && !_isAncestor(targetNode, domNode))
            return false; // Finished consolidating/removing ws

        // Update flag if domNode is the target node. Plain assignment keeps the flag a boolean.
        if (domNode == targetNode) seenTargetNode = true;

        // Set helper: the whitespace CSS style for the current visited dom node
        curWSStyle = getWSStyle(domNode);

        if (startNode) {

            // If building a whitespace sequence, check to see if the ancestors of the starting node - up to
            // the common ancestor of the start node and this current node - can be contained in a whitespace
            // sequence.
            var ca = _getCommonAncestor(domNode, startNode, false);
            var ancestors = _getAncestors(startNode, ca, false, false);
            var terminateSeq = false;
            for (var key in ancestors) {
                // Skip enumerable properties inherited from an extended prototype
                if (!Object.prototype.hasOwnProperty.call(ancestors, key)) continue;
                if (!canJoinSequence(ancestors[key])) {
                    terminateSeq = true;
                    break;
                }
            }

            // Whitespace sequences cannot contain different breaking mechanisms.
            if (terminateSeq || curWSStyle != startWSStyle) {
                resumeNode = domNode;
                resumeIndex = 0;
                return false;
            }
        }

        if (domNode.nodeType == Node.TEXT_NODE) {

            if (domNode.parentNode.nodeType != Node.COMMENT_NODE) {

                if (!_doesTextSupportNonWS(domNode)) {

                    debug.assert(!ignorePreceedingWS);

                    // If there is potentially something to consolidate, abort this traversal
                    if (startNode) {
                        // Record current position to resume traversal after consolidation
                        resumeNode = domNode;
                        resumeIndex = 0;
                        return false;
                    }

                } else {

                    // Iterate over characters in the text run
                    while (resumeIndex < _nodeLength(domNode)) {
                        var ch = domNode.nodeValue.charAt(resumeIndex);
                        if (_isAllWhiteSpace(ch) || (includeNBSP && ch == _NBSP)) {

                            if (!ignorePreceedingWS) {

                                // Note start/end node/index of whitespace sequence
                                if (startNode) {
                                    endNode = domNode;
                                    endIndex = resumeIndex + 1;
                                } else {
                                    startNode = domNode;
                                    startWSStyle = curWSStyle;
                                    startIndex = resumeIndex;
                                    endNode = null;
                                }
                            }

                        } else { // Non whitespace character
                            ignorePreceedingWS = false;

                            // Is there a current sequence that has more than 1 whitespace, or one that is
                            // reported regardless (ignoreInternalSingleWS), or one that starts at index 0?
                            if (endNode || (startNode && (ignoreInternalSingleWS || startIndex == 0))) {

                                // Record current position to resume traversal after consolidation
                                // (resumeIndex stays on this non-whitespace character)
                                resumeNode = domNode;
                                return false;

                            // Ignore any previous single-whitespace sequences that do not reside at index 0
                            } else
                                startNode = null;
                        }

                        resumeIndex++;

                    } // End loop: iterating over characters in text run
                }

            } else ignorePreceedingWS = false;

        } else { // Not a text node

            ignorePreceedingWS = false;

            if (domNode.nodeType != Node.COMMENT_NODE) {
                // Whitespace sequences can contain a subset of inline elements.
                if (startNode && !canJoinSequence(domNode)) {
                    resumeNode = domNode;
                    resumeIndex = 0;
                    return false;
                }
            }

            // The element at this point can be part of the current whitespace sequence...

        }

        // Moving on to the next node: scanning restarts from its first character
        resumeIndex = 0;

    }); // End visit

    // If sequence is one in length, must set end position
    if (startNode && !endNode) {
        endNode = startNode;
        endIndex = startIndex + 1;
    }

    return {
        seenTargetNode : seenTargetNode,
        resumeNode : resumeNode,
        resumeIndex : resumeIndex,
        startNode : startNode,
        startIndex : startIndex,
        endNode : endNode,
        endIndex : endIndex
    };

}
530
/**
 * Decides whether a whitespace sequence separates two words/breakable elements.
 *
 * The left side is only examined when the sequence starts at index 0 of its start node,
 * and the right side only when the sequence ends at the very end of its end node; in
 * every other case that side is bounded by characters of the same text node.
 *
 * @param {Node} startNode The starting text node of the whitespace sequence
 * @param {Number} startIndex The starting index of the whitespace sequence
 * @param {Node} endNode The ending text node of the whitespace sequence
 * @param {Number} endIndex The ending index of the whitespace sequence
 * @return {Boolean} True iff the given whitespace sequence breaks two words/breakable-elements apart.
 */
function isBreaker(startNode, startIndex, endNode, endIndex) {

    // Result channel of scan(): true when the last decisive node was breakable.
    var found = false;

    /**
     * Inner helper function.
     *
     * Sets the "found" local to true if domNode is considered breakable (in its context).
     *
     * @param {Node} domNode The node to check
     * @param {Node} initialNode The end or start node of the scan
     * @return {Boolean} True to continue scanning, false to abort... a result was found.
     */
    function scan(domNode, initialNode) {

        if (domNode.nodeType == Node.TEXT_NODE) {
            if (_nodeLength(domNode) > 0) {
                found = _doesTextSupportNonWS(domNode); // Non-WS nodes are not breakable
                return false;
            }

        } else if (breakableElements[_nodeName(domNode)]) {
            found = !_isAncestor(domNode, initialNode); // WS doesn't break from within breakable nodes to outside of them
            return false;

        // If hit a block level element or line break before a breakable node, then the sequence must be
        // leading or trailing text.
        } else if (_isBlockLevel(domNode) || _nodeName(domNode) == "br")
            return false;

        // Keep looking...
        return true;
    }

    /**
     * Inner helper function.
     *
     * Runs scan() over each ancestor. Leaves "found" false and returns false as soon as
     * one ancestor stops the scan without being breakable; otherwise restores "found"
     * to true and returns true.
     */
    function ancestorsBreakOnWS(ancestors, initialNode) {
        for (var key in ancestors) {
            // Skip enumerable properties inherited from an extended prototype
            if (!Object.prototype.hasOwnProperty.call(ancestors, key)) continue;

            // Reset found flag
            found = false;

            // Check ancestor if it does not break on whitespace...
            if (!scan(ancestors[key], initialNode) && !found)
                return false;
        }

        // Restore flag
        found = true;
        return true;
    }

    // Look to the left
    if (startIndex == 0) {
        found = false;
        _visitAllNodes(null, startNode, false, function(domNode) {

            if (domNode == startNode) return; // Skip initial text node

            var keepScanning = scan(domNode, startNode);

            if (!keepScanning && found) {
                // Check that all ancestors up to and excluding the common ancestor of this dom node
                // and the start node, are all nodes which are broken by whitespace.
                // NOTE(review): this side walks the ancestors of domNode whereas the right side
                // walks the ancestors of endNode - confirm the asymmetry is intended.
                var ca = _getCommonAncestor(domNode, startNode, false);
                if (!ancestorsBreakOnWS(_getAncestors(domNode, ca, false, false), startNode))
                    return false;
            }

            // false here ends the traversal, true keeps looking further left
            return keepScanning;
        });

        if (!found) return false;
    }

    // Look to the right
    if (endIndex == _nodeLength(endNode)) {
        found = false;
        _visitAllNodes(null, endNode, true, function(domNode) {

            if (domNode == endNode) return; // Skip initial text node

            var keepScanning = scan(domNode, endNode);

            if (!keepScanning && found) {
                // Check that all ancestors up to and excluding the common ancestor of this dom node
                // and the end node, are all nodes which are breakable by whitespace
                var ca = _getCommonAncestor(domNode, endNode, false);
                if (!ancestorsBreakOnWS(_getAncestors(endNode, ca, false, false), endNode))
                    return false;
            }

            // false here ends the traversal, true keeps looking further right
            return keepScanning;
        });

        if (!found) return false;
    }

    return true;

}
645
/**
 * Resolves the CSS white-space style that applies to a node.
 *
 * The computed "white-space" style wins when one is reported. Otherwise the node and
 * its chain of element parents are searched for a PRE element, giving "pre" when one
 * is found and "normal" when not.
 *
 * @param {Node} node A node to get its whitespace CSS style for.
 * @return {String} The CSS white-space style for the given node,
 *                  never null/always is a style.
 */
function getWSStyle(node) {

    var computed = _getComputedStyle(node, "white-space");
    if (computed) return computed;

    // No computed style available: check if the node is, or descends from, a PRE
    var cursor = node;
    for (;;) {

        if (_nodeName(cursor) == "pre") return "pre";

        cursor = cursor.parentNode;

        // Ran out of element parents without meeting a PRE
        if (!cursor || cursor.nodeType != Node.ELEMENT_NODE) return "normal";
    }

}
673
674})();
675
Note: See TracBrowser for help on using the repository browser.