Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gs3-extensions/seaweed-debug/trunk/src/WhitespaceUtil.js@ 28098

Last change on this file since 28098 was 25160, checked in by sjm84, 12 years ago
Initial cut at a version of seaweed for debugging purposes. Check it out live into the web/ext folder
File size: 31.9 KB

Line
1	/*
2	* file: WhitespaceUtil.js
3	*
4	* @BEGINLICENSE
5	* Copyright 2010 Brook Novak (email : [email protected])
6	* This program is free software; you can redistribute it and/or modify
7	* it under the terms of the GNU General Public License as published by
8	* the Free Software Foundation; either version 2 of the License, or
9	* (at your option) any later version.
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, write to the Free Software
16	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17	* @ENDLICENSE
18	*/
19
// Announce this file as the provider of the "WhitespaceUtil" module.
bootstrap.provides("WhitespaceUtil");

// Entry points of this module. They are declared here and assigned
// inside the closure that follows.
var _consolidateWSSeqs;
var _normalizeNBSP;
var _convertWSToNBSP;
23
24	/*
25	* @see http://www.w3.org/TR/html401/struct/text.html
26	*/
27	(function() {
28
/* Lookup of element names that white space can physically separate. */
var breakableElements = $createLookupMap("button,img,iframe,map,object");

/* Lookup of inline element names that can never be part of a whitespace sequence. */
var nonWSInlineElements = $createLookupMap("br,button,img,iframe,map,object,select,textarea,applet");
34
/**
 * Replaces every tab, line feed, carriage return and space with an NBSP in
 * the text nodes visited from the target node. Only text nodes that support
 * non-whitespace content and whose white-space style is "normal" are touched.
 *
 * This does not create any undoable operations.
 *
 * @param {Node} targetNode The node whose text nodes are converted.
 */
_convertWSToNBSP = function(targetNode) {

    var collapsibleWS = /[\t\n\r ]/g;

    _visitTextNodes(targetNode, targetNode, true, function(textNode) {

        // Leave text that cannot hold non-whitespace content alone
        if (!_doesTextSupportNonWS(textNode)) return;

        // Only text using normal whitespace breaking is converted
        if (getWSStyle(textNode) != "normal") return;

        textNode.nodeValue = textNode.nodeValue.replace(collapsibleWS, _NBSP);
    });
};
47
/**
 * Consolidates white space sequences found from the target node: every
 * sequence that is not styled as "pre" is removed from the document, except
 * that a sequence which separates two words/breakable elements keeps its
 * first whitespace character. This creates undoable operations.
 *
 * @param {Node} targetNode The DOM node and all its descendants to consolidate.
 *
 * @param {Boolean} extendRange If the first text node begins with whitespace, then the first
 *                              whitespace sequence may start before the target node. If the last text
 *                              node in the range ends with whitespace, then the last whitespace sequence
 *                              may end after the target node. Set to true to allow consolidation outside of
 *                              the target node, false will truncate whitespace sequences within the target node.
 */
_consolidateWSSeqs = function (targetNode, extendRange) {

    // Get the first text node within target node - that is editable and non-empty
    var ftn;
    _visitTextNodes(targetNode, targetNode, true, function(textNode){
        if (_doesTextSupportNonWS(textNode) && _nodeLength(textNode) > 0) {
            ftn = textNode;
            return false;
        }
    });

    // If there are no text nodes then there is nothing to consolidate
    if (!ftn) return;

    // If the first text node starts with a whitespace... extend the range backward...
    // possibly before the target node... to ensure that all preceding whitespace
    // that is part of the first node/index sequence is included. May overestimate but
    // that is ok.
    var currentNode = targetNode;
    var ignorePreceedingWS = false;

    if (extendRange && _isAllWhiteSpace(ftn.nodeValue.charAt(0))) {

        _visitAllNodes(null, ftn, false, function(domNode) {

            // Skip start node
            if (domNode == ftn) return;

            if (domNode.nodeType == Node.TEXT_NODE) {

                // Text nodes that do not support non-whitespace shouldn't be consolidated...
                if (!_doesTextSupportNonWS(domNode))
                    return false;

                // Adjust new node to start consolidating from
                currentNode = domNode;

                // If the text node contains a non-whitespace character then the range has been extended enough
                if (!_isAllWhiteSpace(domNode.nodeValue)) {
                    // Set flag to ignore any preceding whitespace at the starting node (see later)
                    ignorePreceedingWS = true;
                    return false;
                }

            }

            // If the node is not inline, then whitespace sequences can't spill over it
            else if (!_isInlineLevel(domNode)) return false;

        });
    }

    var seenTargetNode = _isAncestor(targetNode, currentNode),
        currentIndex = 0;

    // Keep traversing through the target node's descendants until all whitespace sequences are
    // consolidated or completely removed
    while(currentNode) {

        // Get the next whitespace sequence
        var seq = nextWSSequence(currentNode, currentIndex, ignorePreceedingWS, targetNode, seenTargetNode, false, false, extendRange);
        ignorePreceedingWS = false;
        seenTargetNode = seq.seenTargetNode;
        currentNode = seq.resumeNode;
        currentIndex = seq.resumeIndex;

        // Was there a whitespace sequence? If so, and the sequence is not using "pre" wrapping, then
        // there might be something to consolidate
        if (seq.startNode && getWSStyle(seq.startNode) != "pre") {

            // If the whitespace sequence breaks two inline/text elements apart, then adjust the range
            // so that it leaves one whitespace behind
            if (isBreaker(seq.startNode, seq.startIndex, seq.endNode, seq.endIndex)) {

                // If the whitespace sequence is just one in length, then there is nothing to consolidate
                if (seq.startNode == seq.endNode && seq.startIndex == (seq.endIndex - 1)) {
                    seq.startNode = null;
                } else {

                    // Increment start node / index by one whitespace to leave one white space behind.
                    // If the start index is larger/equal to the start node's text length,
                    // the fragment range will include the start node, but exclude it from removal.
                    seq.startIndex++;
                }
            }

            // Is there anything to consolidate?
            if (seq.startNode) {

                // Create the fragment and disconnect it from the document
                var seqFrag = _buildFragment(_getCommonAncestor(seq.startNode, seq.endNode, false), seq.startNode, seq.startIndex, seq.endNode, seq.endIndex);
                seqFrag.disconnect();

                // Keep the current node/index pointer updated
                var updateTargetNode = currentNode == targetNode;
                if (currentNode) {

                    var startFrag = seqFrag.getStartFragment(),
                        endFrag = seqFrag.getEndFragment(),
                        updated = false,
                        wasStartSplit = seqFrag.wasStartSplit(),
                        wasEndSplit = seqFrag.wasEndSplit(),
                        remTextLen;

                    // Is this node the same as the start node of the fragment, and was the start node split?
                    if (currentNode == seq.startNode && wasStartSplit) {

                        debug.assert(startFrag.getPreSplitNode() == seq.startNode);
                        debug.assert(_nodeLength(seq.startNode) == seq.startIndex);

                        // Does the index need updating?
                        if (currentIndex >= _nodeLength(seq.startNode)) {

                            remTextLen = _nodeLength(startFrag.node);

                            // Does the index fall in the removed range?
                            if (currentIndex < (_nodeLength(seq.startNode) + remTextLen)) {

                                // If adjusting left, then simply pull the index back onto the start node
                                currentIndex = _nodeLength(seq.startNode) - 1;

                            // Was both the end node AND start node split at the same node?.. and the node/index
                            // is pointing in the remaining text (right most)?
                            } else if (currentNode == seq.endNode && wasEndSplit) {

                                // Adjust the node to become the remaining text
                                currentNode = endFrag.getPostSplitNode();

                                // Set the index to become relative to the split end node
                                currentIndex -= (_nodeLength(seq.startNode) + remTextLen);

                            } else {
                                // Should be unreachable. Reported through debug.assert like the
                                // other assertions in this function.
                                debug.assert(false);
                            }
                        }

                        updated = true;

                    // Otherwise is this node the same as the end node of the fragment, and was the end node split?
                    } else if (currentNode == seq.endNode && wasEndSplit) {

                        remTextLen = _nodeLength(endFrag.node);

                        // Does the index fall outside the removed range?
                        if (currentIndex >= remTextLen) {

                            // Adjust the node to become the remaining text
                            currentNode = endFrag.getPostSplitNode();

                            // Set the index to become relative to the split end node
                            currentIndex -= remTextLen;

                            updated = true;

                        // If not, then the node/index should be set to the start bounds (handled below)
                        } else currentNode = null;

                    }

                    if (!updated) {
                        // Determine if the disconnection of the fragment removed this DOM node
                        var wasRemoved;
                        if (currentNode) {
                            wasRemoved = false;
                            seqFrag.visit(function(frag){
                                if (!frag.isShared && frag.node == currentNode) {
                                    wasRemoved = true;
                                    return false;
                                }
                            });
                        } else
                            wasRemoved = true;

                        if (wasRemoved) {

                            // If the node was removed, then set the node/index to the starting bounds
                            var frag = startFrag;
                            while (!frag.isShared) {
                                frag = frag.parent;
                            }

                            // Set the node to become the first shared node on the starting bound...
                            currentNode = frag.node;

                            // The index should be at the end of the start bound if the very end of the start bound still
                            // remains in the document, otherwise the index should be set to the beginning of the start bound.
                            // It is possible for the very end of the start fragment to still be included because if the
                            // sequence is a breaker, then the start index can be incremented to exclude the start node.
                            currentIndex = frag == startFrag ? _nodeLength(currentNode, 1) : 0;

                            // If the shared node contains child nodes, then set the current node to become the child
                            // at which the start bounds proceeded from
                            if (currentNode.childNodes.length > 0 && frag.children.length > 0 && frag.children[0].pos > 0) {
                                currentNode = currentNode.childNodes[frag.children[0].pos - 1];
                                // Set index to the end of the selected node
                                currentIndex = _nodeLength(currentNode, 1);
                            }
                        }
                    }

                    // If the current node was the same as the target node, the target node
                    // is a text node that has been split - so update this as well
                    if (currentNode && updateTargetNode) targetNode = currentNode;
                }

            }

        }

    } // End loop: consolidating whitespaces in target node

};
271
272
/**
 * Turns NBSP characters found in whitespace sequences of the given node (and
 * in some cases just outside of it) back into plain spaces, but only where
 * the resulting space would not be collapsed.
 *
 * No undoable operations are created.
 *
 * @param {Node} targetNode The node whose non-breaking spaces are normalized.
 */
_normalizeNBSP = function(targetNode) {

    var cursorNode = targetNode,
        cursorIndex = 0;

    while (cursorNode) {

        // Fetch the next whitespace sequence, counting NBSPs as whitespace
        var seq = nextWSSequence(cursorNode, cursorIndex, false, targetNode, true, true, true, true);
        cursorNode = seq.resumeNode;
        cursorIndex = seq.resumeIndex;

        // No sequence this round: move on with the resume position
        if (!seq.startNode) continue;

        var breaksWords = isBreaker(seq.startNode, seq.startIndex, seq.endNode, seq.endIndex),
            runNodes = [];

        // Collect the text nodes spanned by the sequence, in order
        _visitTextNodes(_getCommonAncestor(seq.startNode, seq.endNode, false), seq.startNode, true, function(visited) {
            runNodes.push(visited);
            if (visited == seq.endNode) return false;
        });

        for (var n = 0; n < runNodes.length; n++) {

            var node = runNodes[n],
                isFirst = n == 0,
                isLast = n == (runNodes.length - 1);

            // Walk the characters of this node that belong to the sequence
            for (var pos = (isFirst ? seq.startIndex : 0); pos < (isLast ? seq.endIndex : _nodeLength(node)); pos++) {

                if (node.nodeValue.charAt(pos) != _NBSP) continue;

                // An NBSP on either edge of a non-breaking sequence stays as it is
                if (!breaksWords &&
                    ((isFirst && pos == seq.startIndex) ||
                     (isLast && pos == (seq.endIndex - 1)))) continue;

                // An NBSP directly after a whitespace stays as it is
                var neighbour;
                if (pos == 0)
                    neighbour = isFirst ? null : runNodes[n - 1].nodeValue.charAt(_nodeLength(runNodes[n - 1]) - 1);
                else
                    neighbour = node.nodeValue.charAt(pos - 1);

                if (neighbour && _isAllWhiteSpace(neighbour)) continue;

                // An NBSP directly before a whitespace stays as it is
                if (pos == (_nodeLength(node) - 1))
                    neighbour = isLast ? null : runNodes[n + 1].nodeValue.charAt(0);
                else
                    neighbour = node.nodeValue.charAt(pos + 1);

                if (neighbour && _isAllWhiteSpace(neighbour)) continue;

                // Nothing forces the NBSP to stay: swap it for a plain space
                node.nodeValue = node.nodeValue.substr(0, pos) + " " + node.nodeValue.substr(pos + 1);

            } // End loop: characters of this text node
        } // End loop: text nodes of the sequence

    } // End loop: whitespace sequences in target node

};
365
/**
 * Discovers the start and end points of the next whitespace sequence from a given point (inclusive).
 *
 * @param {Node} initNode The node to search from (towards the right)
 * @param {Number} initIndex The index to search from.
 * @param {Boolean} ignorePreceedingWS True to ignore the initial whitespaces encountered
 * @param {Node} targetNode The node within which the search should reside.
 * @param {Boolean} seenTargetNode Flag as true if the target node has been visited.
 * @param {Boolean} includeNBSP True to include non-breaking spaces as whitespace, false to only count whitespace.
 * @param {Boolean} ignoreInternalSingleWS True to ignore any single whitespace sequences that are definitely breaking two words apart
 * @param {Boolean} extendRange True to allow the sequences to go past the target node for boundary cases.
 *
 * @return {Object} An object with the following members:
 *                  seenTargetNode - true if the target node was encountered.
 *                  resumeNode - The node to resume the search for remaining ws sequences in the target node.
 *                               Null when the traversal ended without a resume point.
 *                  resumeIndex - The index to resume the search for remaining ws sequences in the target node
 *                  startNode - The start node of the sequence. Null/undefined if there was none.
 *                  startIndex - The start index of the sequence (if there was one)
 *                  endNode - The end node of the sequence, if there was one.
 *                  endIndex - The end index (exclusive) of the sequence, if there was one.
 */
function nextWSSequence(initNode, initIndex, ignorePreceedingWS, targetNode, seenTargetNode, includeNBSP, ignoreInternalSingleWS, extendRange) {

    var resumeNode = null,
        resumeIndex = initIndex,
        startNode, startIndex, endNode, endIndex, startWSStyle, curWSStyle;

    // Locate the next whitespace sequence from the current node onwards (if any).
    _visitAllNodes(null, initNode, true, function(domNode) {

        // Has the search space been exhausted? I.E: Has the traversal gone past the target node's descendants -
        // and at this point isn't looking for any whitespace to consolidate?
        // Case: if target is a text node, then it can split... and prematurely end the search
        if (seenTargetNode && domNode != targetNode && (!startNode || !extendRange) && !_isAncestor(targetNode, domNode))
            return false; // Finished consolidating/removing ws

        // Update flag if domNode is the target node. A logical OR is used (rather than "|=")
        // so that the flag stays a boolean instead of being coerced to 0/1.
        seenTargetNode = !!seenTargetNode || domNode == targetNode;

        // Set helper: the whitespace CSS style for the currently visited DOM node
        curWSStyle = getWSStyle(domNode);

        if (startNode) {

            // If building a whitespace sequence, check to see if the ancestors of the starting node - up to
            // the common ancestor of the start node and this current node - can be contained in a whitespace
            // sequence.
            var ca = _getCommonAncestor(domNode, startNode, false);
            var ancestors = _getAncestors(startNode, ca, false, false);
            var terminateSeq = false;

            // Indexed loop: for-in would also walk enumerable Array.prototype extensions.
            for (var i = 0, count = ancestors ? ancestors.length : 0; i < count; i++) {
                if (!(_isInlineLevel(ancestors[i]) && !nonWSInlineElements[_nodeName(ancestors[i])])) {
                    terminateSeq = true;
                    break;
                }
            }

            // Whitespace sequences cannot contain different breaking mechanisms.
            if (terminateSeq || curWSStyle != startWSStyle) {
                resumeNode = domNode;
                resumeIndex = 0;
                return false;
            }
        }

        if (domNode.nodeType == Node.TEXT_NODE) {

            if (domNode.parentNode.nodeType != Node.COMMENT_NODE) {

                if (!_doesTextSupportNonWS(domNode)) {

                    debug.assert(!ignorePreceedingWS);

                    // If there is potentially something to consolidate, abort this traversal
                    if (startNode) {
                        // Record current position to resume traversal after consolidation
                        resumeNode = domNode;
                        resumeIndex = 0;
                        return false;
                    }

                } else {

                    // Iterate over characters in the text run
                    while (resumeIndex < _nodeLength(domNode)) {
                        var ch = domNode.nodeValue.charAt(resumeIndex);
                        if (_isAllWhiteSpace(ch) || (includeNBSP && ch == _NBSP)) {

                            if (!ignorePreceedingWS) {

                                // Note start/end node/index of whitespace sequence
                                if (startNode) {
                                    endNode = domNode;
                                    endIndex = resumeIndex + 1;
                                } else {
                                    startNode = domNode;
                                    startWSStyle = curWSStyle;
                                    startIndex = resumeIndex;
                                    endNode = null;
                                }
                            }

                        } else { // Non-whitespace character
                            ignorePreceedingWS = false;

                            // Is there a current sequence that has more than 1 whitespace, or one that resides at the start of
                            // this text run?
                            if (endNode || (startNode && (ignoreInternalSingleWS || startIndex == 0))) {

                                // Record current position to resume traversal after consolidation
                                resumeNode = domNode;
                                return false;

                            // Ignore any previous single-whitespace sequences that do not reside at the start of the text run
                            } else
                                startNode = null;
                        }

                        resumeIndex++;

                    } // End loop: iterating over characters in text run
                }

            } else ignorePreceedingWS = false;

        } else { // Not a text node

            ignorePreceedingWS = false;

            if (domNode.nodeType != Node.COMMENT_NODE) {
                // Whitespace sequences can contain a subset of inline elements.
                if (startNode &&
                    !(_isInlineLevel(domNode) && !nonWSInlineElements[_nodeName(domNode)])) {
                    resumeNode = domNode;
                    resumeIndex = 0;
                    return false;
                }
            }

            // The element at this point can be part of the current whitespace sequence...

        }

        // Every node after the first one is scanned from its beginning
        resumeIndex = 0;

    }); // End visit

    // If sequence is one in length, must set end position
    if (startNode && !endNode) {
        endNode = startNode;
        endIndex = startIndex + 1;
    }

    return {
        seenTargetNode : seenTargetNode,
        resumeNode : resumeNode,
        resumeIndex : resumeIndex,
        startNode : startNode,
        startIndex : startIndex,
        endNode : endNode,
        endIndex : endIndex
    };

}
530
/**
 * Decides whether a whitespace sequence separates two words/breakable elements.
 *
 * The left side is only examined when the sequence begins at index 0 of the start
 * node, and the right side only when the sequence ends at the very end of the end
 * node. A side that is not examined never disqualifies the sequence.
 *
 * @param {Node} startNode The starting text node of the whitespace sequence
 * @param {Number} startIndex The starting index of the whitespace sequence
 * @param {Node} endNode The ending text node of the whitespace sequence
 * @param {Number} endIndex The ending index of the whitespace sequence
 * @return {Boolean} True iff the given whitespace sequence breaks two words/breakable-elements apart.
 */
function isBreaker(startNode, startIndex, endNode, endIndex) {

    // Written by scan(): whether the node it last classified is something whitespace can break from.
    var found = false;

    // Look to the left
    if (startIndex == 0 && !hasBreakableNeighbour(startNode, false)) return false;

    // Look to the right
    if (endIndex == _nodeLength(endNode) && !hasBreakableNeighbour(endNode, true)) return false;

    return true;

    /**
     * Inner helper function.
     *
     * Walks away from one edge of the sequence until scan() reaches a verdict.
     *
     * @param {Node} edgeNode The start node (looking left) or end node (looking right) of the sequence
     * @param {Boolean} forward True to look to the right of edgeNode, false to look to the left
     * @return {Boolean} True if something breakable was found on that side.
     */
    function hasBreakableNeighbour(edgeNode, forward) {

        found = false;

        _visitAllNodes(null, edgeNode, forward, function(domNode) {

            if (domNode == edgeNode) return; // Skip initial text node

            var res = scan(domNode, edgeNode);

            if (!res && found) {
                // Check that all ancestors up to and excluding the common ancestor of this DOM node
                // and the edge node are nodes which whitespace can break across.
                // Looking right the ancestors of the edge node are checked, looking left
                // the ancestors of the visited node are checked.
                var ca = _getCommonAncestor(domNode, edgeNode, false);
                var ancestors = _getAncestors(forward ? edgeNode : domNode, ca, false, false);

                // Indexed loop: for-in would also walk enumerable Array.prototype extensions.
                for (var i = 0, count = ancestors ? ancestors.length : 0; i < count; i++) {
                    // Reset found flag
                    found = false;

                    // Abort if this ancestor ends the scan without being breakable...
                    if (!scan(ancestors[i], edgeNode) && !found)
                        return false;
                }

                // Restore flags
                res = false;
                found = true;
            }

            return res;
        });

        return found;
    }

    /**
     * Inner helper function.
     *
     * Sets the "found" local to true if domNode is considered breakable (in its context)
     *
     * @param {Node} domNode The node to check
     * @param {Node} initialNode The end or start node of the scan
     * @return {Boolean} True to continue scanning, false to abort...a result was found.
     */
    function scan(domNode, initialNode) {

        if (domNode.nodeType == Node.TEXT_NODE) {
            if (_nodeLength(domNode) > 0) {
                found = _doesTextSupportNonWS(domNode); // Non-WS nodes are not breakable
                return false;
            }

        } else if (breakableElements[_nodeName(domNode)]) {
            found = !_isAncestor(domNode, initialNode); // WS doesn't break from within breakable nodes to outside of them
            return false;

        // If hit a block level element or line break before a breakable node, then the sequence must be
        // leading or trailing text.
        } else if (_isBlockLevel(domNode) || _nodeName(domNode) == "br")
            return false;

        // Keep looking...
        return true;
    } // End inner scan

}
645
/**
 * Resolves the CSS white-space style that applies to a node.
 *
 * The computed "white-space" style is used when one is reported. Otherwise the
 * node and its element ancestors are checked for a PRE element, and "normal"
 * is the final fallback.
 *
 * @param {Node} node The node to get the whitespace CSS style for.
 * @return {String} The CSS white-space style for the given node,
 *                  never null/always is a style.
 */
function getWSStyle(node) {

    var computed = _getComputedStyle(node, "white-space");
    if (computed) return computed;

    // No computed style: see whether the node is, or descends from, a PRE
    var cursor = node;
    do {
        if (_nodeName(cursor) == "pre") return "pre";
        cursor = cursor.parentNode;
    } while (cursor && cursor.nodeType == Node.ELEMENT_NODE);

    // Nothing said otherwise, so normal whitespace handling applies
    return "normal";

}
673
674	})();
675

Note: See TracBrowser for help on using the repository browser.

Download in other formats: