source: trunk/modules/GetHtml/DOMwalk.js

Last change on this file was 1356, checked in by gogo, 14 months ago

#1245 - Fix percent width and height when using DOMwalk

  • Property svn:keywords set to LastChangedDate LastChangedRevision LastChangedBy HeadURL Id
File size: 12.8 KB
Line 
1
2  /*--------------------------------------:noTabs=true:tabSize=2:indentSize=2:--
3    --  Xinha (is not htmlArea) - http://xinha.gogo.co.nz/
4    --
5    --  Use of Xinha is granted by the terms of the htmlArea License (based on
6    --  BSD license)  please read license.txt in this package for details.
7    --
8    --  Xinha was originally based on work by Mihai Bazon which is:
9    --      Copyright (c) 2003-2004 dynarch.com.
10    --      Copyright (c) 2002-2003 interactivetools.com, inc.
11    --      This copyright notice MUST stay intact for use.
12    --
13    --  This is the standard implementation of the method for rendering HTML code from the DOM
14    --
15    --  The file is loaded by the Xinha Core when no alternative method (plugin) is loaded.
16    --
17    --
18    --  $HeadURL$
19    --  $LastChangedDate$
20    --  $LastChangedRevision$
21    --  $LastChangedBy$
22    --------------------------------------------------------------------------*/
/**
 * Plugin-style constructor for the DOMwalk HTML serialiser.
 *
 * It only records the editor instance it was created for.
 *
 * @param {Object} editor  The editor instance this implementation belongs to.
 */
function GetHtmlImplementation(editor) {
    this.editor = editor;
}
26
// Descriptive metadata for this implementation.
//
// `version` and `developer_url` are taken from SVN keyword strings.  When the
// keyword has been expanded (e.g. "$Keyword: value $") the replace() keeps only
// the value; the lazy (.*?) makes sure the blank before the closing "$" is not
// kept as part of the value.  An unexpanded keyword is left untouched.
GetHtmlImplementation._pluginInfo = {
  name          : "GetHtmlImplementation DOMwalk",
  origin        : "Xinha Core",
  version       : "$LastChangedRevision$".replace(/^[^:]*:\s*(.*?)\s*\$$/, '$1'),
  developer     : "The Xinha Core Developer Team",
  developer_url : "$HeadURL$".replace(/^[^:]*:\s*(.*?)\s*\$$/, '$1'),
  sponsor       : "",
  sponsor_url   : "",
  license       : "htmlArea"
};
37
/**
 * Retrieves the HTML code from the given node.  This is a replacement for
 * getting innerHTML, using standard DOM calls.
 *
 * Legacy wrapper (see #442): it simply forwards to Xinha.getHTMLWrapper
 * without an indent argument.
 *
 * @param {Node}    root        Node to serialise.
 * @param {boolean} outputRoot  Passed through to Xinha.getHTMLWrapper.
 * @param {Object}  editor      Passed through to Xinha.getHTMLWrapper.
 * @returns {string} Whatever Xinha.getHTMLWrapper returns.
 */
Xinha.getHTML = function(root, outputRoot, editor)
{
  return Xinha.getHTMLWrapper(root, outputRoot, editor);
};
45
// Space-delimited list of attribute names that are written as name="name".
// The leading and trailing blanks matter: lookups search for " " + name + " ".
Xinha.emptyAttributes = " checked disabled ismap readonly nowrap compact declare selected defer multiple noresize noshade ";
47
/**
 * Serialises a DOM node and its descendants to an HTML string by walking the
 * DOM, calling itself recursively for child nodes.
 *
 * @param {Node}    root        Node to serialise; dispatch is on root.nodeType.
 * @param {boolean} outputRoot  For element/fragment/document nodes: whether the
 *                              tag of root itself is written.  It is forced to
 *                              false when the tag name matches
 *                              editor.config.htmlRemoveTags.
 * @param {Object}  editor      Object providing `config` (stripScripts,
 *                              htmlRemoveTags, only7BitPrintablesInURLs are
 *                              read) and `stripBaseURL()` (used on IE only).
 * @param {string}  [indent]    Indentation written after line breaks; defaults
 *                              to '' and grows by two spaces per nesting level.
 * @returns {string} The generated HTML ("" for node types that are ignored).
 */
Xinha.getHTMLWrapper = function(root, outputRoot, editor, indent)
{
  var html = "";
  if ( !indent )
  {
    indent = '';
  }

  switch ( root.nodeType )
  {
    case 10:// Node.DOCUMENT_TYPE_NODE
    case 6: // Node.ENTITY_NODE
    case 12:// Node.NOTATION_NODE
      // these are all for the document type, probably not necessary
    break;

    case 2: // Node.ATTRIBUTE_NODE
      // Never get here, this has to be handled in the ELEMENT case because
      // of IE requiring that some attributes are grabbed directly from
      // the attribute (nodeValue doesn't return correct values), see
      //http://groups.google.com/groups?hl=en&lr=&ie=UTF-8&oe=UTF-8&safe=off&selm=3porgu4mc4ofcoa1uqkf7u8kvv064kjjb4%404ax.com
      // for information
    break;

    case 4: // Node.CDATA_SECTION_NODE
      // Mozilla seems to convert CDATA into a comment when going into wysiwyg mode,
      //  don't know about IE
      html += (Xinha.is_ie ? ('\n' + indent) : '') + '<![CDATA[' + root.data + ']]>' ;
    break;

    case 5: // Node.ENTITY_REFERENCE_NODE
      html += '&' + root.nodeValue + ';';
    break;

    case 7: // Node.PROCESSING_INSTRUCTION_NODE
      // PI's don't seem to survive going into the wysiwyg mode, (at least in moz)
      // so this is purely academic
      html += (Xinha.is_ie ? ('\n' + indent) : '') + '<'+'?' + root.target + ' ' + root.data + ' ?>';
    break;

    case 1: // Node.ELEMENT_NODE
    case 11: // Node.DOCUMENT_FRAGMENT_NODE
    case 9: // Node.DOCUMENT_NODE
      var closed;
      var i;
      var innerText;
      // Only element nodes have a tag name; fragments and documents use ''.
      var root_tag = (root.nodeType == 1) ? root.tagName.toLowerCase() : '';
      if ( ( root_tag == "script" || root_tag == "noscript" ) && editor.config.stripScripts )
      {
        break;
      }
      if ( outputRoot )
      {
        outputRoot = !(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(root_tag));
      }
      if ( Xinha.is_ie && root_tag == "head" )
      {
        // On IE the head is not walked; its innerHTML is normalised with
        // regular expressions instead.
        if ( outputRoot )
        {
          html += (Xinha.is_ie ? ('\n' + indent) : '') + "<head>";
        }

        var save_multiline = RegExp.multiline;
        RegExp.multiline = true;
        var txt =
        root.innerHTML
        .replace(Xinha.RE_tagName, function(str, p1, p2) { return p1 + p2.toLowerCase(); }) // lowercasize
        .replace(/\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g, '="$2$4$5"$3') //add attribute quotes
        .replace(/<(link|meta)((\s*\S*="[^"]*")*)>([\n\r]*)/g, '<$1$2 />\n'); //terminate singlet tags
        RegExp.multiline = save_multiline;
        html += txt + '\n';
        if ( outputRoot )
        {
          html += (Xinha.is_ie ? ('\n' + indent) : '') + "</head>";
        }
        break;
      }
      else if ( outputRoot )
      {
        // "closed" means the element is written as a self-closing tag.
        closed = (!(root.hasChildNodes() || Xinha.needsClosingTag(root)));
        html += ((Xinha.isBlockElement(root)) ? ('\n' + indent) : '') + "<" + root.tagName.toLowerCase();
        var attrs = root.attributes;

        // Attributes are written from the last list entry to the first.
        for ( i = attrs.length-1; i >= 0; --i )
        {
          var a = attrs.item(i);
          // In certain browsers (*cough* firefox) the dom node loses
          // information if the image is currently broken.  In order to prevent
          // corrupting the height and width of image tags, we strip height and
          // width from the image rather than reporting bad information.
          if (Xinha.is_real_gecko && (root.tagName.toLowerCase() == 'img') &&
              ((a.nodeName.toLowerCase() == 'height') || (a.nodeName.toLowerCase() == 'width')))
          {
            if (!root.complete || root.naturalWidth === 0)
            {
              // This means that firefox has been unable to read the dimensions from the actual image
              continue;
            }
          }

          if(root.tagName.toLowerCase() == 'img' && a.nodeName.toLowerCase() == 'complete') continue;

          if (typeof a.nodeValue == 'object' ) continue; // see #684
          // value="on" on a checkbox is skipped.
          if (root.tagName.toLowerCase() == "input"
              && root.type.toLowerCase() == "checkbox"
              && a.nodeName.toLowerCase() == "value"
              && a.nodeValue.toLowerCase() == "on")
          {
            continue;
          }
          if ( !a.specified
            // IE claims these are !a.specified even though they are.  Perhaps others too?
            && !(root.tagName.toLowerCase().match(/input|option/) && a.nodeName == 'value')
            && !(root.tagName.toLowerCase().match(/area/) && a.nodeName.match(/shape|coords/i))
          )
          {
            continue;
          }
          var name = a.nodeName.toLowerCase();
          if ( /_moz_editor_bogus_node/.test(name) || ( name == 'class' && a.nodeValue == 'webkit-block-placeholder') )
          {
            // Editor placeholder node: throw away the tag written so far.
            // NOTE(review): any children and the closing tag of such a node are
            // still written by the code further down - confirm this is intended.
            html = "";
            break;
          }
          if ( /(_moz)|(contenteditable)|(_msh)/.test(name) )
          {
            // avoid certain attributes
            continue;
          }
          var value;
          if ( Xinha.emptyAttributes.indexOf(" "+name+" ") != -1)
          {
            value = name;
          }
          else if ( name != "style" )
          {
            // IE5.5 reports 25 when cellSpacing is
            // 1; other values might be doomed too.
            // For this reason we extract the
            // values directly from the root node.
            //
            // If you have a percent width in width/height then we may need to use nodeValue
            //  it may well be OK to use nodeValue all the time these days, but it's easier
            //  to just do it for these and not fix what's not broke
            if(name == "width" || name == "height")
            {
              if(a.nodeValue.match(/%$/))
              {
                value = a.nodeValue;
              }
              else
              {
                value = root[a.nodeName];
              }
            }
            // Using Gecko the values of href and src are converted to absolute links
            // unless we get them using nodeValue()
            else if ( typeof root[a.nodeName] != "undefined" && name != "href" && name != "src" && !(/^on/.test(name)) )
            {
              value = root[a.nodeName];
            }
            else
            {
              value = a.nodeValue;
              if (name == 'class')
              {
                value = value.replace(/Apple-style-span/,'');
                if (!value) continue;
              }
              // IE seems not willing to return the original values - it converts to absolute
              // links using a.nodeValue, a.value, a.stringValue, root.getAttribute("href")
              // So we have to strip the baseurl manually :-/
              if ( Xinha.is_ie && (name == "href" || name == "src") )
              {
                value = editor.stripBaseURL(value);
              }

              // High-ascii (8bit) characters in links seem to cause problems for some sites,
              // while this seems to be consistent with RFC 3986 Section 2.4
              // because these are not "reserved" characters, it does seem to
              // cause links to international resources not to work.  See ticket:167

              // IE always returns high-ascii characters un-encoded in links even if they
              // were supplied as % codes (it unescapes them when we pull the value from the link).

              // Hmmm, very strange if we use encodeURI here, or encodeURIComponent in place
              // of escape below, then the encoding is wrong.  I mean, completely.
              // Nothing like it should be at all.  Using escape seems to work though.
              // It's in both browsers too, so either I'm doing something wrong, or
              // something else is going on?

              if ( editor.config.only7BitPrintablesInURLs && ( name == "href" || name == "src" ) )
              {
                value = value.replace(/([^!-~]+)/g, function(match) { return escape(match); });
              }
            }
          }
          else if ( !Xinha.is_ie )
          {
            value = root.style.cssText.replace(/rgb\(.*?\)/ig,function(rgb){ return Xinha._colorToRgb(rgb); });
          }
          else
          {
            // IE8 has style in attributes, but it's empty; IE's style is
            // written from cssText after this loop.  Always skip it here:
            // "value" is function-scoped, so testing it would look at the
            // value left over from the previously processed attribute.
            continue;
          }

/* This looks wrong, http://trac.xinha.org/ticket/1391#comment:7
          if ( /^(_moz)?$/.test(value) )
          {
            // Mozilla reports some special tags
            // here; we don't need them.
            continue;
          }
*/

          html += " " + name + '="' + Xinha.htmlEncode(value) + '"';
        }
        //IE fails to put style in attributes list & cssText is UPPERCASE
        if ( Xinha.is_ie && root.style.cssText )
        {
          html += ' style="' + root.style.cssText.replace(/(^)?([^:]*):(.*?)(;|$)/g, function(m0, m1,m2,m3, m4){return m2.toLowerCase() + ':' + m3 + m4;}) + '"';
        }
        if ( Xinha.is_ie && root.tagName.toLowerCase() == "option" && root.selected )
        {
          html += ' selected="selected"';
        }
        // html is "" here only when the placeholder check above discarded the tag.
        if ( html !== "" )
        {
          if ( closed && root_tag=="p" )
          {
            //never use <p /> as empty paragraphs won't be visible
            html += ">&nbsp;</p>";
          }
          else if ( closed )
          {
            html += " />";
          }
          else
          {
            html += ">";
          }
        }
      }
      var containsBlock = false;
      if ( root_tag == "script" || root_tag == "noscript" )
      {
        // Script content is copied verbatim, never walked or encoded.
        if ( !editor.config.stripScripts )
        {
          if (Xinha.is_ie)
          {
            innerText = "\n" + root.innerHTML.replace(/^[\n\r]*/,'').replace(/\s+$/,'') + '\n' + indent;
          }
          else
          {
            innerText = (root.hasChildNodes()) ? root.firstChild.nodeValue : '';
          }
          html += innerText + '</'+root_tag+'>' + ((Xinha.is_ie) ? '\n' : '');
        }
      }
      else if (root_tag == "pre")
      {
        // Preformatted content is taken from innerHTML with <br> turned into newlines.
        html += ((Xinha.is_ie) ? '\n' : '') + root.innerHTML.replace(/<br>/g,'\n') + '</'+root_tag+'>';
      }
      else
      {
        // Here "i" is reused as the child-node cursor.
        for ( i = root.firstChild; i; i = i.nextSibling )
        {
          if ( !containsBlock && i.nodeType == 1 && Xinha.isBlockElement(i) )
          {
            containsBlock = true;
          }
          html += Xinha.getHTMLWrapper(i, true, editor, indent + '  ');
        }
        if ( outputRoot && !closed )
        {
          html += (((Xinha.isBlockElement(root) && containsBlock) || root_tag == 'head' || root_tag == 'html') ? ('\n' + indent) : '') + "</" + root.tagName.toLowerCase() + ">";
        }
      }
    break;

    case 3: // Node.TEXT_NODE
      // A detached text node has no parent; treat it like ordinary text.
      var parentTag = root.parentNode ? root.parentNode.tagName : '';
      // The group is required: without it "^" and "$" bind to the first and
      // last alternative only, so e.g. a parent named "x-style" would match
      // and get its text written without encoding.
      if ( /^(script|noscript|style)$/i.test(parentTag) )
      {
        html = root.data;
      }
      else if(root.data.trim() == '')
      {
        // Whitespace-only text collapses to one blank; empty text stays empty.
        if(root.data)
        {
          html = ' ';
        }
        else
        {
          html = '';
        }
      }
      else
      {
        html = Xinha.htmlEncode(root.data);
      }
    break;

    case 8: // Node.COMMENT_NODE
      html = "<!--" + root.data + "-->";
    break;
  }
  return html;
};
358
359
360
Note: See TracBrowser for help on using the repository browser.