Ticket #287 (closed enhancement: worksforme)
Making Xinha leave HTML for Flash and scripting intact (object, script, and noscript)
| Reported by: | mharrisonline | Owned by: | gogo |
|---|---|---|---|
| Priority: | normal | Milestone: | |
| Component: | Xinha Core | Version: | |
| Severity: | normal | Keywords: | |
| Cc: |
Description
When HTMLArea 3 was adapted by the Jones Standard user group to be a WYSIWYG editor for content in that course management system, it had to be changed to prevent IE from corrupting existing HTML that contained Flash movies, and also to (as much as possible) not remove scripting. This modification has been applied to Xinha, however Xinha's code changes rapidly, and this modification is adapted to the nightly build from perhaps a month ago. It stops Xinha from removing the nested <embed> code for the Flash movie from the <object>.
Although it is possible to display Flash with just <object>, it prevents the movie from streaming, doesn't alert the user if they need a newer plugin, and doesn't work for everybody. Placeholders would not work, since they would not already be in legacy content. This simply makes regular html for Flash work without modification.
Also, in IE Xinha will remove the contents of a <script>, and corrupt <noscript> as well. This allows it to leave <script> and <noscript> alone. Probably not everyone will want JavaScript to survive being opened in Xinha, but here is how to do it. This doesn't prevent onLoad events from being stripped out of the body tag, however.
I hope this will be useful to other members of the Xinha community, or at least save someone the pain of trying to figure out how to make Flash work in Xinha.
Mike Harris Jones International University
Within the function HTMLArea.Config,
this.fullPage = false;
was changed to
this.fullPage = true;
This might work with simply the full page plugin instead, and perhaps this modification could be adapted to work with the default setting. Content in Jones Standard is actual HTML files, but this obviously won't work for everyone. Xinha in IE with fullPage already preserves JavaScript in the head of the document; this modification allows it to preserve JavaScript in the body as well.
Within the function HTMLArea.prototype.forceRedraw,
this._doc.body.innerHTML = this.getInnerHTML();
was uncommented, otherwise for some reason the line above it
this._doc.body.style.visibility = "visible";
would sometimes appear in the content.
HTMLArea._blockTags had additional tags added:
// Space-delimited list of tag names. Every fragment ends with a space so that
// the concatenation keeps the names separated; without it the pieces fuse into
// "embedp" and "scripttbody", and the last name has no closing delimiter.
HTMLArea._blockTags = " body form textarea fieldset ul ol dl li div embed " + "p h1 h2 h3 h4 h5 h6 quote pre button table thead object script " + "tbody tfoot tr td th iframe address noscript blockquote ";
_
HTMLArea._closingTags had more tags added:
// Space-delimited list of tag names; the leading and trailing spaces are part
// of the value so that every name is surrounded by a space on both sides.
HTMLArea._closingTags = " " + [
  "head", "body", "script", "form", "style", "div", "span",
  "tr", "td", "th", "tbody", "table",
  "em", "strong", "button", "object",
  "b", "i", "code", "cite", "dfn", "abbr", "acronym", "font", "a", "title"
].join(" ") + " ";
_
There were many changes in the function HTMLArea.getHTMLWrapper, but this function seems to have been greatly modified recently. So, here is the entire now one month(!) obsolete HTMLArea.getHTMLWrapper function with the new code commented as //!!. It's a pretty bloated and redundant hack, but it worked great for our purposes:
/**
 * Serializes a DOM node, and recursively its children, to an HTML string.
 *
 * @param root       the DOM node to serialize
 * @param outputRoot true to emit root's own start and end tags, false to emit
 *                   only what is inside it; forced to false for non-element
 *                   roots and for tags matching editor.config.htmlRemoveTags
 * @param editor     the editor; editor.config.htmlRemoveTags (optional RegExp)
 *                   and editor.stripBaseURL() are used
 * @return the HTML text for root
 *
 * The private helpers live in a closure so they add nothing to the HTMLArea
 * namespace. Recursion goes through HTMLArea.getHTMLWrapper so that a
 * replacement installed under that name is honoured.
 */
HTMLArea.getHTMLWrapper = (function() {

  // Returns markup (taken from innerHTML) with every tag name lower-cased.
  function lowercaseTagNames(markup) {
    var save_multiline = RegExp.multiline;
    RegExp.multiline = true;
    try {
      return markup.replace(HTMLArea.RE_tagName, function(str, p1, p2) {
        return p1 + p2.toLowerCase();
      });
    } finally {
      // restore the global flag even if the replace throws
      RegExp.multiline = save_multiline;
    }
  }

  // Builds the start tag of element `root` including its specified attributes.
  // `closed` selects the " />" ending instead of ">". Returns "" when the
  // element carries a _moz_editor_bogus_node attribute.
  function serializeStartTag(root, editor, closed) {
    var html = "<" + root.tagName.toLowerCase();
    var attrs = root.attributes;
    for (var i = 0; i < attrs.length; ++i) {
      var a = attrs.item(i);
      if (!a.specified) {
        continue;
      }
      var name = a.nodeName.toLowerCase();
      if (/_moz_editor_bogus_node/.test(name)) {
        return "";
      }
      if (/(_moz)|(contenteditable)|(_msh)/.test(name)) {
        // avoid certain attributes
        continue;
      }
      var value;
      if (name != "style") {
        // IE5.5 reports 25 when cellSpacing is 1; other values might be
        // doomed too. For this reason we extract the values directly from
        // the root node.
        //
        // Using Gecko the values of href and src are converted to absolute
        // links unless we get them using nodeValue. Event handlers (on*) are
        // also read from the attribute node, because the element property is
        // not the attribute text.
        if (typeof root[a.nodeName] != "undefined" && name != "href" && name != "src" && !/^on/.test(name)) {
          value = root[a.nodeName];
        } else {
          value = a.nodeValue;
          // IE seems not willing to return the original values - it converts
          // to absolute links using a.nodeValue, a.value, a.stringValue,
          // root.getAttribute("href"). So we have to strip the baseurl manually.
          if (HTMLArea.is_ie && (name == "href" || name == "src")) {
            value = editor.stripBaseURL(value);
          }
        }
      } else { // IE fails to put style in attributes list
        // FIXME: cssText reported by IE is UPPERCASE
        value = root.style.cssText;
      }
      // Skip empty values and the special "_moz" value only; a value that
      // merely contains "_moz" (e.g. a file name) must be kept.
      if (/^(_moz)?$/.test(value)) {
        continue;
      }
      // Always escape the value, otherwise a quote inside it breaks the tag.
      html += " " + name + '="' + HTMLArea.htmlEncode(value) + '"';
    }
    // close standalone tags like <br> (<br />)
    return html + (closed ? " />" : ">");
  }

  return function(root, outputRoot, editor) {
    var html = "";
    switch (root.nodeType) {
      case 10: // Node.DOCUMENT_TYPE_NODE
      case 6:  // Node.ENTITY_NODE
      case 12: // Node.NOTATION_NODE
        // this all are for the document type, probably not necessary
        break;

      case 2: // Node.ATTRIBUTE_NODE
        break;

      case 4: // Node.CDATA_SECTION_NODE
        // Mozilla seems to convert CDATA into a comment when going into
        // wysiwyg mode, don't know about IE
        html += '<![CDATA[' + root.data + ']]>';
        break;

      case 5: // Node.ENTITY_REFERENCE_NODE
        html += '&' + root.nodeValue + ';';
        break;

      case 7: // Node.PROCESSING_INSTRUCTION_NODE
        // PI's don't seem to survive going into the wysiwyg mode, (at least
        // in moz) so this is purely academic
        html += '<?' + root.target + ' ' + root.data + ' ?>';
        break;

      case 1:  // Node.ELEMENT_NODE
      case 11: // Node.DOCUMENT_FRAGMENT_NODE
      case 9:  // Node.DOCUMENT_NODE
      {
        var closed;
        var i;
        var startTag;
        var root_tag = (root.nodeType == 1) ? root.tagName.toLowerCase() : '';

        // a <br> that is the last node of its parent is dropped
        if (root_tag == 'br' && !root.nextSibling) {
          break;
        }

        if (outputRoot) {
          // Only elements have tags of their own; documents and fragments
          // contribute just their children.
          outputRoot = root.nodeType == 1 &&
            !(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(root_tag));
        }

        // !! Jones Standard modifications for script and multimedia support:
        // in IE the content of these elements is copied from innerHTML
        // instead of being rebuilt node by node, so that script code,
        // <noscript> content and the <embed> nested in <object> survive.
        if (HTMLArea.is_ie && /^(head|body|script|noscript|object)$/.test(root_tag)) {
          if (outputRoot) {
            if (root_tag == "head" || root_tag == "noscript") {
              startTag = "<" + root_tag + ">";
            } else {
              // An end tag is always written below, so never self-close.
              startTag = serializeStartTag(root, editor, false);
              if (startTag != "" && root_tag == "body") {
                // !! htmlarea formats HTML in IE, formatting gets lost otherwise
                startTag += "\r";
              }
            }
            if (startTag == "") {
              // start tag suppressed: keep the output balanced
              outputRoot = false;
            }
            html += startTag;
          }
          html += lowercaseTagNames(root.innerHTML);
          if (outputRoot) {
            html += "</" + root_tag + ">";
          }
          break;
        }
        // !! end of Jones Standard modifications

        if (outputRoot) {
          closed = (!(root.hasChildNodes() || HTMLArea.needsClosingTag(root)));
          startTag = serializeStartTag(root, editor, closed);
          if (startTag == "") {
            // bogus node: drop both of its tags, keep its children
            outputRoot = false;
          }
          html = startTag;
        }
        for (i = root.firstChild; i; i = i.nextSibling) {
          html += HTMLArea.getHTMLWrapper(i, true, editor);
        }
        if (outputRoot && !closed) {
          html += "</" + root_tag + ">";
        }
        break;
      }

      case 3: // Node.TEXT_NODE
      {
        // Text directly inside <script> or <style> is emitted verbatim;
        // everything else is HTML-encoded. A detached text node has no
        // parent and is encoded.
        var parent_tag = root.parentNode ? root.parentNode.tagName : "";
        html = /^(script|style)$/i.test(parent_tag) ? root.data : HTMLArea.htmlEncode(root.data);
        break;
      }

      case 8: // Node.COMMENT_NODE
        // comments are passed through unchanged
        html = "<!--" + root.data + "-->";
        break;
    }
    return html;
  };
})();
