HTML API: Add support for BR, EMBED, & other tags.

Adds support for the following HTML elements to the HTML Processor:

 - AREA, BR, EMBED, KEYGEN, WBR
 - Only the opening BR tag is supported, as the invalid closer `</br>`
   involves more complicated rules, to be implemented later.

Previously, these elements were not supported and the HTML Processor
would bail when encountering them. With this patch it will proceed to
parse an HTML document when encountering those tags as long as other
normal conditions don't cause it to bail (such as complicated format
reconstruction rules).

Props jonsurrell, dmsnell
Fixes #60283



git-svn-id: https://develop.svn.wordpress.org/trunk@57316 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Dennis Snell
2024-01-19 21:40:01 +00:00
parent 5815624ead
commit 91e51f92a8
4 changed files with 127 additions and 19 deletions

View File

@@ -102,17 +102,17 @@
* - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
* - Custom elements: All custom elements are supported. :)
* - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U, WBR.
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
* - Links: A.
* - Lists: DD, DL, DT, LI, OL, LI.
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
* - Paragraph: P.
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
* - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
* - Paragraph: BR, P.
* - Phrasing elements: AREA, ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
* - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
* - Templating elements: SLOT.
* - Text decoration: RUBY.
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, MULTICOL, NEXTID, SPACER.
*
* ### Supported markup
*
@@ -934,12 +934,28 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$this->run_adoption_agency_algorithm();
return true;
/*
* > An end tag whose tag name is "br"
* > Parse error. Drop the attributes from the token, and act as described in the next
* > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
* > than the end tag token that it actually is.
*/
case '-BR':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
/*
* > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
*/
case '+AREA':
case '+BR':
case '+EMBED':
case '+IMG':
case '+KEYGEN':
case '+WBR':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
return true;
/*
@@ -977,13 +993,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case 'BASEFONT':
case 'BGSOUND':
case 'BODY':
case 'BR':
case 'CAPTION':
case 'COL':
case 'COLGROUP':
case 'DD':
case 'DT':
case 'EMBED':
case 'FORM':
case 'FRAME':
case 'FRAMESET':
@@ -991,7 +1005,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case 'HTML':
case 'IFRAME':
case 'INPUT':
case 'KEYGEN':
case 'LI':
case 'LINK':
case 'LISTING':
@@ -1031,7 +1044,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case 'TR':
case 'TRACK':
case 'UL':
case 'WBR':
case 'XMP':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
@@ -1692,6 +1704,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
'IMG' === $tag_name ||
'INPUT' === $tag_name ||
'LINK' === $tag_name ||
'KEYGEN' === $tag_name || // Obsolete but still treated as void.
'META' === $tag_name ||
'SOURCE' === $tag_name ||
'TRACK' === $tag_name ||