@@ -1,5 +1,7 @@
< ? php
/**
* HTML API: WP_HTML_Tag_Processor class
*
* Scans through an HTML document to find specific tags, then
* transforms those tags by adding, removing, or updating the
* values of the HTML attributes within that tag (opener).
@@ -27,7 +29,7 @@
*/
/**
* M odifies attributes in an HTML document for tags matching a query.
* Core class used to m odify attributes in an HTML document for tags matching a query.
*
* ## Usage
*
@@ -222,11 +224,11 @@
* The Tag Processor's design incorporates a "garbage-in-garbage-out" philosophy.
* HTML5 specifies that certain invalid content be transformed into different forms
* for display, such as removing null bytes from an input document and replacing
* invalid characters with the Unicode replacement character U+FFFD <20> . Where errors
* or transformations exist within the HTML5 specification, the Tag Processor leaves
* those invalid inputs untouched, passing them through to the final browser to handle.
* While this implies that certain operations will be non-spec-compliant, such as
* reading the value of an attribute with invalid content, it also preserves a
* invalid characters with the Unicode replacement character ` U+FFFD` (visually "<22> ").
* Where errors or transformations exist within the HTML5 specification, the Tag Processor
* leaves those invalid inputs untouched, passing them through to the final browser
* to handle. While this implies that certain operations will be non-spec-compliant,
* such as reading the value of an attribute with invalid content, it also preserves a
* simplicity and efficiency for handling those error cases.
*
* Most operations within the Tag Processor are designed to minimize the difference
@@ -544,6 +546,10 @@ class WP_HTML_Tag_Processor {
}
// Ensure that the tag closes before the end of the document.
if ( $this -> bytes_already_parsed >= strlen ( $this -> html ) ) {
return false ;
}
$tag_ends_at = strpos ( $this -> html , '>' , $this -> bytes_already_parsed );
if ( false === $tag_ends_at ) {
return false ;
@@ -1243,8 +1249,6 @@ class WP_HTML_Tag_Processor {
* Move the internal cursor past any immediate successive whitespace.
*
* @since 6.2.0
*
* @return void
*/
private function skip_whitespace () {
$this -> bytes_already_parsed += strspn ( $this -> html , " \t \ f \r \n " , $this -> bytes_already_parsed );
@@ -1254,8 +1258,6 @@ class WP_HTML_Tag_Processor {
* Applies attribute updates and cleans up once a tag is fully parsed.
*
* @since 6.2.0
*
* @return void
*/
private function after_tag () {
$this -> get_updated_html ();
@@ -1274,8 +1276,6 @@ class WP_HTML_Tag_Processor {
*
* @see WP_HTML_Tag_Processor::$lexical_updates
* @see WP_HTML_Tag_Processor::$classname_updates
*
* @return void
*/
private function class_name_updates_to_attributes_updates () {
if ( count ( $this -> classname_updates ) === 0 ) {
@@ -1415,6 +1415,7 @@ class WP_HTML_Tag_Processor {
*
* @since 6.2.0
* @since 6.2.1 Accumulates shift for internal cursor and passed pointer.
* @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
*
* @param int $shift_this_point Accumulate and return shift for this position.
* @return int How many bytes the given pointer moved in response to the updates.
@@ -1464,7 +1465,7 @@ class WP_HTML_Tag_Processor {
* Adjust bookmark locations to account for how the text
* replacements adjust offsets in the input document.
*/
foreach ( $this -> bookmarks as $bookmark ) {
foreach ( $this -> bookmarks as $bookmark_name => $bookmark ) {
/*
* Each lexical update which appears before the bookmark's endpoints
* might shift the offsets for those endpoints. Loop through each change
@@ -1475,20 +1476,22 @@ class WP_HTML_Tag_Processor {
$tail_delta = 0 ;
foreach ( $this -> lexical_updates as $diff ) {
$update_head = $bookmark -> start >= $diff -> start ;
$update_tail = $bookmark -> end >= $diff -> start ;
if ( ! $update_head && ! $update_tail ) {
if ( $bookmark -> start < $diff -> start && $bookmark -> end < $diff -> start ) {
break ;
}
if ( $bookmark -> start >= $diff -> start && $bookmark -> end < $diff -> end ) {
$this -> release_bookmark ( $bookmark_name );
continue 2 ;
}
$delta = strlen ( $diff -> text ) - ( $diff -> end - $diff -> start );
if ( $update_head ) {
if ( $bookmark -> start >= $diff -> start ) {
$head_delta += $delta ;
}
if ( $update_tail ) {
if ( $bookmark -> end >= $diff -> end ) {
$tail_delta += $delta ;
}
}
@@ -1502,6 +1505,18 @@ class WP_HTML_Tag_Processor {
return $accumulated_shift_for_given_point ;
}
/**
* Checks whether a bookmark with the given name exists.
*
* @since 6.3.0
*
* @param string $bookmark_name Name to identify a bookmark that potentially exists.
* @return bool Whether that bookmark exists.
*/
public function has_bookmark ( $bookmark_name ) {
return array_key_exists ( $bookmark_name , $this -> bookmarks );
}
/**
* Move the internal cursor in the Tag Processor to a given bookmark's location.
*
@@ -1770,6 +1785,31 @@ class WP_HTML_Tag_Processor {
return strtoupper ( $tag_name );
}
/**
* Indicates if the currently matched tag contains the self-closing flag.
*
* No HTML elements ought to have the self-closing flag and for those, the self-closing
* flag will be ignored. For void elements this is benign because they "self close"
* automatically. For non-void HTML elements though problems will appear if someone
* intends to use a self-closing element in place of that element with an empty body.
* For HTML foreign elements and custom elements the self-closing flag determines if
* they self-close or not.
*
* This function does not determine if a tag is self-closing,
* but only if the self-closing flag is present in the syntax.
*
* @since 6.3.0
*
* @return bool Whether the currently matched tag contains the self-closing flag.
*/
public function has_self_closing_flag () {
if ( ! $this -> tag_name_starts_at ) {
return false ;
}
return '/' === $this -> html [ $this -> tag_ends_at - 1 ];
}
/**
* Indicates if the current tag token is a tag closer.
*
@@ -1894,7 +1934,7 @@ class WP_HTML_Tag_Processor {
*
* Result: <div id="new"/>
*/
$existing_attribute = $this -> attributes [ $comparable_name ];
$existing_attribute = $this -> attributes [ $comparable_name ];
$this -> lexical_updates [ $comparable_name ] = new WP_HTML_Text_Replacement (
$existing_attribute -> start ,
$existing_attribute -> end ,
@@ -2125,7 +2165,6 @@ class WP_HTML_Tag_Processor {
* @type string|null $class_name Tag must contain this class name to match.
* @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g. </div>.
* }
* @return void
*/
private function parse_query ( $query ) {
if ( null !== $query && $query === $this -> last_query ) {