Merged in feature/MAW-855-import-code-into-aws (pull request #2)

code import from pantheon

* code import from pantheon
This commit is contained in:
Tony Volpe
2023-12-04 23:08:14 +00:00
parent 8c9b1312bc
commit 8f4b5efda6
4766 changed files with 185592 additions and 239967 deletions

View File

@@ -5,6 +5,7 @@ namespace Yoast\WP\SEO\Builders;
use wpdb;
use Yoast\WP\SEO\Exceptions\Indexable\Author_Not_Built_Exception;
use Yoast\WP\SEO\Helpers\Author_Archive_Helper;
use Yoast\WP\SEO\Helpers\Options_Helper;
use Yoast\WP\SEO\Helpers\Post_Helper;
use Yoast\WP\SEO\Models\Indexable;
use Yoast\WP\SEO\Values\Indexables\Indexable_Builder_Versions;
@@ -32,6 +33,13 @@ class Indexable_Author_Builder {
*/
protected $version;
/**
* Holds the options helper instance.
*
* @var Options_Helper
*/
protected $options_helper;
/**
* Holds the taxonomy helper instance.
*
@@ -51,17 +59,20 @@ class Indexable_Author_Builder {
*
* @param Author_Archive_Helper $author_archive The author archive helper.
* @param Indexable_Builder_Versions $versions The Indexable version manager.
* @param Options_Helper $options_helper The options helper.
* @param Post_Helper $post_helper The post helper.
* @param wpdb $wpdb The WPDB instance.
*/
public function __construct(
	Author_Archive_Helper $author_archive,
	Indexable_Builder_Versions $versions,
	Options_Helper $options_helper,
	Post_Helper $post_helper,
	wpdb $wpdb
) {
	// Store the injected dependencies on the instance.
	$this->wpdb           = $wpdb;
	$this->post_helper    = $post_helper;
	$this->options_helper = $options_helper;
	$this->author_archive = $author_archive;

	// The version is resolved once, here, for the 'user' indexable type.
	$this->version = $versions->get_latest_version_for_type( 'user' );
}
@@ -215,7 +226,8 @@ class Indexable_Author_Builder {
}
// We will check if the author has public posts the WP way, instead of the indexable way, to make sure we get proper results even if SEO optimization is not run.
if ( $this->author_archive->author_has_public_posts_wp( $user_id ) === false ) {
// In case the user has no public posts, we check if the user should be indexed anyway.
if ( $this->options_helper->get( 'noindex-author-noposts-wpseo', false ) === true && $this->author_archive->author_has_public_posts_wp( $user_id ) === false ) {
$exception = Author_Not_Built_Exception::author_archives_are_not_indexed_for_users_without_posts( $user_id );
}

View File

@@ -2,6 +2,8 @@
namespace Yoast\WP\SEO\Builders;
use DOMDocument;
use WP_HTML_Tag_Processor;
use WPSEO_Image_Utils;
use Yoast\WP\SEO\Helpers\Image_Helper;
use Yoast\WP\SEO\Helpers\Options_Helper;
@@ -211,14 +213,146 @@ class Indexable_Link_Builder {
return $links;
}
/**
 * Gathers all images from content with WP's WP_HTML_Tag_Processor() and returns them along with their IDs, if possible.
 *
 * Image tags that have no usable `src` attribute (missing, empty or valueless) are skipped.
 *
 * @param string $content The content.
 *
 * @return int[] An associative array of image IDs, keyed by their HTML-entity-encoded URL.
 */
protected function gather_images_wp( $content ) {
	$processor = new WP_HTML_Tag_Processor( $content );
	$images    = [];

	/**
	 * Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
	 *
	 * Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
	 *
	 * @api string The attribute to be used to extract image IDs from.
	 */
	$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

	// The charset is the same for every tag, so it is looked up once instead of on every iteration.
	$charset = \get_bloginfo( 'charset' );

	while ( $processor->next_tag( [ 'tag_name' => 'img' ] ) ) {
		$src_raw = $processor->get_attribute( 'src' );

		// get_attribute() yields null for a missing attribute and true for a valueless one; neither is a URL.
		if ( ! \is_string( $src_raw ) || $src_raw === '' ) {
			continue;
		}

		$src = \htmlentities( $src_raw, ( ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401 ), $charset );

		// Same caveat for the ID-carrying attribute: only a real string value can contain an ID.
		$classes = $processor->get_attribute( $attribute );
		$id      = $this->extract_id_of_classes( \is_string( $classes ) ? $classes : '' );

		$images[ $src ] = $id;
	}

	return $images;
}
/**
 * Gathers all images from content with DOMDocument() and returns them along with their IDs, if possible.
 *
 * Image tags without a `src` attribute are skipped.
 *
 * @param string $content The content.
 *
 * @return int[] An associative array of image IDs, keyed by their HTML-entity-encoded URL.
 */
protected function gather_images_domdocument( $content ) {
	$images  = [];
	$charset = \get_bloginfo( 'charset' );

	/**
	 * Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
	 *
	 * Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
	 *
	 * @api string The attribute to be used to extract image IDs from.
	 */
	$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

	// Collect parse errors internally instead of emitting warnings, and remember the
	// previous setting so this process-wide libxml flag can be put back afterwards.
	$previous_libxml_state = \libxml_use_internal_errors( true );

	$post_dom = new DOMDocument();
	// The XML encoding declaration is prepended so that the content is read using the site's charset.
	$post_dom->loadHTML( '<?xml encoding="' . $charset . '">' . $content );

	\libxml_clear_errors();
	\libxml_use_internal_errors( $previous_libxml_state );

	foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {
		$src_raw = $img->getAttribute( 'src' );

		// getAttribute() returns an empty string when the attribute is absent; there is no URL to record then.
		if ( $src_raw === '' ) {
			continue;
		}

		$src     = \htmlentities( $src_raw, ( ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401 ), $charset );
		$classes = $img->getAttribute( $attribute );
		$id      = $this->extract_id_of_classes( $classes );

		$images[ $src ] = $id;
	}

	return $images;
}
/**
 * Extracts image ID out of the image's classes.
 *
 * @param string $classes The classes (or other attribute value) assigned to the image.
 *
 * @return int The ID that's extracted from the classes, or 0 when no ID could be extracted.
 */
protected function extract_id_of_classes( $classes ) {
	// Nothing to extract from an empty value. A boolean `true` (which an attribute getter may
	// return for a valueless attribute) is rejected as well, so it is never cast to the ID 1 below.
	if ( ! $classes || $classes === true ) {
		return 0;
	}

	/**
	 * Filter 'wpseo_extract_id_pattern' - Allows filtering the regex pattern to be used to extract image IDs from class/attribute names.
	 *
	 * Defaults to the pattern that extracts image IDs from core's `wp-image-<ID>` native format in image classes.
	 *
	 * @api string The regex pattern to be used to extract image IDs from class names. Empty string if the whole class/attribute should be returned.
	 */
	$pattern = \apply_filters( 'wpseo_extract_id_pattern', '/(?<!\S)wp-image-(\d+)(?!\S)/i' );

	// An empty pattern means the attribute value as a whole is the ID.
	if ( $pattern === '' ) {
		return (int) $classes;
	}

	$matches = [];

	// The first capture group of the pattern holds the ID.
	if ( preg_match( $pattern, $classes, $matches ) ) {
		return (int) $matches[1];
	}

	return 0;
}
/**
* Gathers all images from content.
*
* @param string $content The content.
*
* @return string[] An array of urls.
* @return int[] An associative array of image IDs, keyed by their URLs.
*/
protected function gather_images( $content ) {
/**
* Filter 'wpseo_force_creating_and_using_attachment_indexables' - Filters if we should use attachment indexables to find all content images, instead of scanning the content.
*
* The default value is false.
*
* @since 21.1
*/
$should_not_parse_content = \apply_filters( 'wpseo_force_creating_and_using_attachment_indexables', false );
/**
* Filter 'wpseo_force_skip_image_content_parsing' - Filters if we should force skip scanning the content to parse images.
* This filter can be used if the regex gives a faster result than scanning the code.
*
* The default value is false.
*
* @since 21.1
*/
$should_not_parse_content = \apply_filters( 'wpseo_force_skip_image_content_parsing', $should_not_parse_content );
if ( ! $should_not_parse_content && class_exists( WP_HTML_Tag_Processor::class ) ) {
return $this->gather_images_wp( $content );
}
if ( ! $should_not_parse_content && class_exists( DOMDocument::class ) ) {
return $this->gather_images_DOMDocument( $content );
}
if ( \strpos( $content, 'src' ) === false ) {
// Nothing to do.
return [];
@@ -229,7 +363,7 @@ class Indexable_Link_Builder {
// Used modifiers iU to match case insensitive and make greedy quantifiers lazy.
if ( \preg_match_all( "/$regexp/iU", $content, $matches, \PREG_SET_ORDER ) ) {
foreach ( $matches as $match ) {
$images[] = \trim( $match[2], "'" );
$images[ $match[2] ] = 0;
}
}
@@ -241,7 +375,7 @@ class Indexable_Link_Builder {
*
* @param Indexable $indexable The indexable.
* @param string[] $links The link URLs.
* @param string[] $images The image sources.
* @param int[] $images The image IDs, keyed by their source URLs.
*
* @return SEO_Links[] The link models.
*/
@@ -262,13 +396,12 @@ class Indexable_Link_Builder {
}
);
$images = \array_map(
function( $link ) use ( $home_url, $indexable ) {
return $this->create_internal_link( $link, $home_url, $indexable, true );
},
$images
);
return \array_merge( $links, $images );
$image_links = [];
foreach ( $images as $image_url => $image_id ) {
$image_links[] = $this->create_internal_link( $image_url, $home_url, $indexable, true, $image_id );
}
return \array_merge( $links, $image_links );
}
/**
@@ -294,10 +427,11 @@ class Indexable_Link_Builder {
* @param array $home_url The home url, as parsed by wp_parse_url.
* @param Indexable $indexable The indexable of the post containing the link.
* @param bool $is_image Whether or not the link is an image.
* @param int $image_id The ID of the internal image.
*
* @return SEO_Links The created link.
*/
protected function create_internal_link( $url, $home_url, $indexable, $is_image = false ) {
protected function create_internal_link( $url, $home_url, $indexable, $is_image = false, $image_id = 0 ) {
$parsed_url = \wp_parse_url( $url );
$link_type = $this->url_helper->get_link_type( $parsed_url, $home_url, $is_image );
@@ -326,11 +460,12 @@ class Indexable_Link_Builder {
if ( $model->type === SEO_Links::TYPE_INTERNAL_IMAGE ) {
$permalink = $this->build_permalink( $url, $home_url );
if ( ! $this->options_helper->get( 'disable-attachment' ) ) {
/** The `wpseo_force_creating_and_using_attachment_indexables` filter is documented in indexable-link-builder.php */
if ( ! $this->options_helper->get( 'disable-attachment' ) || \apply_filters( 'wpseo_force_creating_and_using_attachment_indexables', false ) ) {
$model = $this->enhance_link_from_indexable( $model, $permalink );
}
else {
$target_post_id = WPSEO_Image_Utils::get_attachment_by_url( $permalink );
$target_post_id = ( $image_id !== 0 ) ? $image_id : WPSEO_Image_Utils::get_attachment_by_url( $permalink );
if ( ! empty( $target_post_id ) ) {
$model->target_post_id = $target_post_id;