Merged in feature/MAW-855-import-code-into-aws (pull request #2)

code import from pantheon

* code import from pantheon
This commit is contained in:
Tony Volpe
2023-12-04 23:08:14 +00:00
parent 8c9b1312bc
commit 8f4b5efda6
4766 changed files with 185592 additions and 239967 deletions

View File

@@ -4,16 +4,23 @@ namespace Yoast\WP\SEO\Premium\Actions;
use WP_Query;
use WPSEO_Premium_Prominent_Words_Support;
use Yoast\WP\SEO\Helpers\Prominent_Words_Helper;
use Yoast\WP\SEO\Models\Indexable;
use Yoast\WP\SEO\Premium\Helpers\Prominent_Words_Helper;
use Yoast\WP\SEO\Premium\Repositories\Prominent_Words_Repository;
use Yoast\WP\SEO\Repositories\Indexable_Repository;
use Yoast\WP\SEO\Repositories\Prominent_Words_Repository;
use Yoast\WP\SEO\Repositories\SEO_Links_Repository;
/**
* Handles the actual requests to the prominent words endpoints.
*/
class Link_Suggestions_Action {
/**
* The amount of indexables to retrieve in one go
* when generating internal linking suggestions.
*
* Passed as the batch size argument to retrieve_suggested_indexable_ids().
*
* @var int
*/
const BATCH_SIZE = 1000;
/**
* The repository to retrieve prominent words from.
*
@@ -28,6 +35,13 @@ class Link_Suggestions_Action {
*/
protected $indexable_repository;
/**
* The repository to retrieve links from.
*
* Set by the constructor and queried in retrieve_suggested_indexable_ids()
* when existing links must be left out of the suggestions.
*
* @var SEO_Links_Repository
*/
protected $links_repository;
/**
* Contains helper functions for calculating with and comparing prominent words.
*
@@ -49,30 +63,36 @@ class Link_Suggestions_Action {
* @param Indexable_Repository $indexable_repository The repository to retrieve indexables from.
* @param Prominent_Words_Helper $prominent_words_helper Class with helper methods for prominent words.
* @param WPSEO_Premium_Prominent_Words_Support $prominent_words_support The prominent words support class.
* @param SEO_Links_Repository $links_repository The repository to retrieve links from.
*/
public function __construct(
Prominent_Words_Repository $prominent_words_repository,
Indexable_Repository $indexable_repository,
Prominent_Words_Helper $prominent_words_helper,
// NOTE(review): the next two lines look like the removed and the added side of one diff line;
// only the second (with the trailing comma) fits the parameter that follows it. TODO confirm.
WPSEO_Premium_Prominent_Words_Support $prominent_words_support
WPSEO_Premium_Prominent_Words_Support $prominent_words_support,
SEO_Links_Repository $links_repository
) {
// Keep every injected collaborator on the instance.
$this->prominent_words_repository = $prominent_words_repository;
$this->indexable_repository = $indexable_repository;
$this->prominent_words_helper = $prominent_words_helper;
$this->prominent_words_support = $prominent_words_support;
$this->links_repository = $links_repository;
}
/**
* Suggests a list of links, based on the given array of prominent words.
*
* @param array $words_from_request The prominent words as an array mapping words to weights.
* @param int $limit The maximum number of link suggestions to retrieve.
* @param int $object_id The object id for the current indexable.
* @param string $object_type The object type for the current indexable.
* @param array $words_from_request The prominent words as an array mapping words to weights.
* @param int $limit The maximum number of link suggestions to retrieve.
* @param int $object_id The object id for the current indexable.
* @param string $object_type The object type for the current indexable.
* @param bool $include_existing_links Optional. Whether or not to include existing links, defaults to true.
* @param array $post_type Optional. The list of post types where suggestions may come from.
* @param bool $only_include_public Optional. Only include public indexables, defaults to false.
*
* @return array Links for the post that are suggested.
*/
public function get_suggestions( $words_from_request, $limit, $object_id, $object_type ) {
public function get_suggestions( $words_from_request, $limit, $object_id, $object_type, $include_existing_links = true, $post_type = [], $only_include_public = false ) {
$current_indexable_id = null;
$current_indexable = $this->indexable_repository->find_by_id_and_type( $object_id, $object_type );
if ( $current_indexable ) {
@@ -81,9 +101,9 @@ class Link_Suggestions_Action {
/*
* Gets best suggestions (returns a sorted array [$indexable_id => score]).
* The indexables are processed in batches of 100 indexables each.
* The indexables are processed in batches of 1000 indexables each.
*/
$suggestions_scores = $this->retrieve_suggested_indexable_ids( $words_from_request, $limit, 100, $current_indexable_id );
$suggestions_scores = $this->retrieve_suggested_indexable_ids( $words_from_request, $limit, self::BATCH_SIZE, $current_indexable_id, $include_existing_links, $post_type, $only_include_public );
$indexable_ids = \array_keys( $suggestions_scores );
@@ -109,6 +129,41 @@ class Link_Suggestions_Action {
return $this->create_suggestions( $suggestions_indexables, $suggestions_scores );
}
/**
 * Retrieves the indexables that best match the prominent words stored for the given indexable.
 *
 * The stored prominent words are read for the indexable and reduced to a stem => weight map.
 * That map is scored against candidate indexables, and the rows of the best matches are returned.
 *
 * @param int  $id                     The ID of the indexable to find suggestions for.
 * @param int  $limit                  The maximum number of suggestions to retrieve.
 * @param bool $include_existing_links Optional. Whether or not to include existing links, defaults to true.
 *
 * @return array The suggested indexables as arrays, or an empty array when nothing matched.
 */
public function get_indexable_suggestions_for_indexable( $id, $limit, $include_existing_links = true ) {
	$stored_words = $this->prominent_words_repository
		->query()
		->where( 'indexable_id', $id )
		->find_array();

	// Collapse the rows into a stem => weight map; a repeated stem keeps the weight of its last row.
	$weights_by_stem = [];
	foreach ( $stored_words as $row ) {
		$weights_by_stem[ $row['stem'] ] = $row['weight'];
	}

	// Score candidates in batches of self::BATCH_SIZE; the result maps indexable IDs to scores.
	$candidate_ids = \array_keys(
		$this->retrieve_suggested_indexable_ids( $weights_by_stem, $limit, self::BATCH_SIZE, $id, $include_existing_links )
	);

	// Nothing scored, so there is nothing to look up.
	if ( $candidate_ids === [] ) {
		return [];
	}

	// Only the IDs are used from here on: rows come back as the query returns them, without the scores.
	return $this->indexable_repository
		->query()
		->where_id_in( $candidate_ids )
		->find_array();
}
/**
* Retrieves the titles of the posts with the given IDs.
*
@@ -297,16 +352,20 @@ class Link_Suggestions_Action {
* Request prominent words for indexables in the batch (including the iDF of all words) to calculate
* their vector length later.
*
* @param array $stems The stems in the request.
* @param int $batch_size How many indexables to request in one query.
* @param int $page The start of the current batch (in pages).
* @param array $stems The stems in the request.
* @param int $batch_size How many indexables to request in one query.
* @param int $page The start of the current batch (in pages).
* @param int[] $excluded_ids The indexable IDs to exclude.
* @param array $post_type The post types that will be searched.
* @param bool $only_include_public If only public indexables are included.
*
* @return array An array of ProminentWords objects, containing their stem, weight, indexable id,
* and document frequency.
*/
protected function get_candidate_words( $stems, $batch_size, $page ) {
protected function get_candidate_words( $stems, $batch_size, $page, $excluded_ids = [], $post_type = [], $only_include_public = false ) {
// NOTE(review): the two signatures above, and the two find_ids_by_stems() calls below, look like the
// before/after sides of a diff. The six-argument forms match the call made from
// retrieve_suggested_indexable_ids() — TODO confirm against the real file.
return $this->prominent_words_repository->find_by_list_of_ids(
// The inner call's result is handed straight to find_by_list_of_ids(); presumably it is one page
// (of $batch_size) of IDs matching the requested stems — verify against the repository class.
$this->prominent_words_repository->find_ids_by_stems( $stems, $batch_size, $page )
$this->prominent_words_repository->find_ids_by_stems( $stems, $batch_size, $page, $excluded_ids, $post_type, $only_include_public )
);
}
@@ -315,27 +374,42 @@ class Link_Suggestions_Action {
* The candidate indexables are analyzed in batches.
* After having computed scores for a batch the function saves the best candidates until now.
*
* @param array $request_words The words to match, as an array mapping words to weights.
* @param int $limit The max number of suggestions that should be returned by the function.
* @param int $batch_size The number of indexables that should be analyzed in every batch.
* @param int|null $current_indexable_id The id for the current indexable.
* @param array $request_words The words to match, as an array mapping words to weights.
* @param int $limit The max number of suggestions that should be returned by the function.
* @param int $batch_size The number of indexables that should be analyzed in every batch.
* @param int|null $current_indexable_id The id for the current indexable.
* @param bool $include_existing_links Optional. Whether or not to include existing links, defaults to true.
* @param array $post_type Optional. The list of post types where suggestions may come from.
* @param bool $only_include_public Optional. Only include public indexables, defaults to false.
*
* @return array An array mapping indexable IDs to scores. Higher scores mean better matches.
*/
protected function retrieve_suggested_indexable_ids( $request_words, $limit, $batch_size, $current_indexable_id ) {
protected function retrieve_suggested_indexable_ids( $request_words, $limit, $batch_size, $current_indexable_id, $include_existing_links = true, $post_type = [], $only_include_public = false ) {
// Combine stems, weights and DFs from request.
$request_data = $this->compose_request_data( $request_words );
// Calculate vector length of the request set (needed for score normalization later).
$request_vector_length = $this->prominent_words_helper->compute_vector_length( $request_data );
// Get all links the post already links to, those shouldn't be suggested.
$excluded_indexable_ids = [ $current_indexable_id ];
if ( ! $include_existing_links && $current_indexable_id ) {
$links = $this->links_repository->query()
->distinct()
->select( 'indexable_id' )
->where( 'target_indexable_id', $current_indexable_id )
->find_many();
$excluded_indexable_ids = \array_merge( $excluded_indexable_ids, \wp_list_pluck( $links, 'indexable_id' ) );
}
$excluded_indexable_ids = \array_filter( $excluded_indexable_ids );
$request_stems = \array_keys( $request_data );
$scores = [];
$page = 1;
do {
// Retrieve the words of all indexables in this batch that share prominent word stems with request.
$candidates_words = $this->get_candidate_words( $request_stems, $batch_size, $page );
$candidates_words = $this->get_candidate_words( $request_stems, $batch_size, $page, $excluded_indexable_ids, $post_type, $only_include_public );
// Transform the prominent words table so that it is indexed by indexable_ids.
$candidates_words_by_indexable_ids = $this->group_words_by_indexable_id( $candidates_words );
@@ -347,10 +421,6 @@ class Link_Suggestions_Action {
++$batch_scores_size;
}
if ( $current_indexable_id && isset( $scores[ $current_indexable_id ] ) ) {
unset( $scores[ $current_indexable_id ] );
}
// Sort the list of scores and keep only the top $limit of the scores.
$scores = $this->get_top_suggestions( $scores, $limit );
@@ -393,10 +463,11 @@ class Link_Suggestions_Action {
// Sort the indexables by descending score.
\uasort(
$scores,
static function( $score_1, $score_2 ) {
static function ( $score_1, $score_2 ) {
if ( $score_1 === $score_2 ) {
return 0;
}
return ( ( $score_1 < $score_2 ) ? 1 : -1 );
}
);
@@ -506,7 +577,7 @@ class Link_Suggestions_Action {
protected function sort_suggestions_by_field( array &$link_suggestions, $field ) {
\usort(
$link_suggestions,
static function( $suggestion_1, $suggestion_2 ) use ( $field ) {
static function ( $suggestion_1, $suggestion_2 ) use ( $field ) {
if ( $suggestion_1[ $field ] === $suggestion_2[ $field ] ) {
return 0;
}
@@ -527,7 +598,7 @@ class Link_Suggestions_Action {
protected function filter_suggestions( $link_suggestions, $cornerstone ) {
return \array_filter(
$link_suggestions,
static function( $suggestion ) use ( $cornerstone ) {
static function ( $suggestion ) use ( $cornerstone ) {
return (bool) $suggestion['isCornerstone'] === $cornerstone;
}
);