diff --git a/src/Bootstrap.php b/src/Bootstrap.php index 20abea2..0de7c26 100644 --- a/src/Bootstrap.php +++ b/src/Bootstrap.php @@ -13,6 +13,7 @@ use YDPL\Providers\MetaBoxServiceProvider; use YDPL\Providers\RestAPIServiceProvider; use YDPL\Providers\SettingsServiceProvider; +use YDPL\Providers\TextExtractionServiceProvider; use YDPL\Vendor_Prefixed\DI\ContainerBuilder; use YDPL\Vendor_Prefixed\Psr\Container\ContainerInterface; @@ -71,6 +72,7 @@ protected function get_providers(): array new SettingsServiceProvider(), new RestAPIServiceProvider(), new MetaBoxServiceProvider(), + new TextExtractionServiceProvider(), ); } diff --git a/src/Controllers/RestAPIController.php b/src/Controllers/RestAPIController.php index 0e804c9..42d9d59 100644 --- a/src/Controllers/RestAPIController.php +++ b/src/Controllers/RestAPIController.php @@ -12,6 +12,7 @@ use Exception; use WP_REST_Request; use WP_REST_Response; +use YDPL\Providers\TextExtractionServiceProvider; use YDPL\Services\TranslationService; use YDPL\Singletons\SiteOptionsSingleton; use YDPL\Traits\ErrorLog; @@ -25,10 +26,12 @@ class RestAPIController protected TranslationService $service; protected SiteOptionsSingleton $options; + protected TextExtractionServiceProvider $texts; public function __construct() { $this->service = new TranslationService(); + $this->texts = new TextExtractionServiceProvider(); $this->options = ydpl_resolve_from_container( 'ydpl.site_options' ); } @@ -46,6 +49,35 @@ public function handle_translate_request( WP_REST_Request $request ): WP_REST_Re return $this->set_failure_response( 400, 'Invalid input parameters.' ); } + // Secure mode prevents hijacking the DeepL credits. + if ( $this->options->secure_mode_enabled() ) { + if ( is_numeric( $object_id ) && (int) $object_id > 0 ) { + $object = "post-" . $object_id; + } else { + // get referer from request + $url = $request->get_header( 'referer' ) ?? 
false; + $url_host = preg_replace( '/^www\./', '', wp_parse_url( $url, PHP_URL_HOST ) ); + $website_host = preg_replace( '/^www\./', '', wp_parse_url( home_url(), PHP_URL_HOST ) ); + // make sure the URL in on the same domain as this website. + if ( $url && $url_host === $website_host ) { + $object = "url-{$url}"; + } + } + if (!isset($object)) { + return $this->set_failure_response( 400, 'Could not determine text-object.' ); + } + + $text_allowed = $this->texts->get_allowed_text( $object ); + if ( $text ) { + // In case we send texts to translate, only allow the ones that are actually in the content. + $text = $this->texts->array_intersect_loose( $text, $text_allowed ); + } else { + // If we do not send texts to translate, we use the texts that are in the content. + $text = $text_allowed; + } + unset( $text_allowed ); + } + // Is required when configured as such in the plugin settings. if ( $this->options->rest_api_param_object_id_is_mandatory() && empty( $object_id ) ) { return $this->set_failure_response( 400, 'Invalid input parameters.' 
); diff --git a/src/Controllers/SettingsController.php b/src/Controllers/SettingsController.php index 84426d5..8549118 100644 --- a/src/Controllers/SettingsController.php +++ b/src/Controllers/SettingsController.php @@ -51,6 +51,7 @@ public function section_fields_render( array $args ): void 'supported_languages' => ydpl_resolve_from_container( 'ydpl.supported_target.languages' ), 'configured_supported_languages' => ydpl_resolve_from_container( 'ydpl.site_options' )->configured_supported_languages(), 'rest_api_param_object_id_is_mandatory' => ydpl_resolve_from_container( 'ydpl.site_options' )->rest_api_param_object_id_is_mandatory(), + 'secure_mode_enabled' => ydpl_resolve_from_container( 'ydpl.site_options' )->secure_mode_enabled(), ) ); } diff --git a/src/Providers/SettingsServiceProvider.php b/src/Providers/SettingsServiceProvider.php index 10cf82a..018470e 100644 --- a/src/Providers/SettingsServiceProvider.php +++ b/src/Providers/SettingsServiceProvider.php @@ -104,5 +104,14 @@ public function register_settings_options(): void 'ydpl_section_rest_api', array( 'settings_field_id' => 'ydpl_rest_api_param_object_id_is_mandatory' ) ); + + add_settings_field( + 'ydpl_secure_mode_enabled', + __( 'Run translations in secure mode', 'yard-deepl' ), + array( $this->controller, 'section_fields_render' ), + 'yard-deepl', + 'ydpl_section_rest_api', + array( 'settings_field_id' => 'ydpl_secure_mode_enabled' ) + ); } } diff --git a/src/Providers/TextExtractionServiceProvider.php b/src/Providers/TextExtractionServiceProvider.php new file mode 100644 index 0000000..4eba88e --- /dev/null +++ b/src/Providers/TextExtractionServiceProvider.php @@ -0,0 +1,279 @@ + + * @subpackage YDPL\Providers\TextExtractionServiceProvider + */ + +namespace YDPL\Providers; + +/** + * Exit when accessed directly. 
/**
 * Builds, caches and serves the list of texts that occur on a post or URL.
 *
 * Callers use the list as an allow-list: get_allowed_text() returns the texts
 * found in the rendered page and array_intersect_loose() filters submitted
 * texts against it.
 */
class TextExtractionServiceProvider implements ServiceProviderInterface
{
    protected SiteOptionsSingleton $options;

    /**
     * Resolve the plugin's site options from the container.
     */
    public function __construct()
    {
        $this->options = ydpl_resolve_from_container('ydpl.site_options');
    }

    /**
     * Register the service provider.
     *
     * @since 0.0.1
     */
    public function register(): void
    {
        add_action('save_post', [$this, 'action_save_post']);
    }

    /**
     * Save post action: schedules a rebuild of the allowed-text caches at shutdown.
     *
     * Nothing is scheduled during autosaves, AJAX requests or REST requests.
     *
     * @param int $object_id The ID of the post being saved.
     *
     * @return void
     */
    public function action_save_post(int $object_id): void
    {
        if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) {
            return;
        }

        if (defined('DOING_AJAX') && DOING_AJAX) {
            return;
        }

        if (defined('REST_REQUEST') && REST_REQUEST) {
            return;
        }

        // save_post also fires for revisions; fetching a revision's permalink would only waste an HTTP request.
        if (wp_is_post_revision($object_id)) {
            return;
        }

        add_action('shutdown', function () use ($object_id): void {
            // get_allowed_text() expects a typed identifier ("post-<ID>"), not a bare ID.
            $this->get_allowed_text('post-' . $object_id, true);

            // Unfortunately, we have no way of knowing which URL was updated, so we have to refresh all of them.
            $this->flush_url_caches();
        });
    }

    /**
     * Get allowed text array for a post, or a URL.
     *
     * @param string $object  Typed identifier: "post-<ID>" or "url-<URL>".
     * @param bool   $refresh When true, the cached list is ignored and rebuilt.
     *
     * @return array List of allowed texts; empty when the identifier is unknown or nothing could be extracted.
     */
    public function get_allowed_text(string $object, bool $refresh = false): array
    {
        // Pad so an identifier without a '-' cannot produce an undefined offset.
        [$type, $resource_identifier] = array_pad(explode('-', $object, 2), 2, '');

        switch ($type) {
            case 'post':
                return $this->get_allowed_text_for_post((int) $resource_identifier, $refresh);
            case 'url':
                return $this->get_allowed_text_for_url($resource_identifier, $refresh);
            default:
                // Unknown identifier type: nothing is allowed.
                return [];
        }
    }

    /**
     * Extract text array from a URL.
     *
     * Fetches the page over HTTP, parses it as HTML and collects the text of a fixed
     * list of elements (plus the value attribute of button-like inputs).
     *
     * @param string $url The URL to extract text from.
     *
     * @return array Unique, trimmed, non-empty texts, sorted case-insensitively.
     */
    public function extract_text(string $url)
    {
        // Preserve user state.
        // NOTE(review): get_allowed_text() caches this result, so forwarding the visitor's
        // cookies may cache user-specific markup - confirm this is intended.
        $response = wp_remote_get($url, [
            'cookies' => $_COOKIE,
            // WP_Http has no 'referer' argument; the header must be sent through 'headers'.
            'headers' => ['Referer' => $url],
        ]);

        if (is_wp_error($response)) {
            return [];
        }

        $content = wp_remote_retrieve_body($response);

        if (!$content) {
            return [];
        }

        // Use DOM and xpath to extract the content. Real-world HTML is rarely valid, so
        // parser warnings are collected internally and discarded instead of being emitted.
        $dom                 = new \DOMDocument();
        $previous_error_mode = libxml_use_internal_errors(true);
        $loaded              = $dom->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_error_mode);

        if (!$loaded) {
            return [];
        }

        $xpath            = new \DOMXPath($dom);
        $content_selector = [
            // A list of jQuery / CSS selectors to extract text from. We translate this list
            // to xpath compatible selectors; it is kept this way for ease of maintenance.
            'div',
            'p',
            'span',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'li',
            'button',
            'blockquote',
            'a',
            'label',
            'details',
            'summary',
            'figcaption',
            'code',
            'pre',
            'th',
            'td',
            'textarea',
            'time',
            'input[type="button"]',
            'input[type="submit"]',
            'input[type="reset"]',
        ];

        $allowed_text = [];
        foreach ($content_selector as $selector) {
            $where = 'text';
            if (str_contains($selector, '[')) {
                // CSS attribute selector -> XPath predicate: input[type="x"] becomes input[@type="x"].
                $selector = str_replace('[', '[@', $selector);
                // For now, this fits the bill. We can expand this later.
                $where = 'attr-value';
            }

            // '//' matches the element at any depth, like the CSS selector it was translated from.
            $nodes = $xpath->query("//{$selector}");
            if (false === $nodes) {
                continue;
            }

            $allowed_text = array_merge($allowed_text, self::extract_from_dom_nodes($nodes, $where));
        }

        // Drop empty strings only; a plain array_filter() would also drop the legitimate text "0".
        $allowed_text = array_filter(
            array_map('trim', $allowed_text),
            static fn (string $text): bool => '' !== $text
        );
        $allowed_text = array_unique($allowed_text, SORT_STRING);
        usort($allowed_text, 'strcasecmp');

        return $allowed_text;
    }

    /**
     * Array Intersect, but all strings are compared loosely to allow for accents, whitespace difference and case-insensitivity.
     *
     * Keys of $text are preserved, as with array_uintersect().
     *
     * @param string[] $text      List of texts to intersect. Items not in other lists will be removed.
     *                            A single scalar is treated as a one-item list.
     * @param string[] $intersect List of texts to intersect with.
     *
     * @return array
     */
    public function array_intersect_loose(mixed $text, array $intersect): array
    {
        // $text is typed mixed: accept a single value, and skip members that cannot be compared as strings.
        $candidates = array_filter((array) $text, 'is_scalar');
        $allowed    = array_filter($intersect, 'is_scalar');

        return array_uintersect($candidates, $allowed, [$this, 'compare_function']);
    }

    /**
     * Return the allowed texts for a post, from post meta when cached, otherwise freshly extracted.
     *
     * @param int  $post_id The post ID.
     * @param bool $refresh When true, the cached list is ignored and rebuilt.
     *
     * @return array
     */
    private function get_allowed_text_for_post(int $post_id, bool $refresh): array
    {
        if ($post_id <= 0) {
            return [];
        }

        // Get the stored list of allowed texts from post-meta.
        $cache = get_post_meta($post_id, 'ydpl_allowed_text', true);
        if (!empty($cache) && !$refresh) {
            return self::cached_text($cache);
        }

        // If we have no cache at all, we build it, cache it and return it.
        $url = get_permalink($post_id);
        if (!$url) {
            return [];
        }

        $allowed_text = $this->extract_text($url);
        update_post_meta($post_id, 'ydpl_allowed_text', ['text' => $allowed_text, 'timestamp' => microtime(true)]);

        return $allowed_text;
    }

    /**
     * Return the allowed texts for a URL, from the options table when cached, otherwise freshly extracted.
     *
     * @param string $url     The URL.
     * @param bool   $refresh When true, the cached list is ignored and rebuilt.
     *
     * @return array
     */
    private function get_allowed_text_for_url(string $url, bool $refresh): array
    {
        if ('' === $url) {
            return [];
        }

        // Get the stored list of allowed texts from the options table.
        $option_name = 'ydpl-cache-' . md5($url);
        $cache       = get_option($option_name);
        if (!empty($cache) && !$refresh) {
            return self::cached_text($cache);
        }

        // If we have no cache at all, we build it, cache it and return it.
        $allowed_text = $this->extract_text($url);
        $data         = ['text' => $allowed_text, 'timestamp' => microtime(true), 'url' => $url];

        // update_option() creates the option when it is missing; autoload=false keeps it out of the autoloaded set.
        update_option($option_name, $data, false);

        return $allowed_text;
    }

    /**
     * Read the text list out of a stored cache entry.
     *
     * @param mixed $cache Value as returned by get_post_meta() / get_option().
     *
     * @return array The cached texts, or an empty array when the entry has no usable 'text' list.
     */
    private static function cached_text($cache): array
    {
        $text = is_array($cache) ? ($cache['text'] ?? []) : [];

        return is_array($text) ? $text : [];
    }

    /**
     * Remove every cached URL text list ("ydpl-cache-*" options).
     *
     * @return void
     */
    private function flush_url_caches(): void
    {
        global $wpdb;

        $like         = $wpdb->esc_like('ydpl-cache-') . '%';
        $option_names = $wpdb->get_col(
            $wpdb->prepare("SELECT option_name FROM {$wpdb->options} WHERE option_name LIKE %s", $like)
        );

        if (empty($option_names)) {
            return;
        }

        $wpdb->query(
            $wpdb->prepare("DELETE FROM {$wpdb->options} WHERE option_name LIKE %s", $like)
        );

        // The rows were deleted behind the options API's back, so drop object-cache copies as well;
        // otherwise a persistent object cache keeps serving the stale lists.
        foreach ($option_names as $option_name) {
            wp_cache_delete($option_name, 'options');
        }
    }

    /**
     * Normalize a string for comparison.
     *
     * Trims, collapses whitespace runs to one space, lower-cases, and replaces every HTML
     * entity by the first character of its name (so "&eacute;" becomes "e").
     *
     * @param string $string_to_normalize The string to normalize.
     * @param string $encoding            The encoding of the string.
     *
     * @return string
     */
    private static function normalize_string($string_to_normalize, $encoding = "UTF-8"): string
    {
        $normalized = trim((string) $string_to_normalize);
        $normalized = (string) preg_replace('/\s+/', ' ', $normalized);

        // Explicit flags: passing null for $flags is deprecated since PHP 8.1. The value is unchanged (0).
        $entities = htmlentities(mb_strtolower($normalized, $encoding), ENT_NOQUOTES | ENT_HTML401, $encoding);

        return (string) preg_replace('/&([^;])[^;]*;/', '$1', $entities);
    }

    /**
     * Internal compare function for array_uintersect, for loose comparison.
     *
     * @param string $a A string.
     * @param string $b Another string.
     *
     * @return int
     */
    private static function compare_function($a, $b): int
    {
        return strcmp(self::normalize_string($a), self::normalize_string($b));
    }

    /**
     * Extract text from DOM nodes, recursively.
     *
     * In "text" mode the value of every leaf node below the given nodes is collected.
     * In "attr-<name>" mode the named attribute of each given element is collected.
     *
     * @param \DOMNodeList $nodes List of DOM nodes.
     *                            Not strong typed in the signature to prevent errors in case a different library version gives a slightly different object.
     * @param string       $where Either "text" or "attr-<attribute name>".
     *
     * @return array
     */
    private static function extract_from_dom_nodes($nodes, $where = 'text'): array
    {
        // Parse the mode once, into separate variables: re-parsing into $where inside the loop
        // turned "attr-value" into "attr-unknown" for every node after the first.
        [$mode, $attribute] = explode('-', $where . '-unknown');

        $allowed_text = [];
        foreach ($nodes as $node) {
            if ('attr' === $mode) {
                // Only elements carry attributes.
                if ($node instanceof \DOMElement) {
                    $allowed_text[] = trim($node->getAttribute($attribute));
                }
                continue;
            }

            if ($node->hasChildNodes()) {
                // Descend until we reach leaf nodes, so each text node is collected separately.
                $allowed_text = array_merge($allowed_text, self::extract_from_dom_nodes($node->childNodes, $where));
                continue;
            }

            $allowed_text[] = trim((string) $node->nodeValue);
        }

        return $allowed_text;
    }
}