diff --git a/src/Bootstrap.php b/src/Bootstrap.php
index 20abea2..0de7c26 100644
--- a/src/Bootstrap.php
+++ b/src/Bootstrap.php
@@ -13,6 +13,7 @@
use YDPL\Providers\MetaBoxServiceProvider;
use YDPL\Providers\RestAPIServiceProvider;
use YDPL\Providers\SettingsServiceProvider;
+use YDPL\Providers\TextExtractionServiceProvider;
use YDPL\Vendor_Prefixed\DI\ContainerBuilder;
use YDPL\Vendor_Prefixed\Psr\Container\ContainerInterface;
@@ -71,6 +72,7 @@ protected function get_providers(): array
new SettingsServiceProvider(),
new RestAPIServiceProvider(),
new MetaBoxServiceProvider(),
+ new TextExtractionServiceProvider(),
);
}
diff --git a/src/Controllers/RestAPIController.php b/src/Controllers/RestAPIController.php
index 0e804c9..42d9d59 100644
--- a/src/Controllers/RestAPIController.php
+++ b/src/Controllers/RestAPIController.php
@@ -12,6 +12,7 @@
use Exception;
use WP_REST_Request;
use WP_REST_Response;
+use YDPL\Providers\TextExtractionServiceProvider;
use YDPL\Services\TranslationService;
use YDPL\Singletons\SiteOptionsSingleton;
use YDPL\Traits\ErrorLog;
@@ -25,10 +26,12 @@ class RestAPIController
protected TranslationService $service;
protected SiteOptionsSingleton $options;
+ protected TextExtractionServiceProvider $texts;
public function __construct()
{
$this->service = new TranslationService();
+ $this->texts = new TextExtractionServiceProvider(); // Used by handle_translate_request() to look up and filter the texts allowed in secure mode; register() is not called on this instance.
$this->options = ydpl_resolve_from_container( 'ydpl.site_options' );
}
@@ -46,6 +49,35 @@ public function handle_translate_request( WP_REST_Request $request ): WP_REST_Re
return $this->set_failure_response( 400, 'Invalid input parameters.' );
}
+ // Secure mode prevents hijacking the DeepL credits.
+ if ( $this->options->secure_mode_enabled() ) {
+ if ( is_numeric( $object_id ) && (int) $object_id > 0 ) {
+ $object = "post-" . $object_id;
+ } else {
+ // get referer from request
+ $url = $request->get_header( 'referer' ) ?? false;
+ $url_host = preg_replace( '/^www\./', '', wp_parse_url( $url, PHP_URL_HOST ) );
+ $website_host = preg_replace( '/^www\./', '', wp_parse_url( home_url(), PHP_URL_HOST ) );
+ // Make sure the URL is on the same host as this website.
+ if ( $url && $url_host === $website_host ) {
+ $object = "url-{$url}";
+ }
+ }
+ if (!isset($object)) {
+ return $this->set_failure_response( 400, 'Could not determine text-object.' );
+ }
+
+ $text_allowed = $this->texts->get_allowed_text( $object );
+ if ( $text ) {
+ // In case we send texts to translate, only allow the ones that are actually in the content.
+ $text = $this->texts->array_intersect_loose( $text, $text_allowed );
+ } else {
+ // If we do not send texts to translate, we use the texts that are in the content.
+ $text = $text_allowed;
+ }
+ unset( $text_allowed );
+ }
+
// Is required when configured as such in the plugin settings.
if ( $this->options->rest_api_param_object_id_is_mandatory() && empty( $object_id ) ) {
return $this->set_failure_response( 400, 'Invalid input parameters.' );
diff --git a/src/Controllers/SettingsController.php b/src/Controllers/SettingsController.php
index 84426d5..8549118 100644
--- a/src/Controllers/SettingsController.php
+++ b/src/Controllers/SettingsController.php
@@ -51,6 +51,7 @@ public function section_fields_render( array $args ): void
'supported_languages' => ydpl_resolve_from_container( 'ydpl.supported_target.languages' ),
'configured_supported_languages' => ydpl_resolve_from_container( 'ydpl.site_options' )->configured_supported_languages(),
'rest_api_param_object_id_is_mandatory' => ydpl_resolve_from_container( 'ydpl.site_options' )->rest_api_param_object_id_is_mandatory(),
+ 'secure_mode_enabled' => ydpl_resolve_from_container( 'ydpl.site_options' )->secure_mode_enabled(),
)
);
}
diff --git a/src/Providers/SettingsServiceProvider.php b/src/Providers/SettingsServiceProvider.php
index 10cf82a..018470e 100644
--- a/src/Providers/SettingsServiceProvider.php
+++ b/src/Providers/SettingsServiceProvider.php
@@ -104,5 +104,14 @@ public function register_settings_options(): void
'ydpl_section_rest_api',
array( 'settings_field_id' => 'ydpl_rest_api_param_object_id_is_mandatory' )
);
+
+ add_settings_field(
+ 'ydpl_secure_mode_enabled',
+ __( 'Run translations in secure mode', 'yard-deepl' ),
+ array( $this->controller, 'section_fields_render' ),
+ 'yard-deepl',
+ 'ydpl_section_rest_api',
+ array( 'settings_field_id' => 'ydpl_secure_mode_enabled' )
+ );
}
}
diff --git a/src/Providers/TextExtractionServiceProvider.php b/src/Providers/TextExtractionServiceProvider.php
new file mode 100644
index 0000000..4eba88e
--- /dev/null
+++ b/src/Providers/TextExtractionServiceProvider.php
@@ -0,0 +1,279 @@
+
+ * @subpackage YDPL\Providers\TextExtractionServiceProvider
+ */
+
+namespace YDPL\Providers;
+
+/**
+ * Exit when accessed directly.
+ */
+if (!defined('ABSPATH')) {
+ exit;
+}
+
+use YDPL\Contracts\ServiceProviderInterface;
+use YDPL\Singletons\SiteOptionsSingleton;
+
+class TextExtractionServiceProvider implements ServiceProviderInterface
+{
+    /**
+     * Site options resolved from the container.
+     *
+     * NOTE(review): no method of this class reads this property; kept because it is protected
+     * and a subclass may rely on it.
+     */
+    protected SiteOptionsSingleton $options;
+
+    public function __construct()
+    {
+        $this->options = ydpl_resolve_from_container('ydpl.site_options');
+    }
+
+    /**
+     * Register the service provider.
+     *
+     * Hooks action_save_post() onto `save_post` so the allowed-text caches are rebuilt when content changes.
+     *
+     * @since 0.0.1
+     */
+    public function register(): void
+    {
+        add_action('save_post', [$this, 'action_save_post']);
+    }
+
+    /**
+     * Save post action: schedule a refresh of the allowed-text caches at `shutdown`.
+     *
+     * Nothing is scheduled during autosaves, AJAX requests, REST requests or for revisions.
+     *
+     * @param int $object_id The ID of the post being saved.
+     *
+     * @return void
+     */
+    public function action_save_post(int $object_id): void
+    {
+        if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) {
+            return;
+        }
+
+        if (defined('DOING_AJAX') && DOING_AJAX) {
+            return;
+        }
+
+        if (defined('REST_REQUEST') && REST_REQUEST) {
+            return;
+        }
+
+        // Revisions trigger `save_post` as well; refreshing them would only cost an extra HTTP round trip.
+        if (wp_is_post_revision($object_id)) {
+            return;
+        }
+
+        add_action('shutdown', function () use ($object_id) {
+            // get_allowed_text() expects "<type>-<identifier>"; a bare ID matches no type.
+            $this->get_allowed_text("post-{$object_id}", true);
+
+            // Unfortunately, we have no way of knowing which URL was updated, so we have to drop all of them.
+            // delete_option() is used instead of a raw DELETE so the options object cache is invalidated too.
+            global $wpdb;
+            $option_names = $wpdb->get_col(
+                $wpdb->prepare(
+                    "SELECT option_name FROM {$wpdb->options} WHERE option_name LIKE %s",
+                    $wpdb->esc_like('ydpl-cache-') . '%'
+                )
+            );
+            foreach ($option_names as $option_name) {
+                delete_option($option_name);
+            }
+        });
+    }
+
+    /**
+     * Get allowed text array for a post, or a URL.
+     *
+     * The list is cached in the `ydpl_allowed_text` post meta for posts and in a
+     * `ydpl-cache-<md5 of the URL>` option for URLs. A non-empty cache entry is returned as-is
+     * unless $refresh is true.
+     *
+     * @param string $object  The object to get the allowed text for: "post-<post ID>" or "url-<URL>".
+     * @param bool   $refresh Rebuild the list even when a cached one exists.
+     *
+     * @return array List of allowed texts; empty for an unknown or malformed $object.
+     */
+    public function get_allowed_text(string $object, bool $refresh = false): array
+    {
+        // Pad so input without a "-" yields an empty identifier instead of an undefined offset.
+        [$type, $resource_identifier] = array_pad(explode('-', $object, 2), 2, '');
+        if ('' === $resource_identifier) {
+            return [];
+        }
+
+        $allowed_text = [];
+        switch ($type) {
+            case 'post':
+                // Get the stored list of allowed texts from post-meta.
+                $cached = get_post_meta($resource_identifier, 'ydpl_allowed_text', true);
+                if (!empty($cached) && !$refresh) {
+                    return $cached['text'] ?? [];
+                }
+                // No usable cache (or a refresh was requested): build it, cache it and return it.
+                $url = get_permalink($resource_identifier);
+                if (!$url) {
+                    return [];
+                }
+                $allowed_text = $this->extract_text($url);
+                update_post_meta(
+                    $resource_identifier,
+                    'ydpl_allowed_text',
+                    ['text' => $allowed_text, 'timestamp' => microtime(true)]
+                );
+                break;
+            case 'url':
+                // Get the stored list of allowed texts from the options table.
+                $url = $resource_identifier;
+                $option_name = 'ydpl-cache-' . md5($url);
+                $cached = get_option($option_name);
+                if (!empty($cached) && !$refresh) {
+                    return $cached['text'] ?? [];
+                }
+                // No usable cache (or a refresh was requested): build it, cache it and return it.
+                $allowed_text = $this->extract_text($url);
+                // update_option() creates the option when it is missing; `false` keeps it out of the autoloaded set.
+                update_option(
+                    $option_name,
+                    ['text' => $allowed_text, 'timestamp' => microtime(true), 'url' => $url],
+                    false
+                );
+                break;
+            default:
+                // Unknown object type: nothing is allowed.
+                return [];
+        }
+
+        return $allowed_text;
+    }
+
+    /**
+     * Extract text array from a URL.
+     *
+     * Fetches the URL over HTTP, parses the body as HTML and collects the leaf text (or, for the
+     * `input[...]` selectors, the `value` attribute) found under the elements matched by the selector list.
+     *
+     * @param string $url The URL to extract text from.
+     *
+     * @return array Unique, trimmed, non-empty texts sorted case-insensitively; empty when the request
+     *               fails or returns no body.
+     */
+    public function extract_text(string $url)
+    {
+        // Preserve user state.
+        // NOTE(review): this forwards the current visitor's cookies, so the cached list may contain
+        // text only that visitor can see — confirm this is acceptable.
+        // NOTE(review): 'referer' is not a documented wp_remote_get() argument; presumably a Referer
+        // header was intended — confirm before moving it into 'headers'.
+        $response = wp_remote_get($url, [
+            'cookies' => $_COOKIE,
+            'referer' => $url,
+        ]);
+
+        if (is_wp_error($response)) {
+            return [];
+        }
+
+        $content = wp_remote_retrieve_body($response);
+
+        if (!$content) {
+            return [];
+        }
+
+        // Use DOM and xpath to extract the content. Parser warnings about real-world HTML are
+        // collected internally and discarded instead of being silenced with `@`.
+        $dom = new \DOMDocument();
+        $previous_libxml_setting = libxml_use_internal_errors(true);
+        $dom->loadHTML($content);
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous_libxml_setting);
+        $xpath = new \DOMXPath($dom);
+
+        // A list of jQuery / CSS selectors to extract text from. We translate this list to
+        // xpath-compatible selectors; it is kept in this form for ease of maintenance.
+        $content_selector = [
+            'div',
+            'p',
+            'span',
+            'h1',
+            'h2',
+            'h3',
+            'h4',
+            'h5',
+            'h6',
+            'li',
+            'button',
+            'blockquote',
+            'a',
+            'label',
+            'details',
+            'summary',
+            'figcaption',
+            'code',
+            'pre',
+            'th',
+            'td',
+            'textarea',
+            'time',
+            'input[type="button"]',
+            'input[type="submit"]',
+            'input[type="reset"]',
+        ];
+
+        $allowed_text = [];
+        foreach ($content_selector as $selector) {
+            $where = 'text';
+            if (str_contains($selector, '[')) {
+                // CSS `[type="x"]` becomes xpath `[@type="x"]`.
+                $selector = str_replace('[', '[@', $selector);
+                // For now, this fits the bill. We can expand this later.
+                $where = 'attr-value';
+            }
+            // NOTE(review): the query is relative (`*/selector`), so it only matches elements one level
+            // below DOMXPath's default context node; deeper text is reached through the recursion in
+            // extract_from_dom_nodes() — confirm this matches the intent.
+            $nodes = $xpath->query("*/{$selector}");
+            if (false === $nodes) {
+                continue;
+            }
+
+            $allowed_text = array_merge($allowed_text, self::extract_from_dom_nodes($nodes, $where));
+        }
+
+        if ($allowed_text) {
+            // Compare against '' explicitly so a legitimate text such as "0" is not dropped.
+            $allowed_text = array_filter(
+                array_map('trim', $allowed_text),
+                static fn ($candidate) => '' !== $candidate
+            );
+            $allowed_text = array_unique($allowed_text, SORT_STRING);
+            usort($allowed_text, 'strcasecmp');
+        }
+
+        return $allowed_text;
+    }
+
+    /**
+     * Array Intersect, but all strings are compared loosely to allow for accents, whitespace difference and case-insensitivity.
+     *
+     * Keys of $text are preserved in the result, as with array_uintersect().
+     *
+     * @param string|string[]|null $text      Text or list of texts to intersect. Items not in $intersect are removed;
+     *                                        a single string is treated as a one-item list and non-scalar items are ignored.
+     * @param string[]             $intersect List of texts to intersect with.
+     *
+     * @return array
+     */
+    public function array_intersect_loose(mixed $text, array $intersect): array
+    {
+        // array_uintersect() needs arrays of comparable values; normalise whatever the request delivered.
+        $candidates = array_filter((array) $text, 'is_scalar');
+
+        return array_uintersect($candidates, $intersect, [self::class, 'compare_function']);
+    }
+
+    /**
+     * Normalize a string for comparison.
+     *
+     * Trims, collapses whitespace runs to one space, lower-cases, then replaces every HTML entity
+     * by the first character of its name (e.g. `&eacute;` becomes `e`).
+     *
+     * @param string $string_to_normalize The string to normalize.
+     * @param string $encoding            The encoding of the string.
+     *
+     * @return string
+     */
+    private static function normalize_string($string_to_normalize, $encoding = "UTF-8"): string
+    {
+        $normalized = trim((string) $string_to_normalize);
+        $normalized = (string) preg_replace('/\s+/', ' ', $normalized);
+        $normalized = mb_strtolower($normalized, $encoding);
+        // ENT_NOQUOTES | ENT_HTML401 equals 0, the value the former `null` argument was coerced to;
+        // passing null to an int parameter is deprecated as of PHP 8.1.
+        $with_entities = htmlentities($normalized, ENT_NOQUOTES | ENT_HTML401, $encoding);
+
+        return (string) preg_replace('/&([^;])[^;]*;/', '$1', $with_entities);
+    }
+
+    /**
+     * Internal compare function for array_uintersect, for loose comparison.
+     *
+     * @param string $a A string.
+     * @param string $b Another string.
+     *
+     * @return int Result of strcmp() on the two normalized strings.
+     */
+    private static function compare_function($a, $b): int
+    {
+        return strcmp(self::normalize_string($a), self::normalize_string($b));
+    }
+
+    /**
+     * Extract text from DOM nodes, recursively.
+     *
+     * Nodes that have children are descended into; only leaf nodes contribute a value.
+     *
+     * @param \DOMNodeList $nodes List of DOM nodes.
+     *                            Not strong typed in the signature to prevent errors in case a different library version gives a slightly different object.
+     * @param string       $where 'text' to collect node values, or 'attr-<name>' to collect the attribute <name>.
+     *
+     * @return array Trimmed values, possibly containing empty strings and duplicates.
+     */
+    private static function extract_from_dom_nodes($nodes, $where = 'text'): array
+    {
+        // Parse the mode once, into separate names: overwriting $where inside the loop used to lose
+        // the attribute name after the first leaf node.
+        [$source, $attribute] = explode('-', $where . '-unknown');
+
+        $allowed_text = [];
+        foreach ($nodes as $node) {
+            $children = $node->childNodes;
+            if (null !== $children && $children->length > 0) {
+                $allowed_text = array_merge($allowed_text, self::extract_from_dom_nodes($children, $where));
+                continue;
+            }
+
+            if ('attr' === $source) {
+                // Only elements carry attributes; text and comment leaves have no getAttribute().
+                if ($node instanceof \DOMElement) {
+                    $allowed_text[] = trim($node->getAttribute($attribute));
+                }
+                continue;
+            }
+
+            $allowed_text[] = trim((string) $node->nodeValue);
+        }
+
+        return $allowed_text;
+    }
+}
diff --git a/src/Singletons/SiteOptionsSingleton.php b/src/Singletons/SiteOptionsSingleton.php
index 6dc11ab..a620fb9 100644
--- a/src/Singletons/SiteOptionsSingleton.php
+++ b/src/Singletons/SiteOptionsSingleton.php
@@ -67,4 +67,14 @@ public function rest_api_param_object_id_is_mandatory(): bool
return 'on' === $value;
}
+
+    /**
+     * Tell whether secure mode is switched on.
+     *
+     * @since 0.0.1
+     *
+     * @return bool True only when the `ydpl_secure_mode_enabled` entry of the options equals 'on'.
+     */
+    public function secure_mode_enabled(): bool
+    {
+        // A missing entry falls back to '' and therefore counts as disabled.
+        return 'on' === ( $this->options['ydpl_secure_mode_enabled'] ?? '' );
+    }
}
diff --git a/src/Views/admin/partials/settings/settings-fields.php b/src/Views/admin/partials/settings/settings-fields.php
index 90b6d97..7721a6c 100644
--- a/src/Views/admin/partials/settings/settings-fields.php
+++ b/src/Views/admin/partials/settings/settings-fields.php
@@ -11,6 +11,7 @@
$supported_languages = is_array( $supported_languages ?? null ) ? $supported_languages : array();
$configured_supported_languages = is_array( $configured_supported_languages ?? null ) ? $configured_supported_languages : array();
$rest_api_param_object_id_is_mandatory = $rest_api_param_object_id_is_mandatory ?? true;
+$secure_mode_enabled = $secure_mode_enabled ?? false;
?>
@@ -30,3 +31,7 @@
>
+
+
+>
+