Skip to content
wiki.fftac.org

Dictionaryvalidator - Source Excerpt 01

Back to Dictionaryvalidator

Summary

This source excerpt preserves a bounded section of Antichrist.net/wp-content/plugins/uaix-locale-router/src/DictionaryValidator.php so readers can inspect the evidence without opening the full source file.

**Source path:** Antichrist.net/wp-content/plugins/uaix-locale-router/src/DictionaryValidator.php

<?php

namespace UAIXLocaleRouter;

use UAIXLocaleRouter\Support\Json;


if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

final class DictionaryValidator {
	/**
	 * Maximum accepted dictionary JSON payload size.
	 */
	const MAX_JSON_BYTES = 1048576;

	/**
	 * Validate a raw JSON dictionary string.
	 *
	 * The payload is rejected up front when it is longer than
	 * self::MAX_JSON_BYTES or when it does not decode to an array; otherwise the
	 * decoded data is passed on to validate_dictionary_data().
	 *
	 * @param string $json     Raw JSON.
	 * @param string $filename Uploaded filename, forwarded to validate_dictionary_data().
	 * @return array Result of self::finalize_result() for a rejection, or of self::validate_dictionary_data() otherwise.
	 */
	public static function validate_json_string( $json, $filename = '' ) {
		// Holds array( issue code, issue message ) once a rejection reason is known.
		$rejection = null;
		$decoded   = null;

		if ( strlen( (string) $json ) > self::MAX_JSON_BYTES ) {
			$rejection = array( 'oversized_payload', 'The uploaded dictionary is larger than the allowed 1 MB limit.' );
		} else {
			// Decoding is only attempted for payloads within the size limit.
			$decoded = Json::decode_string( $json, null );

			if ( ! is_array( $decoded ) ) {
				$rejection = array( 'invalid_json', 'The uploaded dictionary is not valid JSON.' );
			}
		}

		if ( null === $rejection ) {
			return self::validate_dictionary_data( $decoded, $filename );
		}

		$rejected_result = array(
			'localeTag'             => '',
			'status'                => 'rejected',
			'issues'                => array(
				self::issue( 'error', $rejection[0], $rejection[1] ),
			),
			'templateVersion'       => 1,
			'normalized_dictionary' => array(),
		);

		return self::finalize_result( $rejected_result );
	}

	/**
	 * Validate a stored dictionary file.
	 *
	 * Rejects the file with an `oversized_payload` issue when its size on disk
	 * exceeds self::MAX_JSON_BYTES, and with a `missing_file` issue when the path
	 * is not a regular, readable file or its contents cannot be read. Otherwise
	 * the contents are handed to validate_json_string() together with the
	 * basename of the path.
	 *
	 * @param string $file_path Dictionary file path.
	 * @return array Result of self::finalize_result() for a rejection, or of self::validate_json_string() otherwise.
	 */
	public static function validate_file( $file_path ) {
		// is_file() instead of file_exists(): a directory "exists" but cannot be
		// read as a dictionary and would make file_get_contents() raise a warning.
		$is_regular_file = is_file( $file_path );
		$file_size       = $is_regular_file ? filesize( $file_path ) : false;

		if ( false !== $file_size && $file_size > self::MAX_JSON_BYTES ) {
			return self::finalize_result(
				array(
					'localeTag'  => '',
					'status'     => 'rejected',
					'issues'     => array(
						self::issue( 'error', 'oversized_payload', 'The dictionary file is larger than the allowed 1 MB limit.' ),
					),
					'templateVersion' => 1,
					'normalized_dictionary' => array(),
				)
			);
		}

		// Read at most one byte past the limit so a file that grows after the size
		// check is never loaded into memory in full; validate_json_string() still
		// rejects any payload longer than the limit.
		$contents = ( $is_regular_file && is_readable( $file_path ) )
			? file_get_contents( $file_path, false, null, 0, self::MAX_JSON_BYTES + 1 )
			: false;

		if ( false === $contents ) {
			return self::finalize_result(
				array(
					'localeTag'  => '',
					'status'     => 'rejected',
					'issues'     => array(
						self::issue( 'error', 'missing_file', 'The dictionary file could not be read.' ),
					),
					'templateVersion' => 1,
					'normalized_dictionary' => array(),
				)
			);
		}

		return self::validate_json_string( $contents, basename( $file_path ) );
	}

	/**
	 * Validate a decoded dictionary payload.
	 *
	 * @param array  $data Dictionary data.
	 * @param string $filename Source filename.
	 * @return array
	 */
	public static function validate_dictionary_data( array $data, $filename = '' ) {
		$template           = DictionaryRepository::load_template_dictionary();
		$template_strings   = isset( $template['strings'] ) && is_array( $template['strings'] ) ? $template['strings'] : array();
		$template_version   = isset( $template['meta']['version'] ) ? absint( $template['meta']['version'] ) : 1;
		$locale_tag         = isset( $data['meta']['locale'] ) ? LocaleValidator::canonicalize_tag( $data['meta']['locale'] ) : '';
		$filename_locale    = '' !== $filename ? LocaleValidator::canonicalize_tag( basename( (string) $filename, '.json' ) ) : '';
		$dictionary         = array(
			'meta'    => isset( $data['meta'] ) && is_array( $data['meta'] ) ? $data['meta'] : array(),
			'strings' => isset( $data['strings'] ) && is_array( $data['strings'] ) ? $data['strings'] : array(),
		);
		$normalized         = TranslationCatalog::normalize_dictionary_payload( $dictionary );
		$key_mode           = isset( $normalized['detectedKeyMode'] ) ? (string) $normalized['detectedKeyMode'] : TranslationCatalog::detect_dictionary_key_mode( $dictionary['strings'] );
		$source_strings     = array_keys( TranslationCatalog::source_to_runtime_keys() );
		$issues             = array();
		$dictionary_strings = $dictionary['strings'];
		$quality_status     = isset( $dictionary['meta']['qualityStatus'] ) ? (string) $dictionary['meta']['qualityStatus'] : 'draft';
		$valid_statuses     = array( 'draft', 'ai-generated', 'reviewed', 'approved', 'rejected' );
		$expected_keys      = 'runtime-key' === $key_mode ? array_keys( $template_strings ) : $source_strings;
		$expected_key_map   = array_fill_keys( $expected_keys, true );

		if ( '' === $locale_tag ) {
			$issues[] = self::issue( 'error', 'missing_locale', 'Dictionary metadata must declare a locale tag.' );
		} elseif ( ! LocaleValidator::is_valid_locale_tag( $locale_tag ) ) {
			$issues[] = self::issue( 'error', 'invalid_locale_tag', 'The locale tag is not a valid BCP 47 style tag.' );
		} else {
			$supported_locale = LocaleRepository::get_supported_locale( $locale_tag );

			if ( empty( $supported_locale ) || empty( $supported_locale['enabled'] ) ) {
				$issues[] = self::issue( 'error', 'unsupported_locale', 'The dictionary locale is not enabled or supported by this site.' );
			}
		}

		if ( '' !== $filename_locale && '' !== $locale_tag && $filename_locale !== $locale_tag ) {
			$issues[] = self::issue( 'error', 'filename_locale_mismatch', 'The locale tag does not match the uploaded filename.' );
		}

		if ( '' !== $locale_tag && ! empty( $dictionary['meta']['fallback'] ) ) {
			$fallback = LocaleValidator::canonicalize_tag( $dictionary['meta']['fallback'] );

			if ( '' === $fallback ) {
				$issues[] = self::issue( 'error', 'invalid_fallback', 'The declared fallback locale is invalid.' );
			}
		}

		if ( isset( $dictionary['meta']['version'] ) && absint( $dictionary['meta']['version'] ) !== $template_version ) {
			$issues[] = self::issue( 'warning', 'template_version_mismatch', 'The dictionary version does not match the template version.' );
		}

		if ( ! in_array( $quality_status, $valid_statuses, true ) ) {
			$issues[] = self::issue( 'warning', 'unknown_quality_status', 'The dictionary quality status is not recognized.' );
		}

		$missing_keys = array_diff( $expected_keys, array_keys( $dictionary_strings ) );
		$extra_keys   = array_diff( array_keys( $dictionary_strings ), $expected_keys );

		foreach ( array_keys( $dictionary_strings ) as $dictionary_key ) {
			if ( ! self::is_safe_dictionary_key( $dictionary_key ) ) {
				$issues[] = self::issue( 'error', 'unsafe_key', sprintf( 'Unsafe dictionary key: %s', (string) $dictionary_key ), array( 'key' => (string) $dictionary_key ) );
			}
		}

		foreach ( $missing_keys as $missing_key ) {
			$issues[] = self::issue( 'error', 'missing_key', sprintf( 'Missing key: %s', $missing_key ), array( 'key' => $missing_key ) );
		}

		foreach ( $extra_keys as $extra_key ) {
			$issues[] = self::issue( 'warning', 'extra_key', sprintf( 'Unknown key: %s', $extra_key ), array( 'key' => $extra_key ) );
		}

		$same_as_source = 0;

		foreach ( $expected_keys as $key ) {
			if ( ! array_key_exists( $key, $dictionary_strings ) ) {
				continue;
			}

			$template_value = 'runtime-key' === $key_mode
				? ( isset( $template_strings[ $key ] ) ? (string) $template_strings[ $key ] : '' )
				: (string) $key;
			$value = $dictionary_strings[ $key ];

			if ( ! is_string( $value ) ) {
				$issues[] = self::issue( 'error', 'type_mismatch', sprintf( 'Key %s must contain a string value.', $key ), array( 'key' => $key ) );
				continue;
			}

			if ( '' === trim( $value ) ) {
				$issues[] = self::issue( 'warning', 'empty_value', sprintf( 'Key %s is empty.', $key ), array( 'key' => $key ) );
			}

			if ( is_string( $template_value ) && $value === $template_value ) {
				++$same_as_source;
			}

			$template_placeholders   = self::extract_placeholders( $template_value );
			$dictionary_placeholders = self::extract_placeholders( $value );

			if ( $template_placeholders !== $dictionary_placeholders ) {
				$issues[] = self::issue( 'error', 'placeholder_mismatch', sprintf( 'Placeholder mismatch for key %s.', $key ), array( 'key' => $key ) );
			}

			if ( preg_match( '/[\x00-\x08\x0B\x0C\x0E-\x1F]/', $value ) ) {
				$issues[] = self::issue( 'error', 'control_character', sprintf( 'Control characters detected in key %s.', $key ), array( 'key' => $key ) );
			}

			if ( strlen( $template_value ) > 20 && strlen( $value ) > 0 && strlen( $value ) < ( strlen( $template_value ) / 4 ) ) {
				$issues[] = self::issue( 'warning', 'length_outlier', sprintf( 'The translation for %s is much shorter than the template.', $key ), array( 'key' => $key ) );
			}

			if ( substr_count( $template_value, '<' ) !== substr_count( $value, '<' ) || substr_count( $template_value, '>' ) !== substr_count( $value, '>' ) ) {
				$issues[] = self::issue( 'warning', 'markup_imbalance', sprintf( 'HTML or markup balance changed for key %s.', $key ), array( 'key' => $key ) );
			}
		}