From 0d39b818b9b39d7be0d626d03c9328c9ed9b6a34 Mon Sep 17 00:00:00 2001 From: Ivan Kochurkin Date: Sun, 30 Jan 2022 16:07:37 +0300 Subject: [PATCH] ATN serialized data: remove shifting by 2, remove UUID --- src/Atn/ATNDeserializer.php | 112 +++--------------------------------- 1 file changed, 7 insertions(+), 105 deletions(-) diff --git a/src/Atn/ATNDeserializer.php b/src/Atn/ATNDeserializer.php index e8e38a1..1f56f62 100644 --- a/src/Atn/ATNDeserializer.php +++ b/src/Atn/ATNDeserializer.php @@ -46,38 +46,7 @@ final class ATNDeserializer { - /** - * This value should never change. Updates following this version are - * reflected as change in the unique ID SERIALIZED_UUID. - */ - public const SERIALIZED_VERSION = 3; - - /** - * This is the earliest supported serialized UUID. - * Stick to serialized version for now, we don't need a UUID instance. - */ - private const BASE_SERIALIZED_UUID = 'AADB8D7E-AEEF-4415-AD2B-8204D6CF042E'; - - /** - * This UUID indicates the serialized ATN contains two sets of IntervalSets, - * where the second set's values are encoded as 32-bit integers to support - * the full Unicode SMP range up to U+10FFFF. - */ - private const ADDED_UNICODE_SMP = '59627784-3BE5-417A-B9EB-8131A7286089'; - - /** - * This list contains all of the currently supported UUIDs, ordered by when - * the feature first appeared in this branch. - */ - private const SUPPORTED_UUIDS = [ - self::BASE_SERIALIZED_UUID, - self::ADDED_UNICODE_SMP, - ]; - - /** - * This is the current serialized UUID. - */ - private const SERIALIZED_UUID = self::ADDED_UNICODE_SMP; + public const SERIALIZED_VERSION = 4; /** @var ATNDeserializationOptions */ private $deserializationOptions; @@ -88,9 +57,6 @@ final class ATNDeserializer /** @var int */ private $pos = 0; - /** @var string */ - private $uuid = ''; - /** @var array|null */ private $stateFactories; @@ -102,38 +68,10 @@ public function __construct(?ATNDeserializationOptions $options = null) $this->deserializationOptions = $options ?? ATNDeserializationOptions::defaultOptions(); } - /** - * Determines if a particular serialized representation of an ATN supports - * a particular feature, identified by the {@see UUID} used for serializing - * the ATN at the time the feature was first introduced. - * - * @param string $feature The {@see UUID} marking the first time the - * feature was supported in the serialized ATN. - * @param string $actualUuid The {@see UUID} of the actual serialized ATN - * which is currently being deserialized. - * - * @return bool `true` if the `actualUuid` value represents a serialized - * ATN at or after the feature identified by `feature` was - * introduced; otherwise, `false`. - */ - protected function isFeatureSupported(string $feature, string $actualUuid) : bool - { - $featureIndex = \array_search($feature, self::SUPPORTED_UUIDS, true); - - if ($featureIndex === false) { - return false; - } - - $actualUuidIndex = \array_search($actualUuid, self::SUPPORTED_UUIDS, true); - - return $actualUuidIndex >= $featureIndex; - } - public function deserialize(string $data) : ATN { $this->reset($data); $this->checkVersion(); - $this->checkUUID(); $atn = $this->readATN(); $this->readStates($atn); $this->readRules($atn); @@ -145,14 +83,10 @@ public function deserialize(string $data) : ATN return $this->readInt(); }); - // Next, if the ATN was serialized with the Unicode SMP feature, - // deserialize sets with 32-bit arguments <= U+10FFFF. - - if ($this->isFeatureSupported(self::ADDED_UNICODE_SMP, $this->uuid)) { - $this->readSets($sets, function () { - return $this->readInt32(); - }); - } + // Next, deserialize sets with 32-bit arguments <= U+10FFFF. + $this->readSets($sets, function () { + return $this->readInt32(); + }); $this->readEdges($atn, $sets); $this->readDecisions($atn); @@ -178,10 +112,8 @@ private function reset(string $data) : void return; } - $this->data = [StringUtils::codePoint($characters[0])]; - for ($i = 1, $length = \count($characters); $i < $length; $i++) { - $code = StringUtils::codePoint($characters[$i]); - $this->data[] = $code > 1 ? $code - 2 : $code + 65533; + for ($i = 0, $length = \count($characters); $i < $length; $i++) { + $this->data[] = StringUtils::codePoint($characters[$i]); } $this->pos = 0; @@ -201,21 +133,6 @@ private function checkVersion() : void } } - private function checkUUID() : void - { - $uuid = $this->readUUID(); - - if (!\in_array($uuid, self::SUPPORTED_UUIDS, true)) { - throw new \InvalidArgumentException(\sprintf( - 'Could not deserialize ATN with UUID: %s (expected %s or a legacy UUID).', - $uuid, - self::SERIALIZED_UUID - )); - } - - $this->uuid = $uuid; - } - private function readATN() : ATN { $grammarType = $this->readInt(); @@ -739,21 +656,6 @@ private function readInt32() : int return $low | ($high << 16); } - private function readUUID() : string - { - $bb = []; - for ($i=0; $i < 8; $i++) { - $int = $this->readInt(); - $bb[] = $int & 0xFF; - $bb[] = ($int >> 8) & 0xFF; - } - - $bb = \array_reverse($bb); - $hex = \strtoupper(\bin2hex(\implode(\array_map('chr', $bb)))); - - return \vsprintf('%s%s-%s-%s-%s-%s%s%s', \str_split($hex, 4)); - } - /** * @param array $sets */