Block: Use xxhash'd XOR mask to improve state data lower bits distribution

xxhash is generally well known for its hash key properties, so this is a suitable use case.
We XOR the state data with a partial hash of xxh3(typeID), which provides sufficient hash distribution regardless of the size of state data.
The previous method started to break down once the number of state data bits exceeded the number of significant bits of the type ID (about 10 currently).

As well as being better for hash distribution regardless of state data size, this also reduces the load factor of RuntimeBlockStateRegistry to 1.08 (previously around 1.24), which is a nice bonus.
This commit is contained in:
Dylan K. Taylor 2023-10-17 15:20:31 +01:00
parent d0d16cdeb7
commit 63fcf9879a
No known key found for this signature in database
GPG Key ID: 8927471A91CAFD3D
3 changed files with 36 additions and 9 deletions

View File

@ -50,12 +50,14 @@ use pocketmine\math\Vector3;
use pocketmine\nbt\tag\CompoundTag;
use pocketmine\player\Player;
use pocketmine\utils\AssumptionFailedError;
use pocketmine\utils\Binary;
use pocketmine\world\BlockTransaction;
use pocketmine\world\format\Chunk;
use pocketmine\world\Position;
use pocketmine\world\World;
use function count;
use function get_class;
use function hash;
use const PHP_INT_MAX;
class Block{
@ -64,8 +66,10 @@ class Block{
/**
 * @internal
 * State ID built from the AIR type ID in the upper bits and the masked hash of the AIR type ID in the state data bits.
 * NOTE(review): presumably this must equal the state ID that getStateId() produces for air with zero state data - confirm.
 *
 * Hardcoded int is `Binary::readLong(hash('xxh3', Binary::writeLLong(BlockTypeIds::AIR), binary: true))`
 * It has to be written out literally because a constant expression cannot contain function calls.
 * TODO: it would be much easier if we could just make this 0 or some other easy value
 */
public const EMPTY_STATE_ID = (BlockTypeIds::AIR << self::INTERNAL_STATE_DATA_BITS) | (-7482769108513497636 & self::INTERNAL_STATE_DATA_MASK);
protected BlockIdentifier $idInfo;
protected string $fallbackName;
@ -80,6 +84,23 @@ class Block{
private Block $defaultState;
private int $stateIdXorMask;
/**
 * Builds the per-type mask which is XOR'd with a block's full state data to produce its state ID.
 *
 * The state data occupies the least significant bits of the state ID. PHP arrays use some of the lower bits of
 * integer keys directly without hashing, so poorly distributed state data bits hurt array performance when state
 * IDs are used as keys. XORing those bits with part of an xxh3 hash of the type ID spreads them out.
 *
 * The type ID itself is also placed in the upper bits of the mask. This does nothing for distribution; it simply
 * lets the state ID be produced with fewer operations (micro optimization).
 *
 * @param int $typeId block type ID to derive the mask from
 *
 * @return int the type ID shifted above the state data bits, combined with the masked hash in the state data bits
 */
public static function computeStateIdXorMask(int $typeId) : int{
	//hash the little-endian encoding of the type ID, then reinterpret the 8 raw digest bytes as an integer
	$typeIdHash = Binary::readLong(hash('xxh3', Binary::writeLLong($typeId), binary: true));

	//only the bits that overlap the state data are kept from the hash
	$stateDataScramble = $typeIdHash & self::INTERNAL_STATE_DATA_MASK;
	$shiftedTypeId = $typeId << self::INTERNAL_STATE_DATA_BITS;

	return $shiftedTypeId | $stateDataScramble;
}
/**
* @param string $name English name of the block type (TODO: implement translations)
*/
@ -97,6 +118,9 @@ class Block{
$this->describeBlockOnlyState($calculator);
$this->requiredBlockOnlyStateDataBits = $calculator->getBitsUsed();
$this->stateIdXorMask = self::computeStateIdXorMask($idInfo->getBlockTypeId());
//this must be done last, otherwise the defaultState could have uninitialized fields
$defaultState = clone $this;
$this->defaultState = $defaultState;
$defaultState->defaultState = $defaultState;
@ -152,13 +176,7 @@ class Block{
* {@link RuntimeBlockStateRegistry::fromStateId()}.
*/
public function getStateId() : int{
	//stateIdXorMask comes from computeStateIdXorMask(): it holds the type ID above the state data bits and a
	//hashed pattern within them, so one XOR both attaches the type ID and scrambles the state data bits.
	//NOTE(review): assumes encodeFullState() only sets bits inside INTERNAL_STATE_DATA_MASK, otherwise the XOR
	//would corrupt the type ID bits - confirm.
	return $this->encodeFullState() ^ $this->stateIdXorMask;
}
/**

View File

@ -28,6 +28,7 @@ use function asort;
use function file_get_contents;
use function is_array;
use function json_decode;
use function log;
use function print_r;
use const SORT_STRING;
@ -125,6 +126,14 @@ class BlockTest extends TestCase{
self::assertInstanceOf(Air::class, $block);
}
/**
 * Checks that the state data bits plus the bits needed for block type IDs do not exceed 32 in total.
 */
public function testStateDataSizeNotTooLarge() : void{
	//bits needed to represent every type ID below FIRST_UNUSED_BLOCK_ID, plus one extra bit for custom blocks
	$typeIdBitsMin = ((int) log(BlockTypeIds::FIRST_UNUSED_BLOCK_ID, 2)) + 1 + 1;
	$stateDataBitsMax = 32 - $typeIdBitsMin;

	self::assertLessThanOrEqual(
		32,
		Block::INTERNAL_STATE_DATA_BITS + $typeIdBitsMin,
		"State data size cannot be larger than " . $stateDataBitsMax . " bits (need at least $typeIdBitsMin bits for block type ID)"
	);
}
public function testAsItemFromItem() : void{
$block = VanillaBlocks::FLOWER_POT();
$item = $block->asItem();

File diff suppressed because one or more lines are too long