From ecc1e1f698da011dbab202854fbf0f56a514a680 Mon Sep 17 00:00:00 2001 From: "Dylan K. Taylor" Date: Wed, 20 Jan 2021 20:04:21 +0000 Subject: [PATCH 1/2] RegionLoader: improve performance of region header validation I was unaware that fseek actually makes a syscall which is rather costly, which became painfully obvious during large world conversions on PM4. On average this problem appeared to be adding about 5ms to the load time for a newly loaded region, which is insanely expensive. --- src/pocketmine/level/format/io/region/RegionLoader.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pocketmine/level/format/io/region/RegionLoader.php b/src/pocketmine/level/format/io/region/RegionLoader.php index 40a699a16..7c34cf4d0 100644 --- a/src/pocketmine/level/format/io/region/RegionLoader.php +++ b/src/pocketmine/level/format/io/region/RegionLoader.php @@ -340,6 +340,8 @@ class RegionLoader{ /** @var int[] $usedOffsets */ $usedOffsets = []; + $fileSize = filesize($this->filePath); + if($fileSize === false) throw new AssumptionFailedError("filesize() should not return false here"); for($i = 0; $i < 1024; ++$i){ $entry = $this->locationTable[$i]; if($entry === null){ @@ -352,8 +354,7 @@ class RegionLoader{ //TODO: more validity checks - fseek($this->filePointer, $fileOffset); - if(feof($this->filePointer)){ + if($fileOffset >= $fileSize){ throw new CorruptedRegionException("Region file location offset x=$x,z=$z points to invalid file location $fileOffset"); } if(isset($usedOffsets[$offset])){ From e8ffab1787adbae343410873c0f87a7fda9fd909 Mon Sep 17 00:00:00 2001 From: "Dylan K. Taylor" Date: Wed, 20 Jan 2021 21:05:44 +0000 Subject: [PATCH 2/2] RegionLoader: avoid hitting the disk twice during chunk reads this provides some performance improvement (although it's difficult to measure because of cache). this does mean that we read some garbage data during chunk reads, but it's less costly than hitting the disk twice. --- .../level/format/io/region/RegionLoader.php | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/pocketmine/level/format/io/region/RegionLoader.php b/src/pocketmine/level/format/io/region/RegionLoader.php index 7c34cf4d0..05cfe40b3 100644 --- a/src/pocketmine/level/format/io/region/RegionLoader.php +++ b/src/pocketmine/level/format/io/region/RegionLoader.php @@ -27,6 +27,8 @@ use pocketmine\level\format\ChunkException; use pocketmine\level\format\io\exception\CorruptedChunkException; use pocketmine\utils\AssumptionFailedError; use pocketmine\utils\Binary; +use pocketmine\utils\BinaryDataException; +use pocketmine\utils\BinaryStream; use function assert; use function ceil; use function chr; @@ -145,34 +147,34 @@ class RegionLoader{ fseek($this->filePointer, $this->locationTable[$index]->getFirstSector() << 12); - $prefix = fread($this->filePointer, 4); - if($prefix === false or strlen($prefix) !== 4){ - throw new CorruptedChunkException("Corrupted chunk header detected (unexpected end of file reading length prefix)"); - } - $length = Binary::readInt($prefix); + /* + * this might cause us to read some junk, but under normal circumstances it won't be any more than 4096 bytes wasted. + * doing this in a single call is faster than making two seeks and reads to fetch the chunk. + * this relies on the assumption that the end of the file is always padded to a multiple of 4096 bytes. + */ + $bytesToRead = $this->locationTable[$index]->getSectorCount() << 12; + $payload = fread($this->filePointer, $bytesToRead); - if($length <= 0){ //TODO: if we reached here, the locationTable probably needs updating - return null; - } - if($length > self::MAX_SECTOR_LENGTH){ //corrupted - throw new CorruptedChunkException("Length for chunk x=$x,z=$z ($length) is larger than maximum " . self::MAX_SECTOR_LENGTH); + if($payload === false || strlen($payload) !== $bytesToRead){ + throw new CorruptedChunkException("Corrupted chunk detected (unexpected EOF, truncated or non-padded chunk found)"); } + $stream = new BinaryStream($payload); - if($length > ($this->locationTable[$index]->getSectorCount() << 12)){ //Invalid chunk, bigger than defined number of sectors - throw new CorruptedChunkException("Chunk length mismatch (expected " . ($this->locationTable[$index]->getSectorCount() << 12) . " sectors, got $length sectors)"); - } + try{ + $length = $stream->getInt(); + if($length <= 0){ //TODO: if we reached here, the locationTable probably needs updating + return null; + } - $chunkData = fread($this->filePointer, $length); - if($chunkData === false or strlen($chunkData) !== $length){ - throw new CorruptedChunkException("Corrupted chunk detected (unexpected end of file reading chunk data)"); - } + $compression = $stream->getByte(); + if($compression !== self::COMPRESSION_ZLIB and $compression !== self::COMPRESSION_GZIP){ + throw new CorruptedChunkException("Invalid compression type (got $compression, expected " . self::COMPRESSION_ZLIB . " or " . self::COMPRESSION_GZIP . ")"); + } - $compression = ord($chunkData[0]); - if($compression !== self::COMPRESSION_ZLIB and $compression !== self::COMPRESSION_GZIP){ - throw new CorruptedChunkException("Invalid compression type (got $compression, expected " . self::COMPRESSION_ZLIB . " or " . self::COMPRESSION_GZIP . ")"); + return $stream->get($length - 1); //length prefix includes the compression byte + }catch(BinaryDataException $e){ + throw new CorruptedChunkException("Corrupted chunk detected: " . $e->getMessage(), 0, $e); } - - return substr($chunkData, 1); } /**