TextFormat: improved exception messages for PCRE failures

This commit is contained in:
Dylan K. Taylor 2020-06-15 23:43:01 +01:00
parent 23b97d8e2d
commit a4e250a3e6

View File

@ -34,6 +34,12 @@ use function preg_split;
use function str_repeat;
use function str_replace;
use const JSON_UNESCAPED_SLASHES;
use const PREG_BACKTRACK_LIMIT_ERROR;
use const PREG_BAD_UTF8_ERROR;
use const PREG_BAD_UTF8_OFFSET_ERROR;
use const PREG_INTERNAL_ERROR;
use const PREG_JIT_STACKLIMIT_ERROR;
use const PREG_RECURSION_LIMIT_ERROR;
use const PREG_SPLIT_DELIM_CAPTURE;
use const PREG_SPLIT_NO_EMPTY;
@ -68,17 +74,26 @@ abstract class TextFormat{
public const ITALIC = TextFormat::ESCAPE . "o";
public const RESET = TextFormat::ESCAPE . "r";
private static function makePcreError(string $info) : \InvalidArgumentException{
throw new \InvalidArgumentException("$info: Encountered PCRE error " . preg_last_error() . " during regex operation");
private static function makePcreError() : \InvalidArgumentException{
$errorCode = preg_last_error();
$message = [
PREG_INTERNAL_ERROR => "Internal error",
PREG_BACKTRACK_LIMIT_ERROR => "Backtrack limit reached",
PREG_RECURSION_LIMIT_ERROR => "Recursion limit reached",
PREG_BAD_UTF8_ERROR => "Malformed UTF-8",
PREG_BAD_UTF8_OFFSET_ERROR => "Bad UTF-8 offset",
PREG_JIT_STACKLIMIT_ERROR => "PCRE JIT stack limit reached"
][$errorCode] ?? "Unknown (code $errorCode)";
throw new \InvalidArgumentException("PCRE error: $message");
}
/**
* @throws \InvalidArgumentException
*/
private static function preg_replace(string $pattern, string $replacement, string $string, string $errorContext) : string{
private static function preg_replace(string $pattern, string $replacement, string $string) : string{
$result = preg_replace($pattern, $replacement, $string);
if($result === null){
throw self::makePcreError($errorContext);
throw self::makePcreError();
}
return $result;
}
@ -90,7 +105,7 @@ abstract class TextFormat{
*/
public static function tokenize(string $string) : array{
$result = preg_split("/(" . TextFormat::ESCAPE . "[0-9a-fk-or])/u", $string, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
if($result === false) throw self::makePcreError("Failed to tokenize string");
if($result === false) throw self::makePcreError();
return $result;
}
@ -101,11 +116,11 @@ abstract class TextFormat{
*/
public static function clean(string $string, bool $removeFormat = true) : string{
$string = mb_scrub($string, 'UTF-8');
$string = self::preg_replace("/[\x{E000}-\x{F8FF}]/u", "", $string, "Stripping private-area characters"); //remove unicode private-use-area characters (they might break the console)
$string = self::preg_replace("/[\x{E000}-\x{F8FF}]/u", "", $string); //remove unicode private-use-area characters (they might break the console)
if($removeFormat){
$string = str_replace(TextFormat::ESCAPE, "", self::preg_replace("/" . TextFormat::ESCAPE . "[0-9a-fk-or]/u", "", $string, "Removing color codes"));
$string = str_replace(TextFormat::ESCAPE, "", self::preg_replace("/" . TextFormat::ESCAPE . "[0-9a-fk-or]/u", "", $string));
}
return str_replace("\x1b", "", self::preg_replace("/\x1b[\\(\\][[0-9;\\[\\(]+[Bm]/u", "", $string, "Removing special characters"));
return str_replace("\x1b", "", self::preg_replace("/\x1b[\\(\\][[0-9;\\[\\(]+[Bm]/u", "", $string));
}
/**
@ -114,7 +129,7 @@ abstract class TextFormat{
* @param string $placeholder default "&"
*/
public static function colorize(string $string, string $placeholder = "&") : string{
return self::preg_replace('/' . preg_quote($placeholder, "/") . '([0-9a-fk-or])/u', TextFormat::ESCAPE . '$1', $string, "Colorizing string");
return self::preg_replace('/' . preg_quote($placeholder, "/") . '([0-9a-fk-or])/u', TextFormat::ESCAPE . '$1', $string);
}
/**