AsyncPool: Switch to a more efficient collection algorithm, revamp internals (#2493)

This greatly improves GC performance by being more intelligent about how it collects garbage tasks. It knows that if a given task in a worker's queue is not finished, none of the tasks queued behind it can be finished either, so there's no point checking them.

This also presents the opportunity to clean up a lot of async pool internals, so I've taken it and torched a lot of garbage.
This commit is contained in:
Dylan K. Taylor 2018-10-25 15:36:38 +01:00 committed by GitHub
parent a7eaec13b9
commit 212d72657a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 93 deletions

View File

@ -39,17 +39,11 @@ class AsyncPool{
/** @var int */ /** @var int */
private $workerMemoryLimit; private $workerMemoryLimit;
/** @var AsyncTask[] */ /** @var \SplQueue[]|AsyncTask[][] */
private $tasks = []; private $taskQueues = [];
/** @var int[] */
private $taskWorkers = [];
/** @var int */
private $nextTaskId = 1;
/** @var AsyncWorker[] */ /** @var AsyncWorker[] */
private $workers = []; private $workers = [];
/** @var int[] */
private $workerUsage = [];
/** @var \Closure[] */ /** @var \Closure[] */
private $workerStartHooks = []; private $workerStartHooks = [];
@ -124,11 +118,13 @@ class AsyncPool{
*/ */
private function getWorker(int $worker) : AsyncWorker{ private function getWorker(int $worker) : AsyncWorker{
if(!isset($this->workers[$worker])){ if(!isset($this->workers[$worker])){
$this->workerUsage[$worker] = 0;
$this->workers[$worker] = new AsyncWorker($this->logger, $worker, $this->workerMemoryLimit); $this->workers[$worker] = new AsyncWorker($this->logger, $worker, $this->workerMemoryLimit);
$this->workers[$worker]->setClassLoader($this->classLoader); $this->workers[$worker]->setClassLoader($this->classLoader);
$this->workers[$worker]->start(self::WORKER_START_OPTIONS); $this->workers[$worker]->start(self::WORKER_START_OPTIONS);
$this->taskQueues[$worker] = new \SplQueue();
foreach($this->workerStartHooks as $hook){ foreach($this->workerStartHooks as $hook){
$hook($worker); $hook($worker);
} }
@ -147,18 +143,15 @@ class AsyncPool{
if($worker < 0 or $worker >= $this->size){ if($worker < 0 or $worker >= $this->size){
throw new \InvalidArgumentException("Invalid worker $worker"); throw new \InvalidArgumentException("Invalid worker $worker");
} }
if($task->getTaskId() !== null){ if($task->isSubmitted()){
throw new \InvalidArgumentException("Cannot submit the same AsyncTask instance more than once"); throw new \InvalidArgumentException("Cannot submit the same AsyncTask instance more than once");
} }
$task->progressUpdates = new \Threaded; $task->progressUpdates = new \Threaded;
$task->setTaskId($this->nextTaskId++); $task->setSubmitted();
$this->tasks[$task->getTaskId()] = $task;
$this->getWorker($worker)->stack($task); $this->getWorker($worker)->stack($task);
$this->workerUsage[$worker]++; $this->taskQueues[$worker]->enqueue($task);
$this->taskWorkers[$task->getTaskId()] = $worker;
} }
/** /**
@ -173,8 +166,8 @@ class AsyncPool{
public function selectWorker() : int{ public function selectWorker() : int{
$worker = null; $worker = null;
$minUsage = PHP_INT_MAX; $minUsage = PHP_INT_MAX;
foreach($this->workerUsage as $i => $usage){ foreach($this->taskQueues as $i => $queue){
if($usage < $minUsage){ if(($usage = $queue->count()) < $minUsage){
$worker = $i; $worker = $i;
$minUsage = $usage; $minUsage = $usage;
if($usage === 0){ if($usage === 0){
@ -205,7 +198,7 @@ class AsyncPool{
* @return int * @return int
*/ */
public function submitTask(AsyncTask $task) : int{ public function submitTask(AsyncTask $task) : int{
if($task->getTaskId() !== null){ if($task->isSubmitted()){
throw new \InvalidArgumentException("Cannot submit the same AsyncTask instance more than once"); throw new \InvalidArgumentException("Cannot submit the same AsyncTask instance more than once");
} }
@ -214,43 +207,25 @@ class AsyncPool{
return $worker; return $worker;
} }
/**
* Removes a completed or crashed task from the pool.
*
* @param AsyncTask $task
* @param bool $force
*/
private function removeTask(AsyncTask $task, bool $force = false) : void{
if(isset($this->taskWorkers[$task->getTaskId()])){
if(!$force and ($task->isRunning() or !$task->isGarbage())){
return;
}
$this->workerUsage[$this->taskWorkers[$task->getTaskId()]]--;
}
unset($this->tasks[$task->getTaskId()]);
unset($this->taskWorkers[$task->getTaskId()]);
}
/**
* Collects garbage from running workers.
*/
private function collectWorkers() : void{
foreach($this->workers as $worker){
$worker->collect();
}
}
/** /**
* Collects finished and/or crashed tasks from the workers, firing their on-completion hooks where appropriate. * Collects finished and/or crashed tasks from the workers, firing their on-completion hooks where appropriate.
* *
* @throws \ReflectionException * @throws \ReflectionException
*/ */
public function collectTasks() : void{ public function collectTasks() : void{
foreach($this->tasks as $task){ foreach($this->taskQueues as $worker => $queue){
$doGC = false;
while(!$queue->isEmpty()){
/** @var AsyncTask $task */
$task = $queue->bottom();
$task->checkProgressUpdates(); $task->checkProgressUpdates();
if($task->isGarbage() and !$task->isRunning() and !$task->isCrashed()){ if(!$task->isRunning() and $task->isGarbage()){ //make sure the task actually executed before trying to collect
if(!$task->hasCancelledRun()){ $doGC = true;
$queue->dequeue();
if($task->isCrashed()){
$this->logger->critical("Could not execute asynchronous task " . (new \ReflectionClass($task))->getShortName() . ": Task crashed");
}elseif(!$task->hasCancelledRun()){
try{ try{
/* /*
* It's possible for a task to submit a progress update and then finish before the progress * It's possible for a task to submit a progress update and then finish before the progress
@ -268,23 +243,22 @@ class AsyncPool{
$this->logger->logException($e); $this->logger->logException($e);
} }
} }
}else{
$this->removeTask($task); break; //current task is still running, skip to next worker
}elseif($task->isCrashed()){ }
$this->logger->critical("Could not execute asynchronous task " . (new \ReflectionClass($task))->getShortName() . ": Task crashed"); }
$this->removeTask($task, true); if($doGC){
$this->workers[$worker]->collect();
} }
} }
$this->collectWorkers();
} }
public function shutdownUnusedWorkers() : int{ public function shutdownUnusedWorkers() : int{
$ret = 0; $ret = 0;
foreach($this->workerUsage as $i => $usage){ foreach($this->taskQueues as $i => $queue){
if($usage === 0){ if($queue->isEmpty()){
$this->workers[$i]->quit(); $this->workers[$i]->quit();
unset($this->workers[$i], $this->workerUsage[$i]); unset($this->workers[$i], $this->taskQueues[$i]);
$ret++; $ret++;
} }
} }
@ -301,24 +275,21 @@ class AsyncPool{
foreach($this->workers as $worker){ foreach($this->workers as $worker){
/** @var AsyncTask $task */ /** @var AsyncTask $task */
while(($task = $worker->unstack()) !== null){ while(($task = $worker->unstack()) !== null){
//cancelRun() is not strictly necessary here, but it might be used to inform plugins of the task state //NOOP: the below loop will deal with marking tasks as garbage
//(i.e. it never executed). }
}
foreach($this->taskQueues as $queue){
while(!$queue->isEmpty()){
/** @var AsyncTask $task */
$task = $queue->dequeue();
$task->cancelRun(); $task->cancelRun();
$this->removeTask($task, true);
} }
} }
foreach($this->tasks as $task){
$task->cancelRun();
$this->removeTask($task, true);
}
$this->taskWorkers = [];
$this->tasks = [];
foreach($this->workers as $worker){ foreach($this->workers as $worker){
$worker->quit(); $worker->quit();
} }
$this->workers = []; $this->workers = [];
$this->workerUsage = []; $this->taskQueues = [];
} }
} }

View File

@ -59,8 +59,8 @@ abstract class AsyncTask extends Collectable{
private $result = null; private $result = null;
private $serialized = false; private $serialized = false;
private $cancelRun = false; private $cancelRun = false;
/** @var int|null */ /** @var bool */
private $taskId = null; private $submitted = false;
private $crashed = false; private $crashed = false;
@ -114,15 +114,15 @@ abstract class AsyncTask extends Collectable{
$this->serialized = $serialize; $this->serialized = $serialize;
} }
public function setTaskId(int $taskId) : void{ public function setSubmitted() : void{
$this->taskId = $taskId; $this->submitted = true;
} }
/** /**
* @return int|null * @return bool
*/ */
public function getTaskId() : ?int{ public function isSubmitted() : bool{
return $this->taskId; return $this->submitted;
} }
/** /**