nextcloud-custom-apps-face-.../facerecognition/lib/BackgroundJob/Tasks/CreateClustersTask.php
2024-09-03 09:12:12 +05:00

430 lines
14 KiB
PHP

<?php
/**
* @copyright Copyright (c) 2017-2023 Matias De lellis <mati86dl@gmail.com>
* @copyright Copyright (c) 2018, Branko Kokanovic <branko@kokanovic.org>
*
* @author Branko Kokanovic <branko@kokanovic.org>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
use OCP\IUser;
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
use OCA\FaceRecognition\Db\FaceMapper;
use OCA\FaceRecognition\Db\ImageMapper;
use OCA\FaceRecognition\Db\PersonMapper;
use OCA\FaceRecognition\Helper\Euclidean;
use OCA\FaceRecognition\Helper\Requirements;
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
use OCA\FaceRecognition\Service\SettingsService;
/**
* Task that creates person clusters for each user.
*/
class CreateClustersTask extends FaceRecognitionBackgroundTask {
/** @var PersonMapper Person mapper*/
private $personMapper;
/** @var ImageMapper Image mapper*/
private $imageMapper;
/** @var FaceMapper Face mapper*/
private $faceMapper;
/** @var SettingsService Settings service*/
private $settingsService;
/**
 * Store the mappers and the settings service used by this task.
 *
 * @param PersonMapper $personMapper
 * @param ImageMapper $imageMapper
 * @param FaceMapper $faceMapper
 * @param SettingsService $settingsService
 */
public function __construct(
	PersonMapper $personMapper,
	ImageMapper $imageMapper,
	FaceMapper $faceMapper,
	SettingsService $settingsService
) {
	// The parent constructor runs before any dependency is stored.
	parent::__construct();

	$this->settingsService = $settingsService;
	$this->faceMapper = $faceMapper;
	$this->imageMapper = $imageMapper;
	$this->personMapper = $personMapper;
}
/**
 * @inheritdoc
 *
 * @return string Short human readable summary of what this task does
 */
public function description() {
	$summary = 'Create new persons or update existing persons';
	return $summary;
}
/**
 * @inheritdoc
 *
 * Runs the clustering decision for every eligible user of the context.
 * This is a generator: it yields once after each user has been handled
 * and its final return value is true.
 */
public function execute(FaceRecognitionContext $context) {
	$this->setContext($context);

	foreach ($this->context->getEligibleUsers() as $userId) {
		$this->createClusterIfNeeded($userId);
		// Hand control back to the caller between users.
		yield;
	}

	return true;
}
/**
 * Decide whether the clusters (persons) of a user have to be created or
 * recreated and, if so, cluster the user's faces and persist the result.
 *
 * When persons already exist, clustering runs again only if a recreation
 * was requested through the settings, there are enough new faces, or some
 * persons are stale. When no persons exist yet, clustering runs only if it
 * is forced (tests) or enough data has been collected.
 *
 * Faces are clustered in one or more batches. With batching enabled, the
 * number of batches is the smallest one that keeps every batch within the
 * configured batch size (never below 2000 faces), and faces are spread
 * evenly across them. With no faces at all a single empty batch is used.
 *
 * @param string $userId ID of the user whose faces are clustered
 * @return void
 */
private function createClusterIfNeeded(string $userId) {
	$modelId = $this->settingsService->getCurrentFaceModel();

	// Depending on whether we already have clusters, decide if we should create/recreate them.
	//
	$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
	if ($hasPersons) {
		$forceRecreate = $this->needRecreateBySettings($userId);
		$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
		$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

		if ($forceRecreate) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
		}
		else if ($haveEnoughFaces || $haveStaled) {
			$this->logInfo('Face clustering will be recreated with new information or changes');
		}
		else {
			// If there are no invalid persons and no recent new faces, there is no need to recreate the clusters
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return;
		}
	}
	else {
		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
		}
		else if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
		}
		else {
			$this->logInfo(
				'Skipping cluster creation, not enough data (yet) collected. ' .
				'For cluster creation, you need either one of the following:');
			$this->logInfo('* have 1000 faces already processed');
			$this->logInfo('* or you need to have 95% of you images processed');
			$this->logInfo('Use stats command to track progress');
			return;
		}
	}

	// Ok. If we are here, the clusters must be recreated.
	//
	$min_face_size = $this->settingsService->getMinimumFaceSize();
	$min_confidence = $this->settingsService->getMinimumConfidence();

	$faces = array_merge(
		$this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence),
		$this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence)
	);

	$facesCount = count($faces);
	$this->logInfo('There are ' . $facesCount . ' faces for clustering');

	// By default everything is clustered in one single batch.
	$noSlices = 1;
	$sliceSize = $facesCount;

	$defaultSlice = $this->settingsService->getClusterigBatchSize();
	// Only split when batching is enabled and there is something to split.
	// Checking the faces count also prevents a division by zero below, since
	// the batch size is clamped to the faces count.
	if ($defaultSlice > 0 && $facesCount > 0) {
		// The minimum batch size is 2000 faces
		$defaultSlice = max($defaultSlice, 2000);
		// The maximum batch size is the faces count.
		$defaultSlice = min($defaultSlice, $facesCount);

		// Smallest number of batches that keeps each batch within the batch size.
		// Rounding up (instead of "floor + 1") avoids an extra batch when the
		// faces count is an exact multiple of the batch size.
		$noSlices = (int) ceil($facesCount / $defaultSlice);
		// Spread the faces evenly; cast to int because array_slice() takes integers.
		$sliceSize = (int) ceil($facesCount / $noSlices);
	}

	$this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces');

	$newClusters = [];
	for ($i = 0; $i < $noSlices ; $i++) {
		$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
		// array_merge() renumbers integer keys, so the cluster labels of
		// different batches never overwrite each other.
		$newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced));
	}

	// Cluster is associative array where key is person ID.
	// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
	// and for new clusters is whatever chinese whispers decides to identify them.
	//
	$currentClusters = $this->getCurrentClusters($faces);

	$this->logInfo(count($newClusters) . ' clusters found after clustering');

	// New merge
	$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
	$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

	// Remove all orphaned persons (those without any faces)
	// NOTE: we will do this for all models, not just for current one, but this is not problem.
	$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
	if ($orphansDeleted > 0) {
		$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
	}

	// Reset the flags so the clusters are not created/recreated again unnecessarily.
	$this->settingsService->setNeedRecreateClusters(false, $userId);
	$this->settingsService->_setForceCreateClusters(false, $userId);
}
/**
 * Tell whether there are enough new faces (faces without a person) to
 * justify recreating the clusters. That is the case when there are:
 * - 25 or more faces without a person, or
 * - fewer than 25, but the oldest of them was created more than 2 hours ago.
 *
 * (basically, we want to avoid recreating clusters for each new face being uploaded,
 * however, we don't want to wait too much as clusters could be changed a lot)
 *
 * @param string $userId ID of the user to check
 * @param int $modelId ID of the face model to check
 * @return bool true when the new faces alone justify a recreation
 */
private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
	$pendingFaces = $this->faceMapper->countFaces($userId, $modelId, true);
	$this->logDebug(sprintf(
		'Found %d faces without associated persons for user %s and model %d',
		$pendingFaces,
		$userId,
		$modelId
	));

	// todo: get rid of magic numbers (move to config)
	if ($pendingFaces === 0) {
		return false;
	}
	if ($pendingFaces >= 25) {
		return true;
	}

	// Only a few faces are waiting, so look at how long the oldest one has been waiting.
	$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
	$createdAtSeconds = $oldestFace->creationTime->getTimestamp();
	$nowSeconds = (new \DateTime())->getTimestamp();
	$this->logDebug(sprintf(
		'Oldest face without persons for user %s and model %d is from %s',
		$userId,
		$modelId,
		$oldestFace->creationTime->format('Y-m-d H:i:s')
	));

	// todo: get rid of magic numbers (move to config)
	$twoHoursInSeconds = 2 * 60 * 60;
	return ($nowSeconds - $createdAtSeconds) > $twoHoursInSeconds;
}
/**
 * Tell whether the person mapper reports at least one cluster for the
 * filtered count (third argument true; presumably this selects only the
 * stale/invalid clusters - confirm against PersonMapper::countClusters).
 *
 * @param string $userId ID of the user to check
 * @param int $modelId ID of the face model to check
 * @return bool true when that count is greater than zero
 */
private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
	$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);
	return $staleClusters > 0;
}
/**
 * Read the per-user "need recreate clusters" flag from the settings service.
 *
 * @param string $userId ID of the user to check
 * @return bool value of the flag
 */
private function needRecreateBySettings(string $userId): bool {
	$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);
	return $recreateRequested;
}
/**
 * Tell whether enough data has been collected to create the clusters of a
 * user for the first time. That is the case when creation is forced (tests),
 * or when the user has images, some of them are processed, and either more
 * than 1000 faces were found or more than 95% of the images are processed.
 *
 * @param string $userId ID of the user to check
 * @param int $modelId ID of the face model to check
 * @return bool true when the first clustering should run
 */
private function needCreateFirstTime(string $userId, int $modelId): bool {
	// User should not be able to use this directly, used in tests
	if ($this->settingsService->_getForceCreateClusters($userId)) {
		return true;
	}

	$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
	if ($totalImages === 0) {
		return false;
	}

	$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
	if ($processedImages === 0) {
		return false;
	}

	// Basic criteria without which clusters should not even be considered:
	// they would be small and not "stable" enough, so it is better to wait for more images.
	// todo: get rid of magic numbers (move to config)
	$knownFaces = $this->faceMapper->countFaces($userId, $modelId);
	if ($knownFaces > 1000) {
		return true;
	}

	$processedRatio = $processedImages / floatval($totalImages);
	return $processedRatio > 0.95;
}
/**
 * Group the IDs of the given faces by the person they currently belong to.
 * Faces whose person is null are left out.
 *
 * @param array $faces Face objects exposing the "person" and "id" properties
 * @return array Person ID => list of face IDs, in the order the faces were given
 */
private function getCurrentClusters(array $faces): array {
	$facesByPerson = [];
	foreach ($faces as $face) {
		if ($face->person === null) {
			continue;
		}
		// Appending creates the per-person list on first use.
		$facesByPerson[$face->person][] = $face->id;
	}
	return $facesByPerson;
}
/**
 * Cluster the given faces with Chinese Whispers.
 *
 * Two faces are connected by an edge when the distance between their
 * descriptors is below the configured sensitivity. A face without a
 * descriptor is only connected to itself. The pdlib functions are used
 * when pdlib is loaded, otherwise the PHP implementations.
 *
 * @param array $faces List (keys 0..n-1) of face objects exposing "descriptor" and "id"
 * @return array Cluster label => list of face IDs
 */
private function getNewClusters(array $faces): array {
	// Clustering parameters
	$sensitivity = $this->settingsService->getSensitivity();
	$usePdlib = Requirements::pdlibLoaded();

	// Create edges (neighbors) for Chinese Whispers
	$edges = [];
	$total = count($faces);
	for ($a = 0; $a < $total; $a++) {
		$first = $faces[$a];
		if (!isset($first->descriptor)) {
			$edges[] = [$a, $a];
			continue;
		}
		// Starting at $a also compares the face with itself.
		for ($b = $a; $b < $total; $b++) {
			$second = $faces[$b];
			if (!isset($second->descriptor)) {
				continue;
			}
			$distance = $usePdlib
				? dlib_vector_length($first->descriptor, $second->descriptor)
				: Euclidean::distance($first->descriptor, $second->descriptor);
			if ($distance < $sensitivity) {
				$edges[] = [$a, $b];
			}
		}
	}

	// Given the edges get the list of labels (found clusters) for each face.
	if ($usePdlib) {
		$labelsByIndex = dlib_chinese_whispers($edges);
	} else {
		// The clustering algorithm actually expects ordered lists.
		// NOTE(review): both helpers are expected to fill their second
		// argument (presumably taken by reference) - confirm in ChineseWhispers.
		$orderedEdges = [];
		ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
		usort($orderedEdges, function($left, $right) {
			if ($left[0] === $right[0]) {
				return $left[1] - $right[1];
			}
			return $left[0] - $right[0];
		});

		$labelsByIndex = [];
		ChineseWhispers::predict($orderedEdges, $labelsByIndex);
	}

	// Turn "face index => label" into "label => list of face IDs".
	$newClusters = [];
	$labelCount = count($labelsByIndex);
	for ($index = 0; $index < $labelCount; $index++) {
		$newClusters[$labelsByIndex[$index]][] = $faces[$index]->id;
	}
	return $newClusters;
}
/**
 * Merge freshly computed clusters with the existing ones, reusing existing
 * person IDs where possible.
 *
 * For every face the transition "old person -> new cluster" is recorded and
 * the transitions are counted. Starting with the most frequent transition,
 * each new cluster takes over the ID of the old person, as long as neither
 * the new cluster nor the old person has been assigned yet. Every other new
 * cluster gets a fresh ID, starting right after the highest old person ID
 * (or at 1 when there are no old clusters).
 *
 * todo: only reason this is public is because of tests. Go figure it out better.
 *
 * @param array $oldCluster Existing clusters: person ID => list of face IDs
 * @param array $newCluster New clusters: cluster label => list of face IDs
 * @return array Merged clusters: person ID => list of face IDs
 */
public function mergeClusters(array $oldCluster, array $newCluster): array {
	// Index the old clusters by face ID so that finding the old person of a
	// face is a single lookup instead of a scan over all old clusters.
	$oldPersonByFace = array();
	foreach ($oldCluster as $oldPerson => $oldFaces) {
		foreach ($oldFaces as $oldFace) {
			// Keep the first person found, like a sequential search would.
			if (!isset($oldPersonByFace[$oldFace])) {
				$oldPersonByFace[$oldFace] = $oldPerson;
			}
		}
	}

	// Create map of face transitions
	$transitions = array();
	foreach ($newCluster as $newPerson => $newFaces) {
		foreach ($newFaces as $newFace) {
			// null means the face did not belong to any person before.
			$oldPersonFound = $oldPersonByFace[$newFace] ?? null;
			$transitions[$newFace] = array($oldPersonFound, $newPerson);
		}
	}

	// Count transitions
	$transitionCount = array();
	foreach ($transitions as $transition) {
		// A null old person becomes an empty string in the key.
		$key = $transition[0] . ':' . $transition[1];
		$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
	}

	// Create map of new person -> old person transitions
	$newOldPersonMapping = array();
	$oldPersonProcessed = array(); // store this, so we don't waste cycles for in_array()
	arsort($transitionCount);
	foreach ($transitionCount as $transitionKey => $count) {
		$transition = explode(":", $transitionKey);
		// intval('') is 0, so "no old person" is represented by 0 from here on.
		$oldPerson = intval($transition[0]);
		$newPerson = intval($transition[1]);
		if (!array_key_exists($newPerson, $newOldPersonMapping)) {
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a more frequent transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}
	}

	// Starting with new cluster, convert all new person IDs with old person IDs
	$maxOldPersonId = 1;
	if (count($oldCluster) > 0) {
		$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
	}

	$result = array();
	foreach ($newCluster as $newPerson => $newFaces) {
		// A cluster without faces produced no transition and therefore has
		// no mapping; treat it as a new person instead of reading a missing key.
		$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
		if ($oldPerson === 0) {
			$result[$maxOldPersonId] = $newFaces;
			$maxOldPersonId++;
		} else {
			$result[$oldPerson] = $newFaces;
		}
	}
	return $result;
}
}