* @copyright Copyright (c) 2018, Branko Kokanovic * * @author Branko Kokanovic * * @license GNU AGPL version 3 or any later version * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ namespace OCA\FaceRecognition\BackgroundJob\Tasks; use OCP\IUser; use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; use OCA\FaceRecognition\Db\FaceMapper; use OCA\FaceRecognition\Db\ImageMapper; use OCA\FaceRecognition\Db\PersonMapper; use OCA\FaceRecognition\Helper\Euclidean; use OCA\FaceRecognition\Helper\Requirements; use OCA\FaceRecognition\Clusterer\ChineseWhispers; use OCA\FaceRecognition\Service\SettingsService; /** * Task that, for each user, creates person clusters for each.
*/
class CreateClustersTask extends FaceRecognitionBackgroundTask {

	/** @var PersonMapper Person mapper */
	private $personMapper;

	/** @var ImageMapper Image mapper */
	private $imageMapper;

	/** @var FaceMapper Face mapper */
	private $faceMapper;

	/** @var SettingsService Settings service */
	private $settingsService;

	/**
	 * @param PersonMapper $personMapper
	 * @param ImageMapper $imageMapper
	 * @param FaceMapper $faceMapper
	 * @param SettingsService $settingsService
	 */
	public function __construct(PersonMapper    $personMapper,
	                            ImageMapper     $imageMapper,
	                            FaceMapper      $faceMapper,
	                            SettingsService $settingsService)
	{
		parent::__construct();

		$this->personMapper    = $personMapper;
		$this->imageMapper     = $imageMapper;
		$this->faceMapper      = $faceMapper;
		$this->settingsService = $settingsService;
	}

	/**
	 * @inheritdoc
	 */
	public function description() {
		return "Create new persons or update existing persons";
	}

	/**
	 * Runs the clustering decision for every eligible user of the context.
	 *
	 * This is a generator: it yields once after each processed user and
	 * finally returns true.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$eligibleUsers = $this->context->getEligibleUsers();
		foreach ($eligibleUsers as $user) {
			$this->createClusterIfNeeded($user);
			yield;
		}

		return true;
	}

	/**
	 * Decides whether the clusters of one user must be created or recreated
	 * and, if so, clusters the user's faces, merges the result with the
	 * existing persons and stores it through the person mapper.
	 *
	 * @param string $userId ID of the user to process
	 *
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		if ($hasPersons) {
			$forceRecreate   = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled      = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			} else if ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
			} else {
				// No stale persons and no recent new faces: nothing to recreate.
				$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
				return;
			}
		} else {
			// User should not be able to use this directly, used in tests
			$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
			$needCreate        = $this->needCreateFirstTime($userId, $modelId);

			if ($forceTestCreation) {
				$this->logInfo('Force the creation of clusters for testing');
			} else if ($needCreate) {
				$this->logInfo('Face clustering will be created for the first time.');
			} else {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo('* have 1000 faces already processed');
				$this->logInfo('* or you need to have 95% of you images processed');
				$this->logInfo('Use stats command to track progress');
				return;
			}
		}

		// Ok. If we are here, the clusters must be (re)created.
		$minFaceSize   = $this->settingsService->getMinimumFaceSize();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		$faces = array_merge(
			$this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence),
			$this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence)
		);

		$facesCount = count($faces);
		$this->logInfo('There are ' . $facesCount . ' faces for clustering');

		$batches   = $this->computeBatches($facesCount, (int) $this->settingsService->getClusterigBatchSize());
		$noSlices  = $batches[0];
		$sliceSize = $batches[1];

		$this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces');

		// Each batch is clustered on its own. array_merge() renumbers the integer
		// cluster labels, so labels coming from different batches cannot collide.
		$newClusters = [];
		for ($i = 0; $i < $noSlices; $i++) {
			$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
			$newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced));
		}

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters($faces);

		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// New merge
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset the flags so the clusters are not created/recreated unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Computes how many batches the faces are clustered in and how big each one is.
	 *
	 * A batch size of zero or less disables batching: everything goes into a
	 * single batch. Otherwise the configured size is raised to at least 2000,
	 * capped at the number of faces, and the faces are spread evenly over the
	 * smallest number of batches that respects that size.
	 *
	 * With no faces at all a single empty batch is returned; this avoids the
	 * division by zero that a batch size capped to 0 would cause.
	 *
	 * @param int $facesCount number of faces to cluster
	 * @param int $batchSize configured batch size, <= 0 meaning "no batching"
	 *
	 * @return int[] two elements: [number of batches, faces per batch]
	 */
	private function computeBatches(int $facesCount, int $batchSize): array {
		if ($batchSize <= 0 || $facesCount <= 0) {
			return [1, $facesCount];
		}

		// The minimum batch size is 2000 faces...
		$batchSize = max($batchSize, 2000);
		// ...and the maximum batch size is the faces count.
		$batchSize = min($batchSize, $facesCount);

		// True ceiling: an exact multiple must not produce an extra batch.
		$noSlices  = (int) ceil($facesCount / $batchSize);
		$sliceSize = (int) ceil($facesCount / $noSlices);

		return [$noSlices, $sliceSize];
	}

	/**
	 * Evaluates whether there are enough new faces to justify recreating the clusters.
	 *
	 * We don't want to jump the gun here, so clusters are recreated when there are either:
	 *  - 25 or more faces without a person, or
	 *  - fewer than that, but the oldest of them was created more than 2 hours ago.
	 *
	 * (basically, we want to avoid recreating clusters for each new face being uploaded,
	 * however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 *
	 * @return bool true when the new faces alone justify recreating the clusters
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($facesWithoutPersons === 0) {
			return false;
		}
		if ($facesWithoutPersons >= 25) {
			return true;
		}

		// We have some faces, but not that many, let's see when oldest one is generated.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$oldestFaceTimestamp = $oldestFace->creationTime->getTimestamp();
		$currentTimestamp = time();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
			$userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		return ($currentTimestamp - $oldestFaceTimestamp) > 2 * 60 * 60;
	}

	/**
	 * Tells whether the person mapper reports at least one cluster when asked
	 * with its third argument set to true (presumably "only invalid/stale
	 * clusters" - confirm against PersonMapper::countClusters).
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 *
	 * @return bool
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		return $this->personMapper->countClusters($userId, $modelId, true) > 0;
	}

	/**
	 * Tells whether the settings flag requesting a cluster recreation is set for the user.
	 *
	 * @param string $userId ID of the user
	 *
	 * @return bool
	 */
	private function needRecreateBySettings(string $userId): bool {
		return $this->settingsService->getNeedRecreateClusters($userId);
	}

	/**
	 * Decides whether enough data has been collected to create clusters for the first time.
	 *
	 * Clusters are created when creation is forced (tests only), when more than
	 * 1000 faces were found, or when more than 95% of the user's images are processed.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 *
	 * @return bool
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		if ($imageCount === 0) {
			return false;
		}

		$imageProcessed = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($imageProcessed === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		$facesCount = $this->faceMapper->countFaces($userId, $modelId);
		if ($facesCount > 1000) {
			return true;
		}

		$percentImagesProcessed = $imageProcessed / floatval($imageCount);
		return $percentImagesProcessed > 0.95;
	}

	/**
	 * Groups the given faces by the person they are currently assigned to.
	 * Faces without a person are skipped.
	 *
	 * @param array $faces face objects exposing "person" and "id"
	 *
	 * @return array map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$clusters = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			$clusters[$face->person][] = $face->id;
		}
		return $clusters;
	}

	/**
	 * Clusters the given faces with Chinese Whispers.
	 *
	 * When pdlib is loaded its native implementation is used, otherwise the
	 * PHP fallback shipped with the app.
	 *
	 * @param array $faces list (0-based) of face objects exposing "descriptor" and "id"
	 *
	 * @return array map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();
		$usePdlib    = Requirements::pdlibLoaded();

		// Create edges (neighbors) for Chinese Whispers
		$edges = $this->buildEdges($faces, $sensitivity, $usePdlib);

		if ($usePdlib) {
			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = dlib_chinese_whispers($edges);
		} else {
			// The clustering algorithm actually expects ordered lists.
			$orderedEdges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
			usort($orderedEdges, static function ($a, $b) {
				return ($a[0] <=> $b[0]) ?: ($a[1] <=> $b[1]);
			});

			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = [];
			ChineseWhispers::predict($orderedEdges, $labelsByIndex);
		}

		// Label at position i belongs to the face at position i.
		$newClusters = [];
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			$newClusters[$labelsByIndex[$i]][] = $faces[$i]->id;
		}

		return $newClusters;
	}

	/**
	 * Builds the edge list (pairs of face indexes) fed to Chinese Whispers.
	 *
	 * Two faces are connected when the distance between their descriptors is
	 * below the sensitivity. Every face with a descriptor is also compared with
	 * itself, and a face without descriptor gets an explicit self-edge, so each
	 * face index shows up in the edge list.
	 *
	 * @param array $faces list (0-based) of face objects
	 * @param mixed $sensitivity distance threshold below which two faces are neighbors
	 * @param bool $usePdlib true to measure with dlib_vector_length(), false for Euclidean::distance()
	 *
	 * @return array list of [index, index] pairs
	 */
	private function buildEdges(array $faces, $sensitivity, bool $usePdlib): array {
		$edges = [];
		$facesCount = count($faces);

		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				$edges[] = [$i, $i];
				continue;
			}

			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}

				$distance = $usePdlib
					? dlib_vector_length($face1->descriptor, $face2->descriptor)
					: Euclidean::distance($face1->descriptor, $face2->descriptor);

				if ($distance < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		return $edges;
	}

	/**
	 * Maps freshly computed clusters onto the existing persons.
	 *
	 * For each new cluster, the old person that contributes most of its faces
	 * keeps its ID (each old person is handed out at most once); every other
	 * new cluster receives a brand new ID above the highest old person ID.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster map of old person ID => list of face IDs
	 * @param array $newCluster map of new cluster label => list of face IDs
	 *
	 * @return array map of resulting person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index faces by the first old person containing them, so the lookup
		// below does not need to scan every old cluster for every new face.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face => [old person or null, new person]
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A missing old person becomes an empty string in
		// the key, which intval() turns back into 0 ("no old person") below.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent first.
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach (array_keys($transitionCount) as $transitionKey) {
			$transition = explode(':', (string) $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);

			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}

			if (($oldPerson === 0) || !array_key_exists($oldPerson, $oldPersonProcessed)) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// Old person already taken by a better matching cluster.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without any recorded transition (e.g. an empty one) is treated as new.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}

		return $result;
	}
}