From aab7e39ab39d9f5be9c25c872f8f7af51772c770 Mon Sep 17 00:00:00 2001 From: laurenspriem Date: Sat, 22 Jun 2024 12:53:35 +0530 Subject: [PATCH] [mob][photos] Cleanup feedback --- mobile/lib/face/db.dart | 12 - .../face_ml/feedback/cluster_feedback.dart | 322 ++++++++---------- .../ui/viewer/file_details/face_widget.dart | 2 +- 3 files changed, 148 insertions(+), 188 deletions(-) diff --git a/mobile/lib/face/db.dart b/mobile/lib/face/db.dart index 7a28de5f0a..f05cf5c620 100644 --- a/mobile/lib/face/db.dart +++ b/mobile/lib/face/db.dart @@ -1080,16 +1080,4 @@ class FaceMLDataDB { } await forceUpdateClusterIds(faceIDToClusterID); } - - Future addFacesToCluster( - List faceIDs, - int clusterID, - ) async { - final faceIDToClusterID = {}; - for (final faceID in faceIDs) { - faceIDToClusterID[faceID] = clusterID; - } - - await forceUpdateClusterIds(faceIDToClusterID); - } } diff --git a/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart b/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart index 6ca2c33dc9..3e7621c1c8 100644 --- a/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart +++ b/mobile/lib/services/machine_learning/face_ml/feedback/cluster_feedback.dart @@ -244,8 +244,12 @@ class ClusterFeedbackService { } } - Future addFilesToCluster(List faceIDs, int clusterID) async { - await FaceMLDataDB.instance.addFacesToCluster(faceIDs, clusterID); + Future addFacesToCluster(List faceIDs, int clusterID) async { + final faceIDToClusterID = {}; + for (final faceID in faceIDs) { + faceIDToClusterID[faceID] = clusterID; + } + await FaceMLDataDB.instance.forceUpdateClusterIds(faceIDToClusterID); Bus.instance.fire(PeopleChangedEvent()); return; } @@ -411,7 +415,6 @@ class ClusterFeedbackService { return susClusters; } - // TODO: iterate over this method to find sweet spot Future breakUpCluster( int clusterID, { bool useDbscan = false, @@ -468,178 +471,6 @@ class ClusterFeedbackService { return clusterResult; } - /// WARNING: this method is purely for debugging purposes, never use in production - Future createFakeClustersByBlurValue() async { - try { - // Delete old clusters - await FaceMLDataDB.instance.dropClustersAndPersonTable(); - final List persons = - await PersonService.instance.getPersons(); - for (final PersonEntity p in persons) { - await PersonService.instance.deletePerson(p.remoteID); - } - - // Create new fake clusters based on blur value. One for values between 0 and 10, one for 10-20, etc till 200 - final int startClusterID = DateTime.now().microsecondsSinceEpoch; - final faceIDsToBlurValues = - await FaceMLDataDB.instance.getFaceIDsToBlurValues(200); - final faceIdToCluster = {}; - for (final entry in faceIDsToBlurValues.entries) { - final faceID = entry.key; - final blurValue = entry.value; - final newClusterID = startClusterID + blurValue ~/ 10; - faceIdToCluster[faceID] = newClusterID; - } - await FaceMLDataDB.instance.updateFaceIdToClusterId(faceIdToCluster); - - Bus.instance.fire(PeopleChangedEvent()); - } catch (e, s) { - _logger.severe("Error in createFakeClustersByBlurValue", e, s); - rethrow; - } - } - - Future debugLogClusterBlurValues( - int clusterID, { - int? clusterSize, - bool logClusterSummary = false, - bool logBlurValues = false, - }) async { - if (!kDebugMode) return; - - // Logging the clusterID - _logger.info( - "Debug logging for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}", - ); - const int biggestClusterID = 1715061228725148; - - // Logging the cluster summary for the cluster - if (logClusterSummary) { - final summaryMap = await FaceMLDataDB.instance.getClusterToClusterSummary( - [clusterID, biggestClusterID], - ); - final summary = summaryMap[clusterID]; - if (summary != null) { - _logger.info( - "Cluster summary for cluster $clusterID says the amount of faces is: ${summary.$2}", - ); - } - - final biggestClusterSummary = summaryMap[biggestClusterID]; - final clusterSummary = summaryMap[clusterID]; - if (biggestClusterSummary != null && clusterSummary != null) { - _logger.info( - "Cluster summary for biggest cluster $biggestClusterID says the size is: ${biggestClusterSummary.$2}", - ); - _logger.info( - "Cluster summary for current cluster $clusterID says the size is: ${clusterSummary.$2}", - ); - - // Mean distance - final biggestMean = Vector.fromList( - EVector.fromBuffer(biggestClusterSummary.$1).values, - dtype: DType.float32, - ); - final currentMean = Vector.fromList( - EVector.fromBuffer(clusterSummary.$1).values, - dtype: DType.float32, - ); - final bigClustersMeanDistance = 1 - biggestMean.dot(currentMean); - _logger.info( - "Mean distance between biggest cluster and current cluster: $bigClustersMeanDistance", - ); - _logger.info( - 'Element differences between the two means are ${biggestMean - currentMean}', - ); - final currentL2Norm = currentMean.norm(); - _logger.info( - 'L2 norm of current mean: $currentL2Norm', - ); - final trueDistance = - biggestMean.distanceTo(currentMean, distance: Distance.cosine); - _logger.info('True distance between the two means: $trueDistance'); - - // Median distance - const sampleSize = 100; - final Iterable biggestEmbeddings = await FaceMLDataDB - .instance - .getFaceEmbeddingsForCluster(biggestClusterID); - final List biggestSampledEmbeddingsProto = - _randomSampleWithoutReplacement( - biggestEmbeddings, - sampleSize, - ); - final List biggestSampledEmbeddings = - biggestSampledEmbeddingsProto - .map( - (embedding) => Vector.fromList( - EVector.fromBuffer(embedding).values, - dtype: DType.float32, - ), - ) - .toList(growable: false); - - final Iterable currentEmbeddings = - await FaceMLDataDB.instance.getFaceEmbeddingsForCluster(clusterID); - final List currentSampledEmbeddingsProto = - _randomSampleWithoutReplacement( - currentEmbeddings, - sampleSize, - ); - final List currentSampledEmbeddings = - currentSampledEmbeddingsProto - .map( - (embedding) => Vector.fromList( - EVector.fromBuffer(embedding).values, - dtype: DType.float32, - ), - ) - .toList(growable: false); - - // Calculate distances and find the median - final List distances = []; - final List trueDistances = []; - for (final biggestEmbedding in biggestSampledEmbeddings) { - for (final currentEmbedding in currentSampledEmbeddings) { - distances.add(1 - biggestEmbedding.dot(currentEmbedding)); - trueDistances.add( - biggestEmbedding.distanceTo( - currentEmbedding, - distance: Distance.cosine, - ), - ); - } - } - distances.sort(); - trueDistances.sort(); - final double medianDistance = distances[distances.length ~/ 2]; - final double trueMedianDistance = - trueDistances[trueDistances.length ~/ 2]; - _logger.info( - "Median distance between biggest cluster and current cluster: $medianDistance (using sample of $sampleSize)", - ); - _logger.info( - 'True distance median between the two embeddings: $trueMedianDistance', - ); - } - } - - // Logging the blur values for the cluster - if (logBlurValues) { - final List blurValues = await FaceMLDataDB.instance - .getBlurValuesForCluster(clusterID) - .then((value) => value.toList()); - final blurValuesIntegers = - blurValues.map((value) => value.round()).toList(); - blurValuesIntegers.sort(); - _logger.info( - "Blur values for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}: $blurValuesIntegers", - ); - } - - return; - } - /// Returns a list of suggestions. For each suggestion we return a record consisting of the following elements: /// 1. clusterID: the ID of the cluster /// 2. distance: the distance between the person's cluster and the suggestion @@ -1126,6 +957,147 @@ class ClusterFeedbackService { "Sorting suggestions based on distance to person took ${endTime.difference(startTime).inMilliseconds} ms for ${suggestions.length} suggestions, of which ${clusterSummaryCallTime.difference(startTime).inMilliseconds} ms was spent on the cluster summary call", ); } + + Future debugLogClusterBlurValues( + int clusterID, { + int? clusterSize, + bool logClusterSummary = false, + bool logBlurValues = false, + }) async { + if (!kDebugMode) return; + + // Logging the clusterID + _logger.info( + "Debug logging for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}", + ); + const int biggestClusterID = 1715061228725148; + + // Logging the cluster summary for the cluster + if (logClusterSummary) { + final summaryMap = await FaceMLDataDB.instance.getClusterToClusterSummary( + [clusterID, biggestClusterID], + ); + final summary = summaryMap[clusterID]; + if (summary != null) { + _logger.info( + "Cluster summary for cluster $clusterID says the amount of faces is: ${summary.$2}", + ); + } + + final biggestClusterSummary = summaryMap[biggestClusterID]; + final clusterSummary = summaryMap[clusterID]; + if (biggestClusterSummary != null && clusterSummary != null) { + _logger.info( + "Cluster summary for biggest cluster $biggestClusterID says the size is: ${biggestClusterSummary.$2}", + ); + _logger.info( + "Cluster summary for current cluster $clusterID says the size is: ${clusterSummary.$2}", + ); + + // Mean distance + final biggestMean = Vector.fromList( + EVector.fromBuffer(biggestClusterSummary.$1).values, + dtype: DType.float32, + ); + final currentMean = Vector.fromList( + EVector.fromBuffer(clusterSummary.$1).values, + dtype: DType.float32, + ); + final bigClustersMeanDistance = 1 - biggestMean.dot(currentMean); + _logger.info( + "Mean distance between biggest cluster and current cluster: $bigClustersMeanDistance", + ); + _logger.info( + 'Element differences between the two means are ${biggestMean - currentMean}', + ); + final currentL2Norm = currentMean.norm(); + _logger.info( + 'L2 norm of current mean: $currentL2Norm', + ); + final trueDistance = + biggestMean.distanceTo(currentMean, distance: Distance.cosine); + _logger.info('True distance between the two means: $trueDistance'); + + // Median distance + const sampleSize = 100; + final Iterable biggestEmbeddings = await FaceMLDataDB + .instance + .getFaceEmbeddingsForCluster(biggestClusterID); + final List biggestSampledEmbeddingsProto = + _randomSampleWithoutReplacement( + biggestEmbeddings, + sampleSize, + ); + final List biggestSampledEmbeddings = + biggestSampledEmbeddingsProto + .map( + (embedding) => Vector.fromList( + EVector.fromBuffer(embedding).values, + dtype: DType.float32, + ), + ) + .toList(growable: false); + + final Iterable currentEmbeddings = + await FaceMLDataDB.instance.getFaceEmbeddingsForCluster(clusterID); + final List currentSampledEmbeddingsProto = + _randomSampleWithoutReplacement( + currentEmbeddings, + sampleSize, + ); + final List currentSampledEmbeddings = + currentSampledEmbeddingsProto + .map( + (embedding) => Vector.fromList( + EVector.fromBuffer(embedding).values, + dtype: DType.float32, + ), + ) + .toList(growable: false); + + // Calculate distances and find the median + final List distances = []; + final List trueDistances = []; + for (final biggestEmbedding in biggestSampledEmbeddings) { + for (final currentEmbedding in currentSampledEmbeddings) { + distances.add(1 - biggestEmbedding.dot(currentEmbedding)); + trueDistances.add( + biggestEmbedding.distanceTo( + currentEmbedding, + distance: Distance.cosine, + ), + ); + } + } + distances.sort(); + trueDistances.sort(); + final double medianDistance = distances[distances.length ~/ 2]; + final double trueMedianDistance = + trueDistances[trueDistances.length ~/ 2]; + _logger.info( + "Median distance between biggest cluster and current cluster: $medianDistance (using sample of $sampleSize)", + ); + _logger.info( + 'True distance median between the two embeddings: $trueMedianDistance', + ); + } + } + + // Logging the blur values for the cluster + if (logBlurValues) { + final List blurValues = await FaceMLDataDB.instance + .getBlurValuesForCluster(clusterID) + .then((value) => value.toList()); + final blurValuesIntegers = + blurValues.map((value) => value.round()).toList(); + blurValuesIntegers.sort(); + _logger.info( + "Blur values for cluster $clusterID${clusterSize != null ? ' with $clusterSize photos' : ''}: $blurValuesIntegers", + ); + } + + return; + } } /// Returns a map of person's clusterID to map of closest clusterID to with disstance diff --git a/mobile/lib/ui/viewer/file_details/face_widget.dart b/mobile/lib/ui/viewer/file_details/face_widget.dart index 67d2368d71..48e9a43bb1 100644 --- a/mobile/lib/ui/viewer/file_details/face_widget.dart +++ b/mobile/lib/ui/viewer/file_details/face_widget.dart @@ -279,7 +279,7 @@ class _FaceWidgetState extends State { try { if (isJustRemoved) { await ClusterFeedbackService.instance - .addFilesToCluster([widget.face.faceID], widget.clusterID!); + .addFacesToCluster([widget.face.faceID], widget.clusterID!); } else { await ClusterFeedbackService.instance .removeFilesFromCluster([widget.file], widget.clusterID!);