mirror of
https://github.com/ente-io/ente.git
synced 2025-05-02 12:08:03 +00:00
587 lines
21 KiB
Dart
import "dart:async";
|
|
import "dart:io" show Platform;
|
|
import "dart:math" show min;
|
|
import "dart:typed_data" show Uint8List;
|
|
|
|
import "package:flutter/foundation.dart" show kDebugMode;
|
|
import "package:logging/logging.dart";
|
|
import "package:photos/core/event_bus.dart";
|
|
import "package:photos/db/files_db.dart";
|
|
import "package:photos/db/ml/db.dart";
|
|
import "package:photos/events/machine_learning_control_event.dart";
|
|
import "package:photos/events/people_changed_event.dart";
|
|
import "package:photos/models/ml/face/face.dart";
|
|
import "package:photos/models/ml/ml_versions.dart";
|
|
import "package:photos/service_locator.dart";
|
|
import "package:photos/services/filedata/filedata_service.dart";
|
|
import "package:photos/services/filedata/model/file_data.dart";
|
|
import 'package:photos/services/machine_learning/face_ml/face_clustering/face_clustering_service.dart';
|
|
import "package:photos/services/machine_learning/face_ml/face_clustering/face_db_info_for_clustering.dart";
|
|
import "package:photos/services/machine_learning/face_ml/person/person_service.dart";
|
|
import "package:photos/services/machine_learning/ml_indexing_isolate.dart";
|
|
import 'package:photos/services/machine_learning/ml_result.dart';
|
|
import "package:photos/services/machine_learning/semantic_search/semantic_search_service.dart";
|
|
import "package:photos/utils/ml_util.dart";
|
|
import "package:photos/utils/network_util.dart";
|
|
import "package:photos/utils/ram_check_util.dart";
|
|
|
|
/// Coordinates on-device machine learning: face indexing, face clustering and
/// CLIP embeddings, gated by [MachineLearningControlEvent]s from the bus.
class MLService {
  final _logger = Logger("MLService");

  // Singleton pattern
  MLService._privateConstructor();
  static final instance = MLService._privateConstructor();
  factory MLService() => instance;

  // Whether [init] has completed.
  bool _isInitialized = false;

  // Epoch millis of the last permitted remote embeddings fetch; null until
  // the first fetch.
  int? lastRemoteFetch;
  // Cooldown between remote fetches when local indexing is disabled
  // ("lite" mode): 5 minutes, in milliseconds.
  static const int _kRemoteFetchCooldownOnLite = 1000 * 60 * 5;

  // "<packageName>/<version>", set in [init]; attached to remotely stored
  // ML results.
  late String client;

  bool get isInitialized => _isInitialized;

  bool get showClusteringIsHappening => _clusteringIsHappening;

  // Debug-only kill switch; checked in [_canRunMLFunction].
  bool debugIndexingDisabled = false;
  bool _clusteringIsHappening = false;
  // Last value of MachineLearningControlEvent.shouldRun received in [init].
  bool _mlControllerStatus = false;
  bool _isIndexingOrClusteringRunning = false;
  bool _isRunningML = false;
  // Cooperative pause flag, mirrored onto MLIndexingIsolate.
  bool _shouldPauseIndexingAndClustering = false;

  // If more unclustered faces than this exist, [runAllML] clusters before
  // indexing so results surface sooner.
  static const _kForceClusteringFaceCount = 8000;
  late final mlDataDB = MLDataDB.instance;
  /// Only call this function once at app startup, after that you can directly call [runAllML]
  Future<void> init() async {
    if (_isInitialized) return;
    // Do nothing until the user has explicitly granted ML consent.
    if (!flagService.hasGrantedMLConsent) {
      return;
    }
    _logger.info("init called");

    // Check if the device has enough RAM to run local indexing
    await checkDeviceTotalRAM();

    // Get client name
    final packageInfo = ServiceLocator.instance.packageInfo;
    client = "${packageInfo.packageName}/${packageInfo.version}";
    _logger.info("client: $client");

    // Listen on MachineLearningController
    Bus.instance.on<MachineLearningControlEvent>().listen((event) {
      // Consent may be revoked after init; re-check on every event.
      if (!flagService.hasGrantedMLConsent) {
        return;
      }

      _mlControllerStatus = event.shouldRun;
      if (_mlControllerStatus) {
        // Allowed to run again: undo any pending pause before kicking off ML.
        if (_shouldPauseIndexingAndClustering) {
          _cancelPauseIndexingAndClustering();
          _logger.info(
            "MLController allowed running ML, faces indexing undoing previous pause",
          );
        } else {
          _logger.info(
            "MLController allowed running ML, faces indexing starting",
          );
        }
        unawaited(runAllML());
      } else {
        _logger.info(
          "MLController stopped running ML, faces indexing will be paused (unless it's fetching embeddings)",
        );
        pauseIndexingAndClustering();
      }
    });

    _isInitialized = true;
    _logger.info('init done');
  }
bool canFetch() {
|
|
if (localSettings.isMLLocalIndexingEnabled) return true;
|
|
if (lastRemoteFetch == null) {
|
|
lastRemoteFetch = DateTime.now().millisecondsSinceEpoch;
|
|
return true;
|
|
}
|
|
final intDiff = DateTime.now().millisecondsSinceEpoch - lastRemoteFetch!;
|
|
final bool canFetch = intDiff > _kRemoteFetchCooldownOnLite;
|
|
if (canFetch) {
|
|
lastRemoteFetch = DateTime.now().millisecondsSinceEpoch;
|
|
}
|
|
return canFetch;
|
|
}
|
|
|
|
  /// Syncs remote file-data status and person feedback; run at the start of
  /// [runAllML] so indexing/clustering see up-to-date remote state.
  Future<void> sync() async {
    await FileDataService.instance.syncFDStatus();
    await faceRecognitionService.syncPersonFeedback();
  }
  /// Runs the full ML pipeline: [sync], optional pre-clustering of a large
  /// backlog, remote fetch + local indexing, clustering, and discover-section
  /// cache refreshes.
  ///
  /// With [force], runs even when the MLController currently disallows ML
  /// (the controller status flag is forced on).
  Future<void> runAllML({bool force = false}) async {
    try {
      if (force) {
        _mlControllerStatus = true;
      }
      if (!_canRunMLFunction(function: "AllML") && !force) return;
      _isRunningML = true;
      await sync();

      // With a big unclustered backlog, cluster before indexing so results
      // show up for the user sooner.
      final int unclusteredFacesCount =
          await mlDataDB.getUnclusteredFaceCount();
      if (unclusteredFacesCount > _kForceClusteringFaceCount) {
        _logger.info(
          "There are $unclusteredFacesCount unclustered faces, doing clustering first",
        );
        await clusterAllImages();
      }
      if (_mlControllerStatus == true) {
        // refresh discover section
        magicCacheService.updateCache(forced: force).ignore();
      }
      // canFetch() also stamps the cooldown timestamp when it returns true.
      if (canFetch()) {
        await fetchAndIndexAllImages();
      }
      // Cluster whatever indexing just produced.
      if ((await mlDataDB.getUnclusteredFaceCount()) > 0) {
        await clusterAllImages();
      }
      if (_mlControllerStatus == true) {
        // refresh discover section
        magicCacheService.updateCache().ignore();
      }
    } catch (e, s) {
      _logger.severe("runAllML failed", e, s);
      rethrow;
    } finally {
      _isRunningML = false;
    }
  }
void triggerML() {
|
|
if (_mlControllerStatus &&
|
|
!_isIndexingOrClusteringRunning &&
|
|
!_isRunningML) {
|
|
unawaited(runAllML());
|
|
}
|
|
}
|
|
|
|
void pauseIndexingAndClustering() {
|
|
if (_isIndexingOrClusteringRunning) {
|
|
_shouldPauseIndexingAndClustering = true;
|
|
MLIndexingIsolate.instance.shouldPauseIndexingAndClustering = true;
|
|
}
|
|
}
|
|
|
|
  /// Clears a pending pause request, both locally and on the indexing isolate.
  void _cancelPauseIndexingAndClustering() {
    _shouldPauseIndexingAndClustering = false;
    MLIndexingIsolate.instance.shouldPauseIndexingAndClustering = false;
  }
/// Analyzes all the images in the user library with the latest ml version and stores the results in the database.
|
|
///
|
|
/// This function first fetches from remote and checks if the image has already been analyzed
|
|
/// with the lastest faceMlVersion and stored on remote or local database. If so, it skips the image.
|
|
Future<void> fetchAndIndexAllImages() async {
|
|
if (!_canRunMLFunction(function: "Indexing")) return;
|
|
|
|
try {
|
|
_isIndexingOrClusteringRunning = true;
|
|
_logger.info('starting image indexing');
|
|
final Stream<List<FileMLInstruction>> instructionStream =
|
|
fetchEmbeddingsAndInstructions(fileDownloadMlLimit);
|
|
|
|
int fileAnalyzedCount = 0;
|
|
final Stopwatch stopwatch = Stopwatch()..start();
|
|
|
|
stream:
|
|
await for (final chunk in instructionStream) {
|
|
if (!localSettings.isMLLocalIndexingEnabled) {
|
|
await MLIndexingIsolate.instance.cleanupLocalIndexingModels();
|
|
continue;
|
|
} else if (!await canUseHighBandwidth()) {
|
|
_logger.info(
|
|
'stopping indexing because user is not connected to wifi',
|
|
);
|
|
break stream;
|
|
} else {
|
|
await MLIndexingIsolate.instance.ensureDownloadedModels();
|
|
}
|
|
final futures = <Future<bool>>[];
|
|
for (final instruction in chunk) {
|
|
if (_shouldPauseIndexingAndClustering) {
|
|
_logger.info("indexAllImages() was paused, stopping");
|
|
break stream;
|
|
}
|
|
await MLIndexingIsolate.instance.ensureLoadedModels(instruction);
|
|
futures.add(processImage(instruction));
|
|
}
|
|
final awaitedFutures = await Future.wait(futures);
|
|
final sumFutures = awaitedFutures.fold<int>(
|
|
0,
|
|
(previousValue, element) => previousValue + (element ? 1 : 0),
|
|
);
|
|
fileAnalyzedCount += sumFutures;
|
|
}
|
|
if (fileAnalyzedCount > 0) {
|
|
magicCacheService.queueUpdate('fileIndexed');
|
|
}
|
|
_logger.info(
|
|
"`indexAllImages()` finished. Analyzed $fileAnalyzedCount images, in ${stopwatch.elapsed.inSeconds} seconds (avg of ${stopwatch.elapsed.inSeconds / fileAnalyzedCount} seconds per image)",
|
|
);
|
|
_logStatus();
|
|
} catch (e, s) {
|
|
_logger.severe("indexAllImages failed", e, s);
|
|
} finally {
|
|
_isIndexingOrClusteringRunning = false;
|
|
_cancelPauseIndexingAndClustering();
|
|
}
|
|
}
|
|
|
|
  /// Clusters all unclustered faces into new or existing clusters.
  ///
  /// Remote person feedback is pulled first so faces a user rejected from a
  /// person are never re-assigned to that person's clusters. With
  /// [clusterInBuckets] (the default) the work runs in overlapping buckets
  /// and results are persisted per bucket, so a pause or crash keeps progress.
  Future<void> clusterAllImages({
    bool clusterInBuckets = true,
    bool force = false,
  }) async {
    if (!_canRunMLFunction(function: "Clustering") && !force) return;
    if (_clusteringIsHappening) {
      _logger.info("clusterAllImages() is already running, returning");
      return;
    }

    _logger.info("`clusterAllImages()` called");
    _isIndexingOrClusteringRunning = true;
    _clusteringIsHappening = true;
    final clusterAllImagesTime = DateTime.now();

    _logger.info('Pulling remote feedback before actually clustering');
    await PersonService.instance.fetchRemoteClusterFeedback();
    // Build faceID -> cluster IDs that face must NOT join (user rejections).
    final persons = await PersonService.instance.getPersons();
    final faceIdNotToCluster = <String, List<String>>{};
    for (final person in persons) {
      if (person.data.rejectedFaceIDs.isNotEmpty) {
        final personClusters = person.data.assigned.map((e) => e.id).toList();
        for (final faceID in person.data.rejectedFaceIDs) {
          faceIdNotToCluster[faceID] = personClusters;
        }
      }
    }

    try {
      // Get a sense of the total number of faces in the database
      final int totalFaces = await mlDataDB.getTotalFaceCount();
      final fileIDToCreationTime =
          await FilesDB.instance.getFileIDToCreationTime();
      final startEmbeddingFetch = DateTime.now();
      // read all embeddings
      final result = await mlDataDB.getFaceInfoForClustering(
        maxFaces: totalFaces,
      );
      // Faces whose file has no known creation time are skipped (tracked
      // only for logging); rejection lists are attached to the rest.
      final Set<int> missingFileIDs = {};
      final allFaceInfoForClustering = <FaceDbInfoForClustering>[];
      for (final faceInfo in result) {
        if (!fileIDToCreationTime.containsKey(faceInfo.fileID)) {
          missingFileIDs.add(faceInfo.fileID);
        } else {
          if (faceIdNotToCluster.containsKey(faceInfo.faceID)) {
            faceInfo.rejectedClusterIds = faceIdNotToCluster[faceInfo.faceID];
          }
          allFaceInfoForClustering.add(faceInfo);
        }
      }
      // sort the embeddings based on file creation time, newest first
      allFaceInfoForClustering.sort((b, a) {
        return fileIDToCreationTime[a.fileID]!
            .compareTo(fileIDToCreationTime[b.fileID]!);
      });
      _logger.info(
        'Getting and sorting embeddings took ${DateTime.now().difference(startEmbeddingFetch).inMilliseconds} ms for ${allFaceInfoForClustering.length} embeddings'
        'and ${missingFileIDs.length} missing fileIDs',
      );

      // Get the current cluster statistics
      final Map<String, (Uint8List, int)> oldClusterSummaries =
          await mlDataDB.getAllClusterSummary();

      if (clusterInBuckets) {
        // Buckets overlap (10000 faces, advancing 7500 per step) so clusters
        // can bridge bucket boundaries.
        const int bucketSize = 10000;
        const int offsetIncrement = 7500;
        int offset = 0;
        int bucket = 1;

        while (true) {
          if (_shouldPauseIndexingAndClustering) {
            _logger.info(
              "MLController does not allow running ML, stopping before clustering bucket $bucket",
            );
            break;
          }
          if (offset > allFaceInfoForClustering.length - 1) {
            _logger.warning(
              'faceIdToEmbeddingBucket is empty, this should ideally not happen as it should have stopped earlier. offset: $offset, totalFaces: $totalFaces',
            );
            break;
          }
          if (offset > totalFaces) {
            _logger.warning(
              'offset > totalFaces, this should ideally not happen. offset: $offset, totalFaces: $totalFaces',
            );
            break;
          }

          final bucketStartTime = DateTime.now();
          final faceInfoForClustering = allFaceInfoForClustering.sublist(
            offset,
            min(offset + bucketSize, allFaceInfoForClustering.length),
          );

          // Skip buckets that are already fully clustered.
          if (faceInfoForClustering.every((face) => face.clusterId != null)) {
            _logger.info('Everything in bucket $bucket is already clustered');
            if (offset + bucketSize >= totalFaces) {
              _logger.info('All faces clustered');
              break;
            } else {
              _logger.info('Skipping to next bucket');
              offset += offsetIncrement;
              bucket++;
              continue;
            }
          }

          final clusteringResult =
              await FaceClusteringService.instance.predictLinearIsolate(
            faceInfoForClustering.toSet(),
            fileIDToCreationTime: fileIDToCreationTime,
            offset: offset,
            oldClusterSummaries: oldClusterSummaries,
          );
          if (clusteringResult == null) {
            _logger.warning("faceIdToCluster is null");
            return;
          }

          // Persist this bucket's assignments and summaries immediately so
          // progress survives a pause or crash.
          await mlDataDB
              .updateFaceIdToClusterId(clusteringResult.newFaceIdToCluster);
          await mlDataDB
              .clusterSummaryUpdate(clusteringResult.newClusterSummaries);
          Bus.instance.fire(PeopleChangedEvent());
          // Carry the fresh cluster IDs and summaries into the next,
          // overlapping bucket.
          for (final faceInfo in faceInfoForClustering) {
            faceInfo.clusterId ??=
                clusteringResult.newFaceIdToCluster[faceInfo.faceID];
          }
          for (final clusterUpdate
              in clusteringResult.newClusterSummaries.entries) {
            oldClusterSummaries[clusterUpdate.key] = clusterUpdate.value;
          }
          _logger.info(
            'Done with clustering ${offset + faceInfoForClustering.length} embeddings (${(100 * (offset + faceInfoForClustering.length) / totalFaces).toStringAsFixed(0)}%) in bucket $bucket, offset: $offset, in ${DateTime.now().difference(bucketStartTime).inSeconds} seconds',
          );
          if (offset + bucketSize >= totalFaces) {
            _logger.info('All faces clustered');
            break;
          }
          offset += offsetIncrement;
          bucket++;
        }
      } else {
        final clusterStartTime = DateTime.now();
        // Cluster the embeddings using the linear clustering algorithm, returning a map from faceID to clusterID
        final clusteringResult =
            await FaceClusteringService.instance.predictLinearIsolate(
          allFaceInfoForClustering.toSet(),
          fileIDToCreationTime: fileIDToCreationTime,
          oldClusterSummaries: oldClusterSummaries,
        );
        if (clusteringResult == null) {
          _logger.warning("faceIdToCluster is null");
          return;
        }
        final clusterDoneTime = DateTime.now();
        _logger.info(
          'done with clustering ${allFaceInfoForClustering.length} in ${clusterDoneTime.difference(clusterStartTime).inSeconds} seconds ',
        );

        // Store the updated clusterIDs in the database
        _logger.info(
          'Updating ${clusteringResult.newFaceIdToCluster.length} FaceIDs with clusterIDs in the DB',
        );
        await mlDataDB
            .updateFaceIdToClusterId(clusteringResult.newFaceIdToCluster);
        await mlDataDB
            .clusterSummaryUpdate(clusteringResult.newClusterSummaries);
        Bus.instance.fire(PeopleChangedEvent());
        _logger.info('Done updating FaceIDs with clusterIDs in the DB, in '
            '${DateTime.now().difference(clusterDoneTime).inSeconds} seconds');
      }
      _logger.info('clusterAllImages() finished, in '
          '${DateTime.now().difference(clusterAllImagesTime).inSeconds} seconds');
    } catch (e, s) {
      _logger.severe("`clusterAllImages` failed", e, s);
    } finally {
      _clusteringIsHappening = false;
      _isIndexingOrClusteringRunning = false;
      _cancelPauseIndexingAndClustering();
    }
  }
  /// Runs local ML (faces and/or CLIP) on the file in [instruction], then
  /// stores the results on remote and in the local databases.
  ///
  /// Returns true iff ML actually ran for this file. Files that fail with a
  /// known-unindexable error get empty results stored (so indexing does not
  /// retry them forever); any other failure returns false and the file is
  /// retried later.
  Future<bool> processImage(FileMLInstruction instruction) async {
    bool actuallyRanML = false;

    try {
      final String filePath = await getImagePathForML(instruction.file);

      final MLResult? result = await MLIndexingIsolate.instance.analyzeImage(
        instruction,
        filePath,
      );
      // Check if there's no result simply because MLController paused indexing
      if (result == null) {
        if (!_shouldPauseIndexingAndClustering) {
          _logger.severe(
            "Failed to analyze image with uploadedFileID: ${instruction.file.uploadedFileID}",
          );
        }
        return actuallyRanML;
      }
      // Check anything actually ran
      actuallyRanML = result.ranML;
      if (!actuallyRanML) return actuallyRanML;
      // Prepare storing data on remote; reuse the existing remote entity when
      // one was fetched, otherwise start from an empty one.
      final FileDataEntity dataEntity = instruction.existingRemoteFileML ??
          FileDataEntity.empty(
            instruction.file.uploadedFileID!,
            DataType.mlData,
          );
      // Faces results
      final List<Face> faces = [];
      if (result.facesRan) {
        // Zero detections: store a single empty sentinel face so the file
        // still counts as indexed.
        if (result.faces!.isEmpty) {
          faces.add(Face.empty(result.fileId));
        }
        if (result.faces!.isNotEmpty) {
          for (int i = 0; i < result.faces!.length; ++i) {
            faces.add(
              Face.fromFaceResult(
                result.faces![i],
                result.fileId,
                result.decodedImageSize,
              ),
            );
          }
        }
        dataEntity.putFace(
          RemoteFaceEmbedding(
            faces,
            faceMlVersion,
            client: client,
            height: result.decodedImageSize.height,
            width: result.decodedImageSize.width,
          ),
        );
      }
      // Clip results
      if (result.clipRan) {
        dataEntity.putClip(
          RemoteClipEmbedding(
            result.clip!.embedding,
            version: clipMlVersion,
            client: client,
          ),
        );
      }
      // Storing results on remote (before local, so local DB only ever holds
      // results that were accepted remotely).
      await FileDataService.instance.putFileData(
        instruction.file,
        dataEntity,
      );
      _logger.info("ML results for fileID ${result.fileId} stored on remote");
      // Storing results locally
      if (result.facesRan) await mlDataDB.bulkInsertFaces(faces);
      if (result.clipRan) {
        await SemanticSearchService.instance.storeClipImageResult(
          result.clip!,
        );
      }
      _logger.info("ML results for fileID ${result.fileId} stored locally");
      return actuallyRanML;
    } catch (e, s) {
      final String errorString = e.toString();
      final String format = instruction.file.displayName.split('.').last;
      final int? size = instruction.file.fileSize;
      final fileType = instruction.file.fileType;
      // These errors mean the file can never be indexed on this device;
      // store empty results so indexing does not get stuck retrying it.
      final bool acceptedIssue =
          errorString.contains('ThumbnailRetrievalException') ||
              errorString.contains('InvalidImageFormatException') ||
              errorString.contains('FileSizeTooLargeForMobileIndexing');
      if (acceptedIssue) {
        _logger.severe(
          '$errorString for fileID ${instruction.file.uploadedFileID} (format $format, type $fileType, size $size), storing empty results so indexing does not get stuck',
          e,
          s,
        );
        await mlDataDB.bulkInsertFaces(
          [Face.empty(instruction.file.uploadedFileID!, error: true)],
        );
        await SemanticSearchService.instance.storeEmptyClipImageResult(
          instruction.file,
        );
        return true;
      }
      _logger.severe(
        "Failed to index file for fileID ${instruction.file.uploadedFileID} (format $format, type $fileType, size $size). Not storing any results locally, which means it will be automatically retried later.",
        e,
        s,
      );
      return false;
    }
  }
bool _canRunMLFunction({required String function}) {
|
|
if (kDebugMode && Platform.isIOS && !_isIndexingOrClusteringRunning) {
|
|
return true;
|
|
}
|
|
if (_isIndexingOrClusteringRunning) {
|
|
_logger.info(
|
|
"Cannot run $function because indexing or clustering is already running",
|
|
);
|
|
_logStatus();
|
|
return false;
|
|
}
|
|
if (_mlControllerStatus == false) {
|
|
_logger.info(
|
|
"Cannot run $function because MLController does not allow it",
|
|
);
|
|
_logStatus();
|
|
return false;
|
|
}
|
|
if (debugIndexingDisabled) {
|
|
_logger.info(
|
|
"Cannot run $function because debugIndexingDisabled is true",
|
|
);
|
|
_logStatus();
|
|
return false;
|
|
}
|
|
if (_shouldPauseIndexingAndClustering) {
|
|
// This should ideally not be triggered, because one of the above should be triggered instead.
|
|
_logger.warning(
|
|
"Cannot run $function because indexing and clustering is being paused",
|
|
);
|
|
_logStatus();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void _logStatus() {
|
|
final String status = '''
|
|
isInternalUser: ${flagService.internalUser}
|
|
Local indexing: ${localSettings.isMLLocalIndexingEnabled}
|
|
canRunMLController: $_mlControllerStatus
|
|
isIndexingOrClusteringRunning: $_isIndexingOrClusteringRunning
|
|
shouldPauseIndexingAndClustering: $_shouldPauseIndexingAndClustering
|
|
debugIndexingDisabled: $debugIndexingDisabled
|
|
''';
|
|
_logger.info(status);
|
|
}
|
|
}
|