import "dart:async";
import "dart:io" show Platform;
import "dart:math" show min;
import "dart:typed_data" show Uint8List;

import "package:flutter/foundation.dart" show kDebugMode;
import "package:logging/logging.dart";
import "package:photos/core/event_bus.dart";
import "package:photos/db/files_db.dart";
import "package:photos/db/ml/db.dart";
import "package:photos/events/machine_learning_control_event.dart";
import "package:photos/events/people_changed_event.dart";
import "package:photos/models/ml/face/face.dart";
import "package:photos/models/ml/ml_versions.dart";
import "package:photos/service_locator.dart";
import "package:photos/services/filedata/filedata_service.dart";
import "package:photos/services/filedata/model/file_data.dart";
import 'package:photos/services/machine_learning/face_ml/face_clustering/face_clustering_service.dart';
import "package:photos/services/machine_learning/face_ml/face_clustering/face_db_info_for_clustering.dart";
import "package:photos/services/machine_learning/face_ml/person/person_service.dart";
import "package:photos/services/machine_learning/ml_indexing_isolate.dart";
import 'package:photos/services/machine_learning/ml_result.dart';
import "package:photos/services/machine_learning/semantic_search/semantic_search_service.dart";
import "package:photos/utils/ml_util.dart";
import "package:photos/utils/network_util.dart";
import "package:photos/utils/ram_check_util.dart";

/// Coordinates the machine-learning pipeline: syncing remote state, indexing
/// images (faces + clip embeddings) and clustering the detected faces.
///
/// The service is a singleton; use [MLService.instance] or the factory
/// constructor. Work is gated by a set of private flags that are inspected in
/// [_canRunMLFunction] and reported by [_logStatus].
class MLService {
  final _logger = Logger("MLService");

  // Singleton pattern
  MLService._privateConstructor();
  static final instance = MLService._privateConstructor();
  factory MLService() => instance;

  bool _isInitialized = false;

  /// Epoch milliseconds of the last fetch granted by [canFetch] while local
  /// indexing was disabled; `null` until the first such fetch.
  int? lastRemoteFetch;

  /// Minimum time between two fetches granted by [canFetch] when local
  /// indexing is disabled, in milliseconds (5 minutes).
  static const int _kRemoteFetchCooldownOnLite = 1000 * 60 * 5;

  /// Client identifier ("packageName/version"), assigned in [init] and attached
  /// to the embeddings stored on remote by [processImage].
  late String client;

  /// Whether [init] has completed.
  bool get isInitialized => _isInitialized;

  /// Whether [clusterAllImages] is currently running.
  bool get showClusteringIsHappening => _clusteringIsHappening;

  /// When true, [_canRunMLFunction] refuses to run any ML function.
  bool debugIndexingDisabled = false;

  bool _clusteringIsHappening = false;

  /// Last `shouldRun` value received from the ML controller event
  /// (or forced to true by `runAllML(force: true)`).
  bool _mlControllerStatus = false;
  bool _isIndexingOrClusteringRunning = false;
  bool _isRunningML = false;
  bool _shouldPauseIndexingAndClustering = false;

  /// When more unclustered faces than this exist, [runAllML] clusters before
  /// it starts indexing.
  static const _kForceClusteringFaceCount = 8000;

  late final mlDataDB = MLDataDB.instance;

  /// Only call this function once at app startup, after that you can directly call [runAllML]
  ///
  /// Does nothing when already initialized or when the user has not granted
  /// ML consent. Otherwise checks device RAM, computes [client] and starts
  /// listening for ML controller events, which start ([runAllML]) or pause
  /// ([pauseIndexingAndClustering]) the pipeline.
  Future<void> init() async {
    if (_isInitialized) return;
    if (!flagService.hasGrantedMLConsent) {
      return;
    }
    _logger.info("init called");

    // Check if the device has enough RAM to run local indexing
    await checkDeviceTotalRAM();

    // Get client name
    final packageInfo = ServiceLocator.instance.packageInfo;
    client = "${packageInfo.packageName}/${packageInfo.version}";
    _logger.info("client: $client");

    // Listen on MachineLearningController
    Bus.instance.on<MachineLearningControlEvent>().listen((event) {
      // Consent may have been revoked since init.
      if (!flagService.hasGrantedMLConsent) {
        return;
      }

      _mlControllerStatus = event.shouldRun;
      if (_mlControllerStatus) {
        if (_shouldPauseIndexingAndClustering) {
          _cancelPauseIndexingAndClustering();
          _logger.info(
            "MLController allowed running ML, faces indexing undoing previous pause",
          );
        } else {
          _logger.info(
            "MLController allowed running ML, faces indexing starting",
          );
        }
        unawaited(runAllML());
      } else {
        _logger.info(
          "MLController stopped running ML, faces indexing will be paused (unless it's fetching embeddings)",
        );
        pauseIndexingAndClustering();
      }
    });

    _isInitialized = true;
    _logger.info('init done');
  }

  /// Whether a fetch-and-index pass may start now.
  ///
  /// Always true when local indexing is enabled. Otherwise a pass is allowed
  /// the first time and then only once more than
  /// [_kRemoteFetchCooldownOnLite] milliseconds have elapsed since the last
  /// allowed one. Returning true in that mode updates [lastRemoteFetch].
  bool canFetch() {
    if (localSettings.isMLLocalIndexingEnabled) return true;

    final now = DateTime.now().millisecondsSinceEpoch;
    final previousFetch = lastRemoteFetch;
    if (previousFetch == null ||
        now - previousFetch > _kRemoteFetchCooldownOnLite) {
      lastRemoteFetch = now;
      return true;
    }
    return false;
  }

  /// Syncs the file-data status and then the person feedback.
  Future<void> sync() async {
    await FileDataService.instance.syncFDStatus();
    await faceRecognitionService.syncPersonFeedback();
  }

  /// Runs the full pipeline: sync, optional early clustering, indexing, then
  /// clustering of whatever is left unclustered.
  ///
  /// With [force] set, the controller status is switched to "allowed" and the
  /// [_canRunMLFunction] gate is bypassed. Failures are logged and rethrown.
  Future<void> runAllML({bool force = false}) async {
    try {
      if (force) {
        _mlControllerStatus = true;
      }
      if (!_canRunMLFunction(function: "AllML") && !force) return;
      _isRunningML = true;
      await sync();

      // A large backlog of unclustered faces is handled before indexing more.
      final int unclusteredFacesCount =
          await mlDataDB.getUnclusteredFaceCount();
      if (unclusteredFacesCount > _kForceClusteringFaceCount) {
        _logger.info(
          "There are $unclusteredFacesCount unclustered faces, doing clustering first",
        );
        await clusterAllImages();
      }
      if (_mlControllerStatus == true) {
        // refresh discover section
        magicCacheService.updateCache(forced: force).ignore();
      }
      if (canFetch()) {
        await fetchAndIndexAllImages();
      }
      if ((await mlDataDB.getUnclusteredFaceCount()) > 0) {
        await clusterAllImages();
      }
      if (_mlControllerStatus == true) {
        // refresh discover section
        magicCacheService.updateCache().ignore();
      }
    } catch (e, s) {
      _logger.severe("runAllML failed", e, s);
      rethrow;
    } finally {
      _isRunningML = false;
    }
  }

  /// Starts [runAllML] in the background if the controller allows it and
  /// nothing is running already.
  void triggerML() {
    if (_mlControllerStatus &&
        !_isIndexingOrClusteringRunning &&
        !_isRunningML) {
      unawaited(runAllML());
    }
  }

  /// Requests a pause of the running indexing/clustering pass, if any.
  ///
  /// The request is also forwarded to [MLIndexingIsolate]. It is cleared when
  /// the running pass finishes or by [_cancelPauseIndexingAndClustering].
  void pauseIndexingAndClustering() {
    if (_isIndexingOrClusteringRunning) {
      _shouldPauseIndexingAndClustering = true;
      MLIndexingIsolate.instance.shouldPauseIndexingAndClustering = true;
    }
  }

  /// Clears a pending pause request, both here and on [MLIndexingIsolate].
  void _cancelPauseIndexingAndClustering() {
    _shouldPauseIndexingAndClustering = false;
    MLIndexingIsolate.instance.shouldPauseIndexingAndClustering = false;
  }

  /// Analyzes all the images in the user library with the latest ml version and stores the results in the database.
  ///
  /// This function first fetches from remote and checks if the image has already been analyzed
  /// with the latest faceMlVersion and stored on remote or local database. If so, it skips the image.
  ///
  /// Errors are logged and not rethrown. The running flag and any pending
  /// pause request are always reset when the pass ends.
  Future<void> fetchAndIndexAllImages() async {
    if (!_canRunMLFunction(function: "Indexing")) return;

    try {
      _isIndexingOrClusteringRunning = true;
      _logger.info('starting image indexing');
      final Stream<List<FileMLInstruction>> instructionStream =
          fetchEmbeddingsAndInstructions(fileDownloadMlLimit);

      int fileAnalyzedCount = 0;
      final Stopwatch stopwatch = Stopwatch()..start();

      stream:
      await for (final chunk in instructionStream) {
        if (!localSettings.isMLLocalIndexingEnabled) {
          // Keep draining the stream, but do no local work for the chunk.
          await MLIndexingIsolate.instance.cleanupLocalIndexingModels();
          continue;
        } else if (!await canUseHighBandwidth()) {
          _logger.info(
            'stopping indexing because user is not connected to wifi',
          );
          break stream;
        } else {
          await MLIndexingIsolate.instance.ensureDownloadedModels();
        }

        final futures = <Future<bool>>[];
        bool pausedMidChunk = false;
        for (final instruction in chunk) {
          if (_shouldPauseIndexingAndClustering) {
            _logger.info("indexAllImages() was paused, stopping");
            pausedMidChunk = true;
            break;
          }
          await MLIndexingIsolate.instance.ensureLoadedModels(instruction);
          futures.add(processImage(instruction));
        }

        // Always wait for the work that was already started, even when paused,
        // so it is counted and finishes before the pause request is cleared.
        final results = await Future.wait(futures);
        fileAnalyzedCount += results.where((ranML) => ranML).length;
        if (pausedMidChunk) break stream;
      }

      if (fileAnalyzedCount > 0) {
        magicCacheService.queueUpdate('fileIndexed');
      }

      final int elapsedSeconds = stopwatch.elapsed.inSeconds;
      // Avoid logging NaN/Infinity when nothing was analyzed.
      final num secondsPerImage =
          fileAnalyzedCount > 0 ? elapsedSeconds / fileAnalyzedCount : 0;
      _logger.info(
        "`indexAllImages()` finished. Analyzed $fileAnalyzedCount images, in $elapsedSeconds seconds (avg of $secondsPerImage seconds per image)",
      );
      _logStatus();
    } catch (e, s) {
      _logger.severe("indexAllImages failed", e, s);
    } finally {
      _isIndexingOrClusteringRunning = false;
      _cancelPauseIndexingAndClustering();
    }
  }

  /// Clusters all face embeddings stored in the ML database.
  ///
  /// Remote cluster feedback is pulled first, so that faces rejected from a
  /// person carry that person's cluster ids as `rejectedClusterIds`. With
  /// [clusterInBuckets] the faces are processed newest-first in overlapping
  /// buckets (size 10000, step 7500); otherwise in a single pass. With [force]
  /// the [_canRunMLFunction] gate is bypassed.
  ///
  /// Errors raised while pulling the feedback propagate to the caller; errors
  /// raised during clustering itself are logged and swallowed. In both cases
  /// the running flags and any pending pause request are reset.
  Future<void> clusterAllImages({
    bool clusterInBuckets = true,
    bool force = false,
  }) async {
    if (!_canRunMLFunction(function: "Clustering") && !force) return;
    if (_clusteringIsHappening) {
      _logger.info("clusterAllImages() is already running, returning");
      return;
    }

    _logger.info("`clusterAllImages()` called");
    _isIndexingOrClusteringRunning = true;
    _clusteringIsHappening = true;
    final clusterAllImagesTime = DateTime.now();

    // The outer try/finally guarantees the flags set above are reset even if
    // the feedback fetch below throws.
    try {
      _logger.info('Pulling remote feedback before actually clustering');
      await PersonService.instance.fetchRemoteClusterFeedback();
      final persons = await PersonService.instance.getPersons();

      // faceID -> cluster ids of the person that face was rejected from.
      final faceIdNotToCluster = <String, List<String>>{};
      for (final person in persons) {
        if (person.data.rejectedFaceIDs.isNotEmpty) {
          final personClusters =
              person.data.assigned.map((e) => e.id).toList();
          for (final faceID in person.data.rejectedFaceIDs) {
            faceIdNotToCluster[faceID] = personClusters;
          }
        }
      }

      try {
        // Get a sense of the total number of faces in the database
        final int totalFaces = await mlDataDB.getTotalFaceCount();
        final fileIDToCreationTime =
            await FilesDB.instance.getFileIDToCreationTime();
        final startEmbeddingFetch = DateTime.now();

        // read all embeddings
        final result = await mlDataDB.getFaceInfoForClustering(
          maxFaces: totalFaces,
        );
        final Set<int> missingFileIDs = {};
        final allFaceInfoForClustering = <FaceDbInfoForClustering>[];
        for (final faceInfo in result) {
          if (!fileIDToCreationTime.containsKey(faceInfo.fileID)) {
            // Faces whose file has no known creation time are left out.
            missingFileIDs.add(faceInfo.fileID);
          } else {
            if (faceIdNotToCluster.containsKey(faceInfo.faceID)) {
              faceInfo.rejectedClusterIds =
                  faceIdNotToCluster[faceInfo.faceID];
            }
            allFaceInfoForClustering.add(faceInfo);
          }
        }

        // sort the embeddings based on file creation time, newest first
        allFaceInfoForClustering.sort((b, a) {
          return fileIDToCreationTime[a.fileID]!
              .compareTo(fileIDToCreationTime[b.fileID]!);
        });
        _logger.info(
          'Getting and sorting embeddings took ${DateTime.now().difference(startEmbeddingFetch).inMilliseconds} ms for ${allFaceInfoForClustering.length} embeddings'
          ' and ${missingFileIDs.length} missing fileIDs',
        );

        // Get the current cluster statistics
        final Map<String, (Uint8List, int)> oldClusterSummaries =
            await mlDataDB.getAllClusterSummary();

        if (clusterInBuckets) {
          const int bucketSize = 10000;
          const int offsetIncrement = 7500;
          int offset = 0;
          int bucket = 1;

          while (true) {
            if (_shouldPauseIndexingAndClustering) {
              _logger.info(
                "MLController does not allow running ML, stopping before clustering bucket $bucket",
              );
              break;
            }
            if (offset > allFaceInfoForClustering.length - 1) {
              _logger.warning(
                'faceIdToEmbeddingBucket is empty, this should ideally not happen as it should have stopped earlier. offset: $offset, totalFaces: $totalFaces',
              );
              break;
            }
            if (offset > totalFaces) {
              _logger.warning(
                'offset > totalFaces, this should ideally not happen. offset: $offset, totalFaces: $totalFaces',
              );
              break;
            }

            final bucketStartTime = DateTime.now();
            final faceInfoForClustering = allFaceInfoForClustering.sublist(
              offset,
              min(offset + bucketSize, allFaceInfoForClustering.length),
            );

            if (faceInfoForClustering.every((face) => face.clusterId != null)) {
              _logger.info('Everything in bucket $bucket is already clustered');
              if (offset + bucketSize >= totalFaces) {
                _logger.info('All faces clustered');
                break;
              } else {
                _logger.info('Skipping to next bucket');
                offset += offsetIncrement;
                bucket++;
                continue;
              }
            }

            final clusteringResult =
                await FaceClusteringService.instance.predictLinearIsolate(
              faceInfoForClustering.toSet(),
              fileIDToCreationTime: fileIDToCreationTime,
              offset: offset,
              oldClusterSummaries: oldClusterSummaries,
            );
            if (clusteringResult == null) {
              _logger.warning("faceIdToCluster is null");
              return;
            }

            await mlDataDB
                .updateFaceIdToClusterId(clusteringResult.newFaceIdToCluster);
            await mlDataDB
                .clusterSummaryUpdate(clusteringResult.newClusterSummaries);
            Bus.instance.fire(PeopleChangedEvent());

            // Mirror the new assignments in memory: buckets overlap, so the
            // next bucket sees these faces again.
            for (final faceInfo in faceInfoForClustering) {
              faceInfo.clusterId ??=
                  clusteringResult.newFaceIdToCluster[faceInfo.faceID];
            }
            for (final clusterUpdate
                in clusteringResult.newClusterSummaries.entries) {
              oldClusterSummaries[clusterUpdate.key] = clusterUpdate.value;
            }
            _logger.info(
              'Done with clustering ${offset + faceInfoForClustering.length} embeddings (${(100 * (offset + faceInfoForClustering.length) / totalFaces).toStringAsFixed(0)}%) in bucket $bucket, offset: $offset, in ${DateTime.now().difference(bucketStartTime).inSeconds} seconds',
            );
            if (offset + bucketSize >= totalFaces) {
              _logger.info('All faces clustered');
              break;
            }
            offset += offsetIncrement;
            bucket++;
          }
        } else {
          final clusterStartTime = DateTime.now();
          // Cluster the embeddings using the linear clustering algorithm, returning a map from faceID to clusterID
          final clusteringResult =
              await FaceClusteringService.instance.predictLinearIsolate(
            allFaceInfoForClustering.toSet(),
            fileIDToCreationTime: fileIDToCreationTime,
            oldClusterSummaries: oldClusterSummaries,
          );
          if (clusteringResult == null) {
            _logger.warning("faceIdToCluster is null");
            return;
          }
          final clusterDoneTime = DateTime.now();
          _logger.info(
            'done with clustering ${allFaceInfoForClustering.length} in ${clusterDoneTime.difference(clusterStartTime).inSeconds} seconds ',
          );

          // Store the updated clusterIDs in the database
          _logger.info(
            'Updating ${clusteringResult.newFaceIdToCluster.length} FaceIDs with clusterIDs in the DB',
          );
          await mlDataDB
              .updateFaceIdToClusterId(clusteringResult.newFaceIdToCluster);
          await mlDataDB
              .clusterSummaryUpdate(clusteringResult.newClusterSummaries);
          Bus.instance.fire(PeopleChangedEvent());
          _logger.info('Done updating FaceIDs with clusterIDs in the DB, in '
              '${DateTime.now().difference(clusterDoneTime).inSeconds} seconds');
        }
        _logger.info('clusterAllImages() finished, in '
            '${DateTime.now().difference(clusterAllImagesTime).inSeconds} seconds');
      } catch (e, s) {
        _logger.severe("`clusterAllImages` failed", e, s);
      }
    } finally {
      _clusteringIsHappening = false;
      _isIndexingOrClusteringRunning = false;
      _cancelPauseIndexingAndClustering();
    }
  }

  /// Runs ML on a single file and stores the results on remote and locally.
  ///
  /// Returns whether ML actually ran for the file. It also returns true when
  /// the file failed with one of the accepted errors (thumbnail retrieval,
  /// invalid image format, file too large): empty results are stored in that
  /// case so the file is not picked up again. Any other failure is logged and
  /// yields false without storing anything.
  Future<bool> processImage(FileMLInstruction instruction) async {
    bool actuallyRanML = false;

    try {
      final String filePath = await getImagePathForML(instruction.file);

      final MLResult? result = await MLIndexingIsolate.instance.analyzeImage(
        instruction,
        filePath,
      );
      // Check if there's no result simply because MLController paused indexing
      if (result == null) {
        if (!_shouldPauseIndexingAndClustering) {
          _logger.severe(
            "Failed to analyze image with uploadedFileID: ${instruction.file.uploadedFileID}",
          );
        }
        return actuallyRanML;
      }
      // Check anything actually ran
      actuallyRanML = result.ranML;
      if (!actuallyRanML) return actuallyRanML;

      // Prepare storing data on remote
      final FileDataEntity dataEntity = instruction.existingRemoteFileML ??
          FileDataEntity.empty(
            instruction.file.uploadedFileID!,
            DataType.mlData,
          );

      // Faces results
      final List<Face> faces = [];
      if (result.facesRan) {
        final detectedFaces = result.faces!;
        if (detectedFaces.isEmpty) {
          // An empty face entry records that the file had no faces.
          faces.add(Face.empty(result.fileId));
        } else {
          for (final faceResult in detectedFaces) {
            faces.add(
              Face.fromFaceResult(
                faceResult,
                result.fileId,
                result.decodedImageSize,
              ),
            );
          }
        }
        dataEntity.putFace(
          RemoteFaceEmbedding(
            faces,
            faceMlVersion,
            client: client,
            height: result.decodedImageSize.height,
            width: result.decodedImageSize.width,
          ),
        );
      }

      // Clip results
      if (result.clipRan) {
        dataEntity.putClip(
          RemoteClipEmbedding(
            result.clip!.embedding,
            version: clipMlVersion,
            client: client,
          ),
        );
      }

      // Storing results on remote
      await FileDataService.instance.putFileData(
        instruction.file,
        dataEntity,
      );
      _logger.info("ML results for fileID ${result.fileId} stored on remote");

      // Storing results locally
      if (result.facesRan) await mlDataDB.bulkInsertFaces(faces);
      if (result.clipRan) {
        await SemanticSearchService.instance.storeClipImageResult(
          result.clip!,
        );
      }
      _logger.info("ML results for fileID ${result.fileId} stored locally");
      return actuallyRanML;
    } catch (e, s) {
      final String errorString = e.toString();
      final String format = instruction.file.displayName.split('.').last;
      final int? size = instruction.file.fileSize;
      final fileType = instruction.file.fileType;
      // Errors matched by name here are treated as permanent for this file.
      final bool acceptedIssue =
          errorString.contains('ThumbnailRetrievalException') ||
              errorString.contains('InvalidImageFormatException') ||
              errorString.contains('FileSizeTooLargeForMobileIndexing');
      if (acceptedIssue) {
        _logger.severe(
          '$errorString for fileID ${instruction.file.uploadedFileID} (format $format, type $fileType, size $size), storing empty results so indexing does not get stuck',
          e,
          s,
        );
        await mlDataDB.bulkInsertFaces(
          [Face.empty(instruction.file.uploadedFileID!, error: true)],
        );
        await SemanticSearchService.instance.storeEmptyClipImageResult(
          instruction.file,
        );
        return true;
      }
      _logger.severe(
        "Failed to index file for fileID ${instruction.file.uploadedFileID} (format $format, type $fileType, size $size). Not storing any results locally, which means it will be automatically retried later.",
        e,
        s,
      );
      return false;
    }
  }

  /// Whether the ML function named [function] may start now.
  ///
  /// [function] is only used in log messages. In debug mode on iOS the only
  /// requirement is that no indexing/clustering is running. Otherwise the
  /// function is refused (with the reason and the current status logged) when
  /// indexing/clustering is running, the controller does not allow ML,
  /// [debugIndexingDisabled] is set, or a pause is pending.
  bool _canRunMLFunction({required String function}) {
    if (kDebugMode && Platform.isIOS && !_isIndexingOrClusteringRunning) {
      return true;
    }
    if (_isIndexingOrClusteringRunning) {
      _logger.info(
        "Cannot run $function because indexing or clustering is already running",
      );
      _logStatus();
      return false;
    }
    if (_mlControllerStatus == false) {
      _logger.info(
        "Cannot run $function because MLController does not allow it",
      );
      _logStatus();
      return false;
    }
    if (debugIndexingDisabled) {
      _logger.info(
        "Cannot run $function because debugIndexingDisabled is true",
      );
      _logStatus();
      return false;
    }
    if (_shouldPauseIndexingAndClustering) {
      // This should ideally not be triggered, because one of the above should be triggered instead.
      _logger.warning(
        "Cannot run $function because indexing and clustering is being paused",
      );
      _logStatus();
      return false;
    }
    return true;
  }

  /// Logs the flags and settings that decide whether ML may run.
  void _logStatus() {
    final String status = '''
    isInternalUser: ${flagService.internalUser}
    Local indexing: ${localSettings.isMLLocalIndexingEnabled}
    canRunMLController: $_mlControllerStatus
    isIndexingOrClusteringRunning: $_isIndexingOrClusteringRunning
    shouldPauseIndexingAndClustering: $_shouldPauseIndexingAndClustering
    debugIndexingDisabled: $debugIndexingDisabled
    ''';
    _logger.info(status);
  }
}