[mob] Use separate table for storing clusters

This commit is contained in:
Neeraj Gupta 2024-04-01 17:34:35 +05:30
parent 323521d496
commit 51b51ff2b1
4 changed files with 82 additions and 57 deletions

View File

@ -53,6 +53,8 @@ class FaceMLDataDB {
await db.execute(createClusterPersonTable); await db.execute(createClusterPersonTable);
await db.execute(createClusterSummaryTable); await db.execute(createClusterSummaryTable);
await db.execute(createNotPersonFeedbackTable); await db.execute(createNotPersonFeedbackTable);
await db.execute(createFaceClustersTable);
await db.execute(fcClusterIDIndex);
} }
// bulkInsertFaces inserts the faces in the database in batches of 1000. // bulkInsertFaces inserts the faces in the database in batches of 1000.
@ -96,12 +98,10 @@ class FaceMLDataDB {
for (final entry in batch) { for (final entry in batch) {
final faceID = entry.key; final faceID = entry.key;
final personID = entry.value; final personID = entry.value;
batchUpdate.insert(
batchUpdate.update( faceClustersTable,
facesTable, {fcClusterID: personID, fcFaceId: faceID},
{faceClusterId: personID}, conflictAlgorithm: ConflictAlgorithm.replace,
where: '$faceIDColumn = ? AND $faceClusterId IS NULL',
whereArgs: [faceID],
); );
} }
@ -243,12 +243,19 @@ class FaceMLDataDB {
} }
} }
if (clusterID != null) { if (clusterID != null) {
final clusterIDs = [clusterID]; final List<Map<String, dynamic>> faceMaps = await db.query(
final List<Map<String, dynamic>> faceMaps = await db.rawQuery( faceClustersTable,
'SELECT * FROM $facesTable where $faceClusterId IN (${clusterIDs.join(",")}) AND $fileIDColumn = $recentFileID ', columns: [fcFaceId],
where: '$fcClusterID = ?',
whereArgs: [clusterID],
); );
if (faceMaps.isNotEmpty) { final List<Face>? faces = await getFacesForGivenFileID(recentFileID);
return mapRowToFace(faceMaps.first); if (faces != null) {
for (final face in faces) {
if (faceMaps.any((element) => element[fcFaceId] == face.faceID)) {
return face;
}
}
} }
} }
if (personID == null && clusterID == null) { if (personID == null && clusterID == null) {
@ -296,11 +303,11 @@ class FaceMLDataDB {
) async { ) async {
final db = await instance.database; final db = await instance.database;
final List<Map<String, dynamic>> maps = await db.rawQuery( final List<Map<String, dynamic>> maps = await db.rawQuery(
'SELECT $faceIDColumn, $faceClusterId FROM $facesTable where $faceIDColumn IN (${faceIds.map((id) => "'$id'").join(",")})', 'SELECT $fcFaceId, $fcClusterID FROM $faceClustersTable where $fcFaceId IN (${faceIds.map((id) => "'$id'").join(",")})',
); );
final Map<String, int?> result = {}; final Map<String, int?> result = {};
for (final map in maps) { for (final map in maps) {
result[map[faceIDColumn] as String] = map[faceClusterId] as int?; result[map[fcFaceId] as String] = map[fcClusterID] as int?;
} }
return result; return result;
} }
@ -309,13 +316,15 @@ class FaceMLDataDB {
final Map<int, Set<int>> result = {}; final Map<int, Set<int>> result = {};
final db = await instance.database; final db = await instance.database;
final List<Map<String, dynamic>> maps = await db.rawQuery( final List<Map<String, dynamic>> maps = await db.rawQuery(
'SELECT $faceClusterId, $fileIDColumn FROM $facesTable where $faceClusterId IS NOT NULL', 'SELECT $fcClusterID, $fcFaceId FROM $faceClustersTable',
); );
for (final map in maps) { for (final map in maps) {
final personID = map[faceClusterId] as int; final clusterID = map[fcClusterID] as int;
final fileID = map[fileIDColumn] as int; final faceID = map[fcFaceId] as String;
result[fileID] = (result[fileID] ?? {})..add(personID); final x = faceID.split('_').first;
final fileID = int.parse(x);
result[fileID] = (result[fileID] ?? {})..add(clusterID);
} }
return result; return result;
} }
@ -331,18 +340,17 @@ class FaceMLDataDB {
for (final map in faceIDToPersonID.entries) { for (final map in faceIDToPersonID.entries) {
final faceID = map.key; final faceID = map.key;
final clusterID = map.value; final clusterID = map.value;
batch.update( batch.insert(
facesTable, faceClustersTable,
{faceClusterId: clusterID}, {fcFaceId: faceID, fcClusterID: clusterID},
where: '$faceIDColumn = ?', conflictAlgorithm: ConflictAlgorithm.replace,
whereArgs: [faceID],
); );
} }
// Commit the batch // Commit the batch
await batch.commit(noResult: true); await batch.commit(noResult: true);
} }
/// Returns a map of faceID to record of faceClusterID and faceEmbeddingBlob /// Returns a map of faceID to record of clusterId and faceEmbeddingBlob
/// ///
/// Only selects faces with score greater than [minScore] and blur score greater than [minClarity] /// Only selects faces with score greater than [minScore] and blur score greater than [minClarity]
Future<Map<String, (int?, Uint8List)>> getFaceEmbeddingMap({ Future<Map<String, (int?, Uint8List)>> getFaceEmbeddingMap({
@ -372,10 +380,15 @@ class FaceMLDataDB {
if (maps.isEmpty) { if (maps.isEmpty) {
break; break;
} }
final List<String> faceIds = [];
for (final map in maps) {
faceIds.add(map[faceIDColumn] as String);
}
final faceIdToClusterId = await getFaceIdsToClusterIds(faceIds);
for (final map in maps) { for (final map in maps) {
final faceID = map[faceIDColumn] as String; final faceID = map[faceIDColumn] as String;
result[faceID] = result[faceID] =
(map[faceClusterId] as int?, map[faceEmbeddingBlob] as Uint8List); (faceIdToClusterId[faceID], map[faceEmbeddingBlob] as Uint8List);
} }
if (result.length >= maxFaces) { if (result.length >= maxFaces) {
break; break;
@ -435,10 +448,9 @@ class FaceMLDataDB {
Future<void> resetClusterIDs() async { Future<void> resetClusterIDs() async {
final db = await instance.database; final db = await instance.database;
await db.update( await db.rawQuery(dropFaceClustersTable);
facesTable, await db.rawQuery(createFaceClustersTable);
{faceClusterId: null}, await db.rawQuery(fcClusterIDIndex);
);
} }
Future<void> insert(Person p, int cluserID) async { Future<void> insert(Person p, int cluserID) async {
@ -514,16 +526,17 @@ class FaceMLDataDB {
final db = instance.database; final db = instance.database;
return db.then((db) async { return db.then((db) async {
final List<Map<String, dynamic>> maps = await db.rawQuery( final List<Map<String, dynamic>> maps = await db.rawQuery(
'SELECT $clusterPersonTable.$cluserIDColumn, $fileIDColumn FROM $facesTable ' 'SELECT $clusterPersonTable.$cluserIDColumn, $fcFaceId FROM $faceClustersTable '
'INNER JOIN $clusterPersonTable ' 'INNER JOIN $clusterPersonTable '
'ON $facesTable.$faceClusterId = $clusterPersonTable.$cluserIDColumn ' 'ON $faceClustersTable.$fcClusterID = $clusterPersonTable.$cluserIDColumn '
'WHERE $clusterPersonTable.$personIdColumn = ?', 'WHERE $clusterPersonTable.$personIdColumn = ?',
[personID], [personID],
); );
final Map<int, Set<int>> result = {}; final Map<int, Set<int>> result = {};
for (final map in maps) { for (final map in maps) {
final clusterID = map[cluserIDColumn] as int; final clusterID = map[cluserIDColumn] as int;
final fileID = map[fileIDColumn] as int; final String faceID = map[fcFaceId] as String;
final fileID = int.parse(faceID.split('_').first);
result[fileID] = (result[fileID] ?? {})..add(clusterID); result[fileID] = (result[fileID] ?? {})..add(clusterID);
} }
return result; return result;
@ -664,21 +677,24 @@ class FaceMLDataDB {
Future<void> removeFilesFromPerson(List<EnteFile> files, Person p) async { Future<void> removeFilesFromPerson(List<EnteFile> files, Person p) async {
final db = await instance.database; final db = await instance.database;
final result = await db.rawQuery( final faceIdsResult = await db.rawQuery(
'SELECT $faceIDColumn FROM $facesTable LEFT JOIN $clusterPersonTable ' 'SELECT $fcFaceId FROM $faceClustersTable LEFT JOIN $clusterPersonTable '
'ON $facesTable.$faceClusterId = $clusterPersonTable.$cluserIDColumn ' 'ON $faceClustersTable.$fcClusterID = $clusterPersonTable.$cluserIDColumn '
'WHERE $clusterPersonTable.$personIdColumn = ? AND $facesTable.$fileIDColumn IN (${files.map((e) => e.uploadedFileID).join(",")})', 'WHERE $clusterPersonTable.$personIdColumn = ?',
[p.remoteID], [p.remoteID],
); );
// get max clusterID final Set<String> fileIds = {};
final maxRows = for (final enteFile in files) {
await db.rawQuery('SELECT max($faceClusterId) from $facesTable'); fileIds.add(enteFile.uploadedFileID.toString());
int maxClusterID = maxRows.first.values.first as int; }
int maxClusterID = DateTime.now().millisecondsSinceEpoch;
final Map<String, int> faceIDToClusterID = {}; final Map<String, int> faceIDToClusterID = {};
for (final faceRow in result) { for (final row in faceIdsResult) {
final faceID = faceRow[faceIDColumn] as String; final faceID = row[fcFaceId] as String;
faceIDToClusterID[faceID] = maxClusterID + 1; if (fileIds.contains(faceID.split('_').first)) {
maxClusterID = maxClusterID + 1; maxClusterID += 1;
faceIDToClusterID[faceID] = maxClusterID;
}
} }
await forceUpdateClusterIds(faceIDToClusterID); await forceUpdateClusterIds(faceIDToClusterID);
} }
@ -688,17 +704,23 @@ class FaceMLDataDB {
int clusterID, int clusterID,
) async { ) async {
final db = await instance.database; final db = await instance.database;
final result = await db.rawQuery( final faceIdsResult = await db.rawQuery(
'SELECT $faceIDColumn FROM $facesTable ' 'SELECT $fcFaceId FROM $faceClustersTable '
'WHERE $facesTable.$faceClusterId = ? AND $facesTable.$fileIDColumn IN (${files.map((e) => e.uploadedFileID).join(",")})', 'WHERE $faceClustersTable.$fcClusterID = ?',
[clusterID], [clusterID],
); );
final Map<String, int> faceIDToClusterID = {}; final Set<String> fileIds = {};
for (final enteFile in files) {
fileIds.add(enteFile.uploadedFileID.toString());
}
int maxClusterID = DateTime.now().millisecondsSinceEpoch; int maxClusterID = DateTime.now().millisecondsSinceEpoch;
for (final faceRow in result) { final Map<String, int> faceIDToClusterID = {};
maxClusterID += 1; for (final row in faceIdsResult) {
final faceID = faceRow[faceIDColumn] as String; final faceID = row[fcFaceId] as String;
faceIDToClusterID[faceID] = maxClusterID; if (fileIds.contains(faceID.split('_').first)) {
maxClusterID += 1;
faceIDToClusterID[faceID] = maxClusterID;
}
} }
await forceUpdateClusterIds(faceIDToClusterID); await forceUpdateClusterIds(faceIDToClusterID);
} }

View File

@ -35,13 +35,15 @@ const fcFaceId = 'face_id';
// fcFaceId is the primary key (one cluster per face) and a foreign key to the faces table; fcClusterId is indexed separately // fcFaceId is the primary key (one cluster per face) and a foreign key to the faces table; fcClusterId is indexed separately
const createFaceClustersTable = ''' const createFaceClustersTable = '''
CREATE TABLE IF NOT EXISTS $faceClustersTable ( CREATE TABLE IF NOT EXISTS $faceClustersTable (
$fcClusterID INTEGER NOT NULL,
$fcFaceId TEXT NOT NULL, $fcFaceId TEXT NOT NULL,
PRIMARY KEY($fcClusterID, $fcFaceId), $fcClusterID INTEGER NOT NULL,
PRIMARY KEY($fcFaceId),
FOREIGN KEY($fcFaceId) REFERENCES $facesTable($faceIDColumn) FOREIGN KEY($fcFaceId) REFERENCES $facesTable($faceIDColumn)
); );
'''; ''';
// SQL statement that creates a non-unique index on the cluster-id column of
// the face-clusters table, so lookups of all faces in a cluster avoid a full
// table scan.
//
// The table and column names are interpolated from [faceClustersTable] and
// [fcClusterID] so the statement always targets the same identifiers that
// [createFaceClustersTable] uses; hard-coding the Dart constant names here
// would reference a table that does not exist.
const fcClusterIDIndex =
'''CREATE INDEX IF NOT EXISTS idx_fcClusterID ON $faceClustersTable($fcClusterID);''';
const dropFaceClustersTable = 'DROP TABLE IF EXISTS $faceClustersTable'; const dropFaceClustersTable = 'DROP TABLE IF EXISTS $faceClustersTable';
//##endregion //##endregion

View File

@ -80,7 +80,7 @@ class FaceMlService {
bool isInitialized = false; bool isInitialized = false;
bool isImageIndexRunning = false; bool isImageIndexRunning = false;
int kParallelism = 15; int kParallelism = 100;
Future<void> init({bool initializeImageMlIsolate = false}) async { Future<void> init({bool initializeImageMlIsolate = false}) async {
return _initLock.synchronized(() async { return _initLock.synchronized(() async {
@ -524,6 +524,7 @@ class FaceMlService {
try { try {
final EnteWatch? w = kDebugMode ? EnteWatch("face_em_fetch") : null; final EnteWatch? w = kDebugMode ? EnteWatch("face_em_fetch") : null;
w?.start(); w?.start();
w?.log('starting remote fetch for ${fileIds.length} files');
final res = final res =
await RemoteFileMLService.instance.getFilessEmbedding(fileIds); await RemoteFileMLService.instance.getFilessEmbedding(fileIds);
w?.logAndReset('fetched ${res.mlData.length} embeddings'); w?.logAndReset('fetched ${res.mlData.length} embeddings');
@ -1222,5 +1223,4 @@ class FaceMlService {
return indexedFileIds.containsKey(id) && return indexedFileIds.containsKey(id) &&
indexedFileIds[id]! >= faceMlVersion; indexedFileIds[id]! >= faceMlVersion;
} }
} }

View File

@ -169,7 +169,8 @@ class _AllSearchSectionsState extends State<AllSearchSections> {
curve: Curves.easeOut, curve: Curves.easeOut,
); );
} else if (snapshot.hasError) { } else if (snapshot.hasError) {
_logger.severe('Failed to load sections: ', snapshot.error); _logger.severe('Failed to load sections: ', snapshot.error,
snapshot.stackTrace,);
if (kDebugMode) { if (kDebugMode) {
return Padding( return Padding(
padding: const EdgeInsets.only(bottom: 72), padding: const EdgeInsets.only(bottom: 72),