[mob][photos] Use existing image utils for clip preprocessing

parent f03cea7252
commit d7e1b737d8
@@ -2,9 +2,9 @@ import "dart:io";
 import "dart:math";
 import "dart:typed_data";

-import 'package:image/image.dart' as img;
 import "package:logging/logging.dart";
 import "package:onnxruntime/onnxruntime.dart";
+import "package:photos/utils/image_ml_util.dart";

 class OnnxImageEncoder {
   final _logger = Logger("OnnxImageEncoder");
@@ -27,80 +27,14 @@ class OnnxImageEncoder {
   }

   Future<List<double>> inferByImage(Map args) async {
-    final rgb = img.decodeImage(await File(args["imagePath"]).readAsBytes())!;
+    final imageData = await File(args["imagePath"]).readAsBytes();
+    final image = await decodeImageFromData(imageData);
+    final ByteData imgByteData = await getByteDataFromImage(image);

-    final int imageWidth = rgb.width;
-    final int imageHeight = rgb.height;
-    final int inputSize = 3 * imageWidth * imageHeight;
-    final inputImage = List.filled(inputSize, 0.toDouble());
-
-    const int requiredWidth = 224;
-    const int requiredHeight = 224;
-    const int totalSize = 3 * requiredWidth * requiredHeight;
-
-    // Load image into List<double> inputImage
-    for (int y = 0; y < imageHeight; y++) {
-      for (int x = 0; x < imageWidth; x++) {
-        final int i = 3 * (y * imageWidth + x);
-        final pixel = rgb.getPixel(x, y);
-        inputImage[i] = pixel.r.toDouble();
-        inputImage[i + 1] = pixel.g.toDouble();
-        inputImage[i + 2] = pixel.b.toDouble();
-      }
-    }
-
-    final result = List.filled(totalSize, 0.toDouble());
-    final invertedScale = max(imageWidth, imageHeight) / 224;
-
-    final int scaledWidth = (imageWidth / invertedScale + 0.5).toInt();
-    final int scaledHeight = (imageHeight / invertedScale + 0.5).toInt();
-
-    final mean = [0.48145466, 0.4578275, 0.40821073];
-    final std = [0.26862954, 0.26130258, 0.27577711];
-
-    for (int y = 0; y < scaledHeight; y++) {
-      for (int x = 0; x < scaledWidth; x++) {
-        for (int c = 0; c < 3; c++) {
-          //linear interpolation
-          final double scaledX = (x + 0.5) * invertedScale - 0.5;
-          final double scaledY = (y + 0.5) * invertedScale - 0.5;
-
-          final int x0 = max(0, scaledX.floor());
-          final int y0 = max(0, scaledY.floor());
-
-          final int x1 = min(x0 + 1, imageWidth - 1);
-          final int y1 = min(y0 + 1, imageHeight - 1);
-
-          final double dx = scaledX - x0;
-          final double dy = scaledY - y0;
-
-          final int j00 = 3 * (y0 * imageWidth + x0) + c;
-          final int j01 = 3 * (y0 * imageWidth + x1) + c;
-          final int j10 = 3 * (y1 * imageWidth + x0) + c;
-          final int j11 = 3 * (y1 * imageWidth + x1) + c;
-
-          final double pixel1 = inputImage[j00];
-          final double pixel2 = inputImage[j01];
-          final double pixel3 = inputImage[j10];
-          final double pixel4 = inputImage[j11];
-
-          final double v0 = pixel1 * (1 - dx) + pixel2 * dx;
-          final double v1 = pixel3 * (1 - dx) + pixel4 * dx;
-
-          final double v = v0 * (1 - dy) + v1 * dy;
-
-          final int v2 = min(max(v.round(), 0), 255);
-
-          final int i = (y * requiredWidth + x) + c * 224 * 224;
-
-          result[i] = ((v2 / 255) - mean[c]) / std[c];
-        }
-      }
-    }
-    final floatList = Float32List.fromList(result);
+    final inputList = await preprocessImageClip(image, imgByteData);

     final inputOrt =
-        OrtValueTensor.createTensorWithDataList(floatList, [1, 3, 224, 224]);
+        OrtValueTensor.createTensorWithDataList(inputList, [1, 3, 224, 224]);
     final inputs = {'input': inputOrt};
     final session = OrtSession.fromAddress(args["address"]);
     final runOptions = OrtRunOptions();
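For reference, a minimal caller sketch, not part of this diff: everything here other than OnnxImageEncoder, inferByImage and the two map keys is a hypothetical placeholder. The args contract is unchanged; only the preprocessing inside inferByImage moves to the shared helper.

// Hypothetical usage sketch: embed one photo with the CLIP image encoder.
final encoder = OnnxImageEncoder();
final List<double> embedding = await encoder.inferByImage({
  "imagePath": photoFile.path, // photoFile: an assumed dart:io File for the photo
  "address": clipSessionAddress, // assumed: native address of an already-loaded OrtSession
});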
@@ -193,6 +193,48 @@ Future<(Float32List, Dimensions, Dimensions)>
   );
 }

+Future<Float32List> preprocessImageClip(
+  Image image,
+  ByteData imgByteData,
+) async {
+  const int requiredWidth = 224;
+  const int requiredHeight = 224;
+  const int requiredSize = 3 * requiredWidth * requiredHeight;
+  const mean = [0.48145466, 0.4578275, 0.40821073];
+  const std = [0.26862954, 0.26130258, 0.27577711];
+
+  final scale = min(requiredWidth / image.width, requiredHeight / image.height);
+  final scaledWidth = (image.width * scale).round().clamp(0, requiredWidth);
+  final scaledHeight = (image.height * scale).round().clamp(0, requiredHeight);
+
+  final processedBytes = Float32List(requiredSize);
+  final buffer = Float32List.view(processedBytes.buffer);
+  int pixelIndex = 0;
+  const int greenOff = requiredHeight * requiredWidth;
+  const int blueOff = 2 * requiredHeight * requiredWidth;
+  for (var h = 0; h < requiredHeight; h++) {
+    for (var w = 0; w < requiredWidth; w++) {
+      late Color pixel;
+      if (w >= scaledWidth || h >= scaledHeight) {
+        pixel = const Color.fromRGBO(114, 114, 114, 1.0);
+      } else {
+        pixel = _getPixelBicubic(
+          w / scale,
+          h / scale,
+          image,
+          imgByteData,
+        );
+      }
+      buffer[pixelIndex] = ((pixel.red / 255) - mean[0]) / std[0];
+      buffer[pixelIndex + greenOff] = ((pixel.green / 255) - mean[1]) / std[1];
+      buffer[pixelIndex + blueOff] = ((pixel.blue / 255) - mean[2]) / std[2];
+      pixelIndex++;
+    }
+  }
+
+  return processedBytes;
+}
+
 Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
     preprocessToMobileFaceNetFloat32List(
   Image image,
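A side note on the index math above, with a small sketch that is not part of the commit: preprocessImageClip fills the buffer in channel-planar (CHW) order, which is why greenOff and blueOff jump by whole 224×224 planes and why the result can be passed straight to OrtValueTensor.createTensorWithDataList with shape [1, 3, 224, 224]. The mean/std triples are the standard CLIP image normalization constants.

// Sketch: flat index into a CHW buffer for channel c (0 = R, 1 = G, 2 = B) at
// row h, column w. Since pixelIndex == h * requiredWidth + w in the loop above,
// the three writes are chwIndex(0, h, w), chwIndex(1, h, w) and chwIndex(2, h, w).
int chwIndex(int c, int h, int w, {int height = 224, int width = 224}) {
  return c * height * width + h * width + w;
}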
@@ -225,7 +267,9 @@ Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
         SimilarityTransform.estimate(face.allKeypoints);
     if (!correctlyEstimated) {
       log('Face alignment failed because not able to estimate SimilarityTransform, for face: $face');
-      throw Exception('Face alignment failed because not able to estimate SimilarityTransform');
+      throw Exception(
+        'Face alignment failed because not able to estimate SimilarityTransform',
+      );
     }
     alignmentResults.add(alignmentResult);

@@ -639,4 +683,4 @@ Color _getPixelBicubic(num fx, num fy, Image image, ByteData byteDataRgba) {
   // final c3 = cubic(dy, ip3, ic3, in3, ia3);

   return Color.fromRGBO(c0, c1, c2, 1.0);
-}
+}