[mob][photos] Use existing image utils for clip preprocessing

This commit is contained in:
laurenspriem 2024-06-28 18:20:35 +05:30
parent f03cea7252
commit d7e1b737d8
2 changed files with 52 additions and 74 deletions

View File

@@ -2,9 +2,9 @@ import "dart:io";
import "dart:math";
import "dart:typed_data";
import 'package:image/image.dart' as img;
import "package:logging/logging.dart";
import "package:onnxruntime/onnxruntime.dart";
import "package:photos/utils/image_ml_util.dart";
class OnnxImageEncoder {
final _logger = Logger("OnnxImageEncoder");
@@ -27,80 +27,14 @@ class OnnxImageEncoder {
}
Future<List<double>> inferByImage(Map args) async {
final rgb = img.decodeImage(await File(args["imagePath"]).readAsBytes())!;
final imageData = await File(args["imagePath"]).readAsBytes();
final image = await decodeImageFromData(imageData);
final ByteData imgByteData = await getByteDataFromImage(image);
final int imageWidth = rgb.width;
final int imageHeight = rgb.height;
final int inputSize = 3 * imageWidth * imageHeight;
final inputImage = List.filled(inputSize, 0.toDouble());
const int requiredWidth = 224;
const int requiredHeight = 224;
const int totalSize = 3 * requiredWidth * requiredHeight;
// Load image into List<double> inputImage
for (int y = 0; y < imageHeight; y++) {
for (int x = 0; x < imageWidth; x++) {
final int i = 3 * (y * imageWidth + x);
final pixel = rgb.getPixel(x, y);
inputImage[i] = pixel.r.toDouble();
inputImage[i + 1] = pixel.g.toDouble();
inputImage[i + 2] = pixel.b.toDouble();
}
}
final result = List.filled(totalSize, 0.toDouble());
final invertedScale = max(imageWidth, imageHeight) / 224;
final int scaledWidth = (imageWidth / invertedScale + 0.5).toInt();
final int scaledHeight = (imageHeight / invertedScale + 0.5).toInt();
final mean = [0.48145466, 0.4578275, 0.40821073];
final std = [0.26862954, 0.26130258, 0.27577711];
for (int y = 0; y < scaledHeight; y++) {
for (int x = 0; x < scaledWidth; x++) {
for (int c = 0; c < 3; c++) {
//linear interpolation
final double scaledX = (x + 0.5) * invertedScale - 0.5;
final double scaledY = (y + 0.5) * invertedScale - 0.5;
final int x0 = max(0, scaledX.floor());
final int y0 = max(0, scaledY.floor());
final int x1 = min(x0 + 1, imageWidth - 1);
final int y1 = min(y0 + 1, imageHeight - 1);
final double dx = scaledX - x0;
final double dy = scaledY - y0;
final int j00 = 3 * (y0 * imageWidth + x0) + c;
final int j01 = 3 * (y0 * imageWidth + x1) + c;
final int j10 = 3 * (y1 * imageWidth + x0) + c;
final int j11 = 3 * (y1 * imageWidth + x1) + c;
final double pixel1 = inputImage[j00];
final double pixel2 = inputImage[j01];
final double pixel3 = inputImage[j10];
final double pixel4 = inputImage[j11];
final double v0 = pixel1 * (1 - dx) + pixel2 * dx;
final double v1 = pixel3 * (1 - dx) + pixel4 * dx;
final double v = v0 * (1 - dy) + v1 * dy;
final int v2 = min(max(v.round(), 0), 255);
final int i = (y * requiredWidth + x) + c * 224 * 224;
result[i] = ((v2 / 255) - mean[c]) / std[c];
}
}
}
final floatList = Float32List.fromList(result);
final inputList = await preprocessImageClip(image, imgByteData);
final inputOrt =
OrtValueTensor.createTensorWithDataList(floatList, [1, 3, 224, 224]);
OrtValueTensor.createTensorWithDataList(inputList, [1, 3, 224, 224]);
final inputs = {'input': inputOrt};
final session = OrtSession.fromAddress(args["address"]);
final runOptions = OrtRunOptions();

View File

@@ -193,6 +193,48 @@ Future<(Float32List, Dimensions, Dimensions)>
);
}
/// Prepares [image] for CLIP image-encoder inference.
///
/// Rescales the image with a uniform (aspect-ratio preserving) scale so it
/// fits inside a 224x224 canvas, fills the uncovered remainder with grey
/// (114, 114, 114), and returns the pixels as a planar channel-first
/// (R plane, G plane, B plane) [Float32List], normalized with the CLIP
/// per-channel mean/std constants. [imgByteData] holds the raw RGBA bytes
/// of [image] and is forwarded to the bicubic sampler.
Future<Float32List> preprocessImageClip(
  Image image,
  ByteData imgByteData,
) async {
  const int requiredWidth = 224;
  const int requiredHeight = 224;
  const int planeSize = requiredWidth * requiredHeight;
  // CLIP normalization constants, per channel (R, G, B).
  const mean = [0.48145466, 0.4578275, 0.40821073];
  const std = [0.26862954, 0.26130258, 0.27577711];
  // Fill color for the canvas area not covered by the rescaled image.
  const padPixel = Color.fromRGBO(114, 114, 114, 1.0);

  // Largest uniform scale that keeps the whole image inside the canvas.
  final scale = min(requiredWidth / image.width, requiredHeight / image.height);
  final scaledWidth = (image.width * scale).round().clamp(0, requiredWidth);
  final scaledHeight = (image.height * scale).round().clamp(0, requiredHeight);

  final processedBytes = Float32List(3 * planeSize);
  for (var h = 0; h < requiredHeight; h++) {
    for (var w = 0; w < requiredWidth; w++) {
      final bool outsideImage = w >= scaledWidth || h >= scaledHeight;
      final Color pixel = outsideImage
          ? padPixel
          // Map the canvas coordinate back to source space and sample
          // the image with bicubic interpolation.
          : _getPixelBicubic(w / scale, h / scale, image, imgByteData);
      final int planeIndex = h * requiredWidth + w;
      processedBytes[planeIndex] = ((pixel.red / 255) - mean[0]) / std[0];
      processedBytes[planeIndex + planeSize] =
          ((pixel.green / 255) - mean[1]) / std[1];
      processedBytes[planeIndex + 2 * planeSize] =
          ((pixel.blue / 255) - mean[2]) / std[2];
    }
  }
  return processedBytes;
}
Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
preprocessToMobileFaceNetFloat32List(
Image image,
@@ -225,7 +267,9 @@ Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
SimilarityTransform.estimate(face.allKeypoints);
if (!correctlyEstimated) {
log('Face alignment failed because not able to estimate SimilarityTransform, for face: $face');
throw Exception('Face alignment failed because not able to estimate SimilarityTransform');
throw Exception(
'Face alignment failed because not able to estimate SimilarityTransform',
);
}
alignmentResults.add(alignmentResult);
@@ -639,4 +683,4 @@ Color _getPixelBicubic(num fx, num fy, Image image, ByteData byteDataRgba) {
// final c3 = cubic(dy, ip3, ic3, in3, ia3);
return Color.fromRGBO(c0, c1, c2, 1.0);
}
}