diff --git a/src/kernels/backend.ts b/src/kernels/backend.ts
index f704ce60f4..1971e1864e 100644
--- a/src/kernels/backend.ts
+++ b/src/kernels/backend.ts
@@ -568,7 +568,17 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer {
       extrapolationValue: number): Tensor4D {
     throw new Error('Not yet implemented');
   }
-
+
+  // Applies projective transform(s) to a batch of images; implemented by
+  // each concrete backend.
+  transform(
+      images: Tensor4D, transforms: Tensor2D, method: 'bilinear'|'nearest',
+      outputSize: [number, number], fillValue: number): Tensor4D {
+    throw new Error('Not yet implemented');
+  }
+
   depthToSpace(x: Tensor4D, blockSize: number, dataFormat: string): Tensor4D {
     throw new Error('Not yet implemented');
   }
diff --git a/src/kernels/backend_cpu.ts b/src/kernels/backend_cpu.ts
index 069e4e7599..c03f8e1259 100644
--- a/src/kernels/backend_cpu.ts
+++ b/src/kernels/backend_cpu.ts
@@ -3272,7 +3272,77 @@ export class MathBackendCPU implements KernelBackend {
     }
     return output.toTensor() as Tensor4D;
   }
+
+  transform(
+      images: Tensor4D, transforms: Tensor2D, method: 'bilinear'|'nearest',
+      outputSize: [number, number], fillValue: number): Tensor4D {
+    const [batch, inHeight, inWidth, numChannels] = images.shape;
+    const numTransforms = transforms.shape[0];
+
+    // One 8-element transform row per image, or a single row that is
+    // broadcast over the whole batch.
+    if ((numTransforms !== batch && numTransforms !== 1) ||
+        transforms.shape[1] !== 8) {
+      throw new Error('Input transform should be num_images x 8 or 1 x 8');
+    }
+
+    const outHeight = outputSize[0];
+    const outWidth = outputSize[1];
+    const imageBuffer = images.bufferSync();
+
+    const output =
+        ops.buffer([batch, outHeight, outWidth, numChannels], images.dtype);
+    const outStride = output.strides;
+    const transformVals = transforms.dataSync();
+
+    // Reference implementation
+    // tslint:disable-next-line:max-line-length
+    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/image/kernels/image_ops.h
+
+    // Reads one pixel; out-of-bounds coordinates yield `fill`.
+    const readWithFillValue = (
+        batchIdx: number, y: number, x: number, channel: number,
+        fill: number) => {
+      return (0 <= y && y < inHeight && 0 <= x && x < inWidth) ?
+          imageBuffer.get(batchIdx, y, x, channel) :
+          fill;
+    };
+
+    // Bilinear interpolation over the four integer neighbors of (x, y).
+    const bilinearInterpolation = (
+        batchIdx: number, y: number, x: number, channel: number,
+        fill: number) => {
+      const xFloor = Math.floor(x);
+      const yFloor = Math.floor(y);
+      const xCeil = xFloor + 1;
+      const yCeil = yFloor + 1;
+
+      const valueYFloor = (xCeil - x) *
+              readWithFillValue(batchIdx, yFloor, xFloor, channel, fill) +
+          (x - xFloor) *
+              readWithFillValue(batchIdx, yFloor, xCeil, channel, fill);
+      const valueYCeil = (xCeil - x) *
+              readWithFillValue(batchIdx, yCeil, xFloor, channel, fill) +
+          (x - xFloor) *
+              readWithFillValue(batchIdx, yCeil, xCeil, channel, fill);
+      return (yCeil - y) * valueYFloor + (y - yFloor) * valueYCeil;
+    };
+
+    // Nearest-neighbor interpolation: round to the closest pixel.
+    const nearestInterpolation = (
+        batchIdx: number, y: number, x: number, channel: number,
+        fill: number) => {
+      return readWithFillValue(
+          batchIdx, Math.round(y), Math.round(x), channel, fill);
+    };
+
+    for (let bInd = 0; bInd < batch; bInd++) {
+      // A single transform row is broadcast to every image in the batch.
+      const transform = numTransforms === 1 ?
+          transformVals :
+          transformVals.slice(bInd * 8, (bInd + 1) * 8);
+
+      for (let topInd = 0; topInd < outHeight; topInd++) {
+        for (let leftInd = 0; leftInd < outWidth; leftInd++) {
+          // Projective map from output (leftInd, topInd) to input coords,
+          // transform row = [a0, a1, a2, b0, b1, b2, c0, c1].
+          const projection =
+              transform[6] * leftInd + transform[7] * topInd + 1;
+          const floatInputLeft =
+              (transform[0] * leftInd + transform[1] * topInd + transform[2]) /
+              projection;
+          const floatInputTop =
+              (transform[3] * leftInd + transform[4] * topInd + transform[5]) /
+              projection;
+
+          for (let c = 0; c < numChannels; c++) {
+            const outInd = c + leftInd * outStride[2] + topInd * outStride[1] +
+                bInd * outStride[0];
+            if (method === 'bilinear') {
+              output.values[outInd] = bilinearInterpolation(
+                  bInd, floatInputTop, floatInputLeft, c, fillValue);
+            } else {  // method === 'nearest'
+              output.values[outInd] = nearestInterpolation(
+                  bInd, floatInputTop, floatInputLeft, c, fillValue);
+            }
+          }
+        }
+      }
+    }
+    return output.toTensor() as Tensor4D;
+  }
+
   sparseToDense(
       sparseIndices: Tensor, sparseValues: Tensor, outputShape: ShapeMap[R],
       defaultValue: Scalar): Tensor {
diff --git a/src/kernels/backend_webgl.ts b/src/kernels/backend_webgl.ts
index 194d5d004c..e4566eb145 100644
--- a/src/kernels/backend_webgl.ts
+++ b/src/kernels/backend_webgl.ts
@@ -65,6 +65,7 @@ import {Conv2DProgram, Conv3DProgram} from './webgl/conv_gpu';
 import {DepthwiseConv2DProgram} from './webgl/conv_gpu_depthwise';
 import {DepthwiseConvPacked2DProgram} from './webgl/conv_packed_gpu_depthwise';
 import {CropAndResizeProgram} from './webgl/crop_and_resize_gpu';
+import {TransformProgram} from './webgl/transform_gpu';
 import {CumSumProgram} from './webgl/cumsum_gpu';
 import {DepthToSpaceProgram} from './webgl/depth_to_space_gpu';
 import {EncodeFloatProgram} from './webgl/encode_float_gpu';
@@ -2048,7 +2049,19 @@ export class MathBackendWebGL implements KernelBackend {
         image.shape, boxes.shape, cropSize, method, extrapolationValue);
     return this.compileAndRun(program, [image, boxes, boxIndex]);
   }
-
+
+  transform(
+      image: Tensor4D, transforms: Tensor2D,
+      method: 'bilinear'|'nearest', outputSize: [number, number],
+      fillValue: number): Tensor4D {
+    const program = new TransformProgram(
+        image.shape, transforms.shape, method, outputSize, fillValue);
+    return this.compileAndRun(program, [image, transforms]);
+  }
+
   depthToSpace(x: Tensor4D, blockSize: number, dataFormat: 'NHWC'|'NCHW'):
       Tensor4D {
     util.assert(
diff --git a/src/kernels/webgl/transform_gpu.ts b/src/kernels/webgl/transform_gpu.ts
new file mode 100644
index 0000000000..e7072c689e
--- /dev/null
+++ b/src/kernels/webgl/transform_gpu.ts
@@ -0,0 +1,110 @@
+/**
+ * @license
+ * Copyright 2019 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {GPGPUProgram} from './gpgpu_math';
+
+// WebGL kernel for projective image transforms, mirroring the CPU backend.
+export class TransformProgram implements GPGPUProgram {
+  variableNames = ['Image', 'Transform'];
+  outputShape: number[] = [];
+  userCode: string;
+
+  constructor(
+      imageShape: [number, number, number, number],
+      transformShape: [number, number], method: 'bilinear'|'nearest',
+      size: [number, number], fillValue: number) {
+    const [batch, imageHeight, imageWidth, depth] = imageShape;
+    const numTransforms = transformShape[0];
+    const [outHeight, outWidth] = size;
+
+    // The output batch must match the image batch: transforms may be a
+    // single [1, 8] row broadcast across every image, in which case
+    // numTransforms (1) is smaller than batch.
+    this.outputShape = [batch, outHeight, outWidth, depth];
+    const methodId = method === 'bilinear' ? 1 : 0;
+
+    // With a single transform row, every batch index reads row 0.
+    const transformRow = numTransforms === 1 ? '0' : 'b';
+
+    // Reference implementation
+    // tslint:disable-next-line:max-line-length
+    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
+    this.userCode = `
+      float readFillValue(int b, int y, int x, int d) {
+        if (y < 0 || y >= ${imageHeight} || x < 0 || x >= ${imageWidth}) {
+          return float(${fillValue});
+        }
+        return getImage(b, y, x, d);
+      }
+
+      void main() {
+        ivec4 coords = getOutputCoords();
+        int b = coords[0];
+        int y = coords[1];
+        int x = coords[2];
+        int d = coords[3];
+
+        // Transform row is [a0, a1, a2, b0, b1, b2, c0, c1].
+        float a0 = getTransform(${transformRow}, 0);
+        float a1 = getTransform(${transformRow}, 1);
+        float a2 = getTransform(${transformRow}, 2);
+        float b0 = getTransform(${transformRow}, 3);
+        float b1 = getTransform(${transformRow}, 4);
+        float b2 = getTransform(${transformRow}, 5);
+        float c0 = getTransform(${transformRow}, 6);
+        float c1 = getTransform(${transformRow}, 7);
+
+        // Map the output point (x, y) back to the input point (in_x, in_y).
+        float projection = c0 * float(x) + c1 * float(y) + 1.0;
+        float in_y = (b0 * float(x) + b1 * float(y) + b2) / projection;
+        float in_x = (a0 * float(x) + a1 * float(y) + a2) / projection;
+
+        if (${methodId} == 1) {
+          // Bilinear: blend the four integer neighbors of (in_x, in_y).
+          int xFloor = int(floor(in_x));
+          int yFloor = int(floor(in_y));
+          int xCeil = xFloor + 1;
+          int yCeil = yFloor + 1;
+
+          float topLeft = readFillValue(b, yFloor, xFloor, d);
+          float bottomLeft = readFillValue(b, yCeil, xFloor, d);
+          float topRight = readFillValue(b, yFloor, xCeil, d);
+          float bottomRight = readFillValue(b, yCeil, xCeil, d);
+
+          float valueYFloor = topLeft * (float(xCeil) - in_x) +
+              topRight * (in_x - float(xFloor));
+          float valueYCeil = bottomLeft * (float(xCeil) - in_x) +
+              bottomRight * (in_x - float(xFloor));
+
+          setOutput((float(yCeil) - in_y) * valueYFloor +
+              (in_y - float(yFloor)) * valueYCeil);
+        } else {
+          // Nearest neighbor. floor(v + 0.5) matches Math.round on the CPU
+          // path and avoids round(), which GLSL ES 1.0 does not provide.
+          int xRound = int(floor(in_x + 0.5));
+          int yRound = int(floor(in_y + 0.5));
+          setOutput(readFillValue(b, yRound, xRound, d));
+        }
+      }
+    `;
+  }
+}
diff --git a/src/ops/image_ops.ts b/src/ops/image_ops.ts
index b4775f7091..68912f72f7 100644
--- a/src/ops/image_ops.ts
+++ b/src/ops/image_ops.ts
@@ -305,8 +305,63 @@ function cropAndResize_(
   return res as Tensor4D;
 }
+
+/**
+ * Applies the given transform(s) to the image(s).
+ *
+ * @param image 4d tensor of shape `[batch,imageHeight,imageWidth,depth]`,
+ *     where imageHeight and imageWidth must be positive, specifying the
+ *     batch of images to transform
+ * @param transforms 2d float32 tensor of shape `[batch, 8]` or `[1, 8]`.
+ *     Each entry is a projective transform matrix/matrices.
+ *     If one row of transforms is `[a0, a1, a2, b0, b1, b2, c0, c1]`,
+ *     then it maps the output point (x, y) to a transformed input point
+ *     `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
+ *     where `k = c0 x + c1 y + 1`
+ * @param method Optional, string from `'bilinear' | 'nearest'`,
+ *     defaults to bilinear, which specifies the sampling method for resizing
+ * @param size Optional, the new size `[newHeight, newWidth]` for the output
+ *     image, defaults to `[imageHeight, imageWidth]`
+ * @param fillValue Optional, the value to fill the outside of the input image
+ *     pixels, defaults to 0
+ * @return A 4D tensor of the shape `[batch, newHeight, newWidth, depth]`
+ */
+/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */
+function transform_(
+    image: Tensor4D|TensorLike, transforms: Tensor2D|TensorLike,
+    method?: 'bilinear'|'nearest', size?: [number, number],
+    fillValue?: number): Tensor4D {
+  const $image = convertToTensor(image, 'image', 'transform', 'float32');
+  const $transforms =
+      convertToTensor(transforms, 'transforms', 'transform', 'float32');
+  method = method || 'bilinear';
+  fillValue = fillValue || 0;
+  size = size || [$image.shape[1], $image.shape[2]];
+
+  util.assert(
+      $image.rank === 4,
+      () => 'Error in transform: image must be rank 4, ' +
+          `but got rank ${$image.rank}.`);
+  util.assert(
+      $transforms.rank === 2 && $transforms.shape[1] === 8 &&
+          ($transforms.shape[0] === 1 ||
+           $transforms.shape[0] === $image.shape[0]),
+      () => `Error in transform: transforms must have size ` +
+          `[${$image.shape[0]},8] or [1,8] but had shape ` +
+          `${$transforms.shape}.`);
+  util.assert(
+      size[0] >= 1 && size[1] >= 1,
+      () => `size must be at least [1,1], but was ${size}`);
+  util.assert(
+      method === 'bilinear' || method === 'nearest',
+      () => `method must be bilinear or nearest, but was ${method}`);
+
+  const forward: ForwardFunc<Tensor4D> = backend =>
+      backend.transform($image, $transforms, method, size, fillValue);
+
+  const res = ENV.engine.runKernel(forward, {$image, $transforms});
+  return res as Tensor4D;
+}
 export const resizeBilinear = op({resizeBilinear_});
 export const resizeNearestNeighbor = op({resizeNearestNeighbor_});
 export const nonMaxSuppression = op({nonMaxSuppression_});
 export const nonMaxSuppressionAsync = nonMaxSuppressionAsync_;
 export const cropAndResize = op({cropAndResize_});
+export const transform = op({transform_});
diff --git a/src/ops/image_ops_test.ts b/src/ops/image_ops_test.ts
index effa00c686..30d2efe62b 100644
--- a/src/ops/image_ops_test.ts
+++ b/src/ops/image_ops_test.ts
@@ -407,3 +407,126 @@ describeWithFlags('cropAndResize', ALL_ENVS, () => {
         output, [1, 2, 0, 3, 4, 0, 3, 4, 0, 5, 6, 6, 7, 8, 8, 0, 0, 0]);
   });
 });
+
+// NOTE(review): this nonMaxSuppressionAsync block looks unrelated to the
+// transform feature — confirm it is not an accidental rebase duplicate of
+// existing tests before merging.
+describeWithFlags('nonMaxSuppressionAsync', ALL_ENVS, () => {
+  it('select from three clusters', async () => {
+    const boxes = tf.tensor2d(
+        [
+          0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
+          0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101
+        ],
+        [6, 4]);
+    const scores = tf.tensor1d([0.9, 0.75, 0.6, 0.95, 0.5, 0.3]);
+    const maxOutputSize = 3;
+    const iouThreshold = 0.5;
+    const scoreThreshold = 0;
+    const indices = await tf.image.nonMaxSuppressionAsync(
+        boxes, scores, maxOutputSize, iouThreshold, scoreThreshold);
+
+    expect(indices.shape).toEqual([3]);
+    expectArraysEqual(indices, [3, 0, 5]);
+  });
+
+  it('accepts a tensor-like object', async () => {
+    const boxes = [[0, 0, 1, 1], [0, 1, 1, 2]];
+    const scores = [1, 2];
+    const indices = await tf.image.nonMaxSuppressionAsync(boxes, scores, 10);
+    expect(indices.shape).toEqual([2]);
+    expect(indices.dtype).toEqual('int32');
+    expectArraysEqual(indices, [1, 0]);
+  });
+});
+
+describeWithFlags('transform', ALL_ENVS, () => {
+  it('2x2-bilinear 90° rotate', () => {
+    const image: tf.Tensor4D = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+    // 90° rotation about the image center. Derivation inspired from:
+    // https://math.stackexchange.com/questions/2093314/rotation-matrix-of-rotation-around-a-point-other-than-the-origin
+    // const angle = Math.PI / 2;
+    // const center = [0.5, 0.5];
+    // [
+    //   Math.cos(angle), Math.sin(angle),
+    //   center[0] * (1 - Math.cos(angle)) - center[1] * Math.sin(angle),
+    //   -1 * Math.sin(angle), Math.cos(angle),
+    //   center[1] * (1 - Math.cos(angle)) + center[0] * Math.sin(angle),
+    //   0, 0
+    // ];
+    const t = [0, 1, 0, -1, 0, 1, 0, 0];
+    const transforms: tf.Tensor2D = tf.tensor2d(t, [1, 8]);
+
+    const output = tf.image.transform(image, transforms, 'bilinear');
+    expect(output.shape).toEqual([1, 2, 2, 1]);
+    expectArraysClose(output, [3, 1, 4, 2]);
+  });
+
+  it('2x2-nearest 90° rotate', () => {
+    const image: tf.Tensor4D = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+    // Same 90° rotation about the image center as the bilinear test above.
+    const t = [0, 1, 0, -1, 0, 1, 0, 0];
+    const transforms: tf.Tensor2D = tf.tensor2d(t, [1, 8]);
+
+    const output = tf.image.transform(image, transforms, 'nearest');
+    expect(output.shape).toEqual([1, 2, 2, 1]);
+    expectArraysClose(output, [3, 1, 4, 2]);
+  });
+
+  it('2x2-to-3x2-bilinear identity', () => {
+    const image: tf.Tensor4D = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+    // Identity transform; the extra output column falls outside the input
+    // and takes the default fill value (0).
+    const transforms: tf.Tensor2D =
+        tf.tensor2d([1, 0, 0, 0, 1, 0, 0, 0], [1, 8]);
+    const output = tf.image.transform(image, transforms, 'bilinear', [2, 3]);
+    expect(output.shape).toEqual([1, 2, 3, 1]);
+    expectArraysClose(output, [1, 2, 0, 3, 4, 0]);
+  });
+
+  it('2x2-to-3x2-nearest identity', () => {
+    const image: tf.Tensor4D = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+    const transforms: tf.Tensor2D =
+        tf.tensor2d([1, 0, 0, 0, 1, 0, 0, 0], [1, 8]);
+    const output = tf.image.transform(image, transforms, 'nearest', [2, 3]);
+    expect(output.shape).toEqual([1, 2, 3, 1]);
+    expectArraysClose(output, [1, 2, 0, 3, 4, 0]);
+  });
+
+  it('2x2-to-3x2-bilinear 10° shearing', () => {
+    const image: tf.Tensor4D = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+    // Horizontal shear: in_x = x - 0.1 * y, in_y = y.
+    const transforms: tf.Tensor2D =
+        tf.tensor2d([1, -0.1, 0, 0, 1, 0, 0, 0], [1, 8]);
+    const output = tf.image.transform(image, transforms, 'bilinear', [2, 3]);
+    expect(output.shape).toEqual([1, 2, 3, 1]);
+    expectArraysClose(output, [1, 2, 0, 2.7, 3.9, 0.4]);
+  });
+
+  it('2x2-to-3x2-nearest 10° shearing', () => {
+    const image: tf.Tensor4D = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+    const transforms: tf.Tensor2D =
+        tf.tensor2d([1, -0.1, 0, 0, 1, 0, 0, 0], [1, 8]);
+    const output = tf.image.transform(image, transforms, 'nearest', [2, 3]);
+    expect(output.shape).toEqual([1, 2, 3, 1]);
+    expectArraysClose(output, [1, 2, 0, 3, 4, 0]);
+  });
+});