tensorflow · nsthorat · Sep 16, 2017 · Aug 22, 2017 · Aug 23, 2017 · Aug 23, 2017
diff --git a/demos/model-builder/model-builder.ts b/demos/model-builder/model-builder.ts
@@ -854,4 +854,4 @@ export class ModelBuilder extends ModelBuilderPolymer {
   }
 }
 
-document.registerElement(ModelBuilder.prototype.is, ModelBuilder);
+document.registerElement(ModelBuilder.prototype.is, ModelBuilder);
diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts
@@ -0,0 +1,110 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+import {Node} from './graph';
+import {NDArrayMath} from './math/math';
+import {NDArray, Scalar} from './math/ndarray';
+import {Optimizer} from './optimizer';
+import {SessionRuntime} from './session';
+import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map';
+
+/**
+ * Optimizer that divides each variable's step by the square root of the
+ * running sum of its squared batch gradients.
+ *
+ * Per variable, `afterBatch` computes:
+ *   cache    = cache + gradient * gradient
+ *   variable = variable + c * gradient / (sqrt(cache) + eps)
+ * where `gradient` is the sum accumulated by the base class over the batch
+ * and `c` is the base class scalar `-learningRate / batchSize`.
+ */
+export class AdagradOptimizer extends Optimizer {
+  /**
+   * @param learningRate Step size, forwarded to the base `Optimizer`.
+   * @param momentum Not used by the update rule. It is kept only so existing
+   *     callers that pass three arguments keep compiling.
+   * @param specifiedVariableList Optional list of variable nodes to train,
+   *     forwarded to the base `Optimizer`.
+   */
+  constructor(
+      protected learningRate: number, protected momentum: number,
+      specifiedVariableList?: Node[]) {
+    super(learningRate, specifiedVariableList);
+    this.eps = Scalar.new(1e-6);
+  }
+
+  /**
+   * Runs the base class setup, then creates a zero-filled cache entry for
+   * every variable node the first time it is called (while the cache map is
+   * still empty).
+   */
+  beforeBatch(
+      math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
+      activationArrayMap: TensorArrayMap,
+      gradientArrayMap: SummedTensorArrayMap) {
+    super.beforeBatch(
+        math, batchSize, runtime, activationArrayMap, gradientArrayMap);
+
+    if (this.accumulatedSquaredGradients.size() === 0) {
+      this.variableNodes.forEach(node => {
+        this.accumulatedSquaredGradients.set(
+            node.output, NDArray.zeros(node.output.shape));
+      });
+    }
+  }
+
+  /**
+   * Applies one update to every variable node from the gradients summed over
+   * the batch, then replaces the gradient map with a fresh empty one.
+   */
+  afterBatch(
+      math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
+      activationArrayMap: TensorArrayMap,
+      gradientArrayMap: SummedTensorArrayMap) {
+    math.scope((keep) => {
+      this.variableNodes.forEach(node => {
+        const oldVariable = activationArrayMap.get(node.output);
+        const gradient = this.variableGradients.get(node.output);
+        const oldCache = this.accumulatedSquaredGradients.get(node.output);
+        const gradientSquare = math.multiply(gradient, gradient);
+        const cache = math.add(oldCache, gradientSquare);
+        // Adding eps to sqrt(cache) keeps the divisor above zero.
+        const variable = math.scaledArrayAdd(
+            this.c, math.divide(gradient, math.add(math.sqrt(cache), this.eps)),
+            this.one, oldVariable);
+        this.accumulatedSquaredGradients.set(node.output, keep(cache));
+        activationArrayMap.set(node.output, keep(variable));
+        node.data = variable;
+        // These were replaced in the maps above, so release the old arrays.
+        oldVariable.dispose();
+        oldCache.dispose();
+      });
+    });
+
+    this.variableGradients.dispose();
+    this.variableGradients = new TensorArrayMap();
+  }
+
+  /** Disposes the base class scalars, `eps` and every cached array. */
+  dispose() {
+    super.dispose();
+    this.eps.dispose();
+    this.accumulatedSquaredGradients.dispose();
+  }
+
+  private accumulatedSquaredGradients = new TensorArrayMap();
+  private eps: Scalar;
+}
diff --git a/src/index.ts b/src/index.ts
@@ -36,9 +36,9 @@ export {NDArrayMathGPU} from './math/math_gpu';
 // tslint:disable-next-line:max-line-length
 export {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './math/ndarray';
 export {GPGPUContext} from './math/webgl/gpgpu_context';
+export {MomentumOptimizer} from './momentum_optimizer';
 export {Optimizer} from './optimizer';
 export {CostReduction, FeedEntry, Session} from './session';
 export {SGDOptimizer} from './sgd_optimizer';
-export {MomentumOptimizer} from './momentum_optimizer';
 // Second level exports.
 export {conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util};
diff --git a/src/momentum_optimizer.ts b/src/momentum_optimizer.ts
@@ -27,6 +27,7 @@ export class MomentumOptimizer extends SGDOptimizer {
       protected learningRate: number, private momentum: number,
       specifiedVariableList?: Node[]) {
     super(learningRate, specifiedVariableList);
+    this.m = Scalar.new(this.momentum);
   }
 
   beforeBatch(
@@ -36,7 +37,6 @@ export class MomentumOptimizer extends SGDOptimizer {
     super.beforeBatch(
         math, batchSize, runtime, activationArrayMap, gradientArrayMap);
 
-    this.m = Scalar.new(this.momentum);
     if (this.variableVelocities.size() === 0) {
       this.variableNodes.forEach(node => {
         this.variableVelocities.set(
@@ -57,7 +57,7 @@ export class MomentumOptimizer extends SGDOptimizer {
         const velocity =
             math.scaledArrayAdd(this.m, oldVelocity, this.one, gradient);
         const variable =
-            math.scaledArrayAdd(this.c!, velocity, this.one!, oldVariable);
+            math.scaledArrayAdd(this.c, velocity, this.one, oldVariable);
         this.variableVelocities.set(node.output, keep(velocity));
         activationArrayMap.set(node.output, keep(variable));
         node.data = variable;
@@ -72,13 +72,8 @@ export class MomentumOptimizer extends SGDOptimizer {
   }
 
   dispose() {
-    if (this.c != null) {
-      this.c.dispose();
-    }
-    if (this.m != null) {
-      this.m.dispose();
-    }
-    this.one.dispose();
+    super.dispose();  // presumably frees c and one; confirm in SGDOptimizer
+    this.m.dispose();  // momentum scalar, created once in the constructor
     this.variableVelocities.dispose();
   }
 

diff --git a/src/optimizer.ts b/src/optimizer.ts
@@ -17,33 +17,69 @@
 
 import {Node, VariableNode} from './graph';
 import {NDArrayMath} from './math/math';
+import {NDArray, Scalar} from './math/ndarray';
 import {SessionRuntime} from './session';
-import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map';
+import * as session_util from './session_util';
+import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map';
 
 export abstract class Optimizer {
   protected variableNodes: VariableNode[];
   protected specifiedVariableNodes: VariableNode[]|null;
 
-  constructor(specifiedVariableList?: Node[]) {
+  constructor(protected learningRate: number, specifiedVariableList?: Node[]) {
     if (specifiedVariableList != null) {
       this.specifiedVariableNodes = specifiedVariableList as VariableNode[];
     }
   }
 
-  abstract beforeBatch(
+  beforeBatch(  // resolves nodes, refreshes c, zeroes gradient sums
       math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
       activationArrayMap: TensorArrayMap,
-      gradientArrayMap: SummedTensorArrayMap): void;
+      gradientArrayMap: SummedTensorArrayMap) {
+    this.variableNodes = this.specifiedVariableNodes == null ?
+        session_util.getVariableNodesFromEvaluationSet(runtime.nodes) :
+        this.specifiedVariableNodes;  // list given to the constructor
+    if (batchSize !== this.prevBatchSize) {  // c depends on batchSize
+      if (this.c != null) {  // unset before the first batch
+        this.c.dispose();
+      }
+      this.prevBatchSize = batchSize;
+      this.c = Scalar.new(-this.learningRate / batchSize);
+    }
+    this.variableNodes.forEach(  // start every gradient sum at zero
+        node => this.variableGradients.set(
+            node.output, NDArray.zeros(node.output.shape)));
+  }
 
-  abstract afterExample(
+  afterExample(  // adds this example's gradients to the sums
       math: NDArrayMath, runtime: SessionRuntime,
       activationArrayMap: TensorArrayMap,
-      gradientArrayMap: SummedTensorArrayMap): void;
+      gradientArrayMap: SummedTensorArrayMap) {
+    math.scope((keep) => {
+      this.variableNodes.forEach(node => {
+        const gradient = gradientArrayMap.get(node.output);
+        const accumulatedGradient = this.variableGradients.get(node.output);
+        this.variableGradients.set(
+            node.output, keep(math.add(gradient, accumulatedGradient)));
+        accumulatedGradient.dispose();  // replaced by the new sum
+      });
+    });
+  }
 
   abstract afterBatch(
       math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
       activationArrayMap: TensorArrayMap,
       gradientArrayMap: SummedTensorArrayMap): void;
 
-  abstract dispose(): void;
+  dispose() {  // releases the scalars created by this class
+    if (this.c != null) {
+      this.c.dispose();
+    }
+    this.one.dispose();
+  }
+
+  protected variableGradients = new TensorArrayMap();  // per-batch sums
+  protected prevBatchSize: number;  // batchSize that c was built for
+  protected one = Scalar.new(1);
+  protected c: Scalar;  // -learningRate / batchSize, see beforeBatch
 }
diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts
@@ -0,0 +1,120 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {Node} from './graph';
+import {NDArrayMath} from './math/math';
+import {NDArray, Scalar} from './math/ndarray';
+import {Optimizer} from './optimizer';
+import {SessionRuntime} from './session';
+import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map';
+
+/**
+ * Optimizer that divides each variable's step by the square root of a
+ * decaying average of its squared batch gradients.
+ *
+ * Per variable, `afterBatch` computes:
+ *   cache    = gamma * cache + (1 - gamma) * gradient * gradient
+ *   variable = variable + c * gradient / (sqrt(cache) + eps)
+ * where `gradient` is the sum accumulated by the base class over the batch
+ * and `c` is the base class scalar `-learningRate / batchSize`.
+ */
+export class RMSPropOptimizer extends Optimizer {
+  /**
+   * @param learningRate Step size, forwarded to the base `Optimizer`.
+   * @param momentum Not used by the update rule. It is kept only so existing
+   *     callers that pass it positionally keep compiling.
+   * @param gamma Weight given to the previous cache value; the new squared
+   *     gradient gets weight `1 - gamma`.
+   * @param specifiedVariableList Optional list of variable nodes to train,
+   *     forwarded to the base `Optimizer`.
+   */
+  constructor(
+      protected learningRate: number, protected momentum: number,
+      private gamma: number, specifiedVariableList?: Node[]) {
+    super(learningRate, specifiedVariableList);
+    this.eps = Scalar.new(1e-6);
+    this.g = Scalar.new(this.gamma);
+  }
+
+  /**
+   * Runs the base class setup, then creates a zero-filled cache entry for
+   * every variable node the first time it is called (while the cache map is
+   * still empty).
+   */
+  beforeBatch(
+      math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
+      activationArrayMap: TensorArrayMap,
+      gradientArrayMap: SummedTensorArrayMap) {
+    super.beforeBatch(
+        math, batchSize, runtime, activationArrayMap, gradientArrayMap);
+    if (this.accumulatedSquaredGradients.size() === 0) {
+      this.variableNodes.forEach(node => {
+        this.accumulatedSquaredGradients.set(
+            node.output, NDArray.zeros(node.output.shape));
+      });
+    }
+  }
+
+  /**
+   * Applies one update to every variable node from the gradients summed over
+   * the batch, then replaces the gradient map with a fresh empty one.
+   */
+  afterBatch(
+      math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
+      activationArrayMap: TensorArrayMap,
+      gradientArrayMap: SummedTensorArrayMap) {
+    math.scope((keep) => {
+      // 1 - gamma is identical for every variable node, so it is computed once
+      // per batch instead of once per node.
+      const oneMinusGamma: Scalar = math.sub(this.one, this.g);
+      this.variableNodes.forEach(node => {
+        const oldVariable = activationArrayMap.get(node.output);
+        const gradient = this.variableGradients.get(node.output);
+        const oldCache = this.accumulatedSquaredGradients.get(node.output);
+        const gradientSquare = math.multiply(gradient, gradient);
+        const cache = math.scaledArrayAdd(
+            this.g, oldCache, oneMinusGamma, gradientSquare);
+        // Adding eps to sqrt(cache) keeps the divisor above zero.
+        const variable = math.scaledArrayAdd(
+            this.c, math.divide(gradient, math.add(math.sqrt(cache), this.eps)),
+            this.one, oldVariable);
+        this.accumulatedSquaredGradients.set(node.output, keep(cache));
+        activationArrayMap.set(node.output, keep(variable));
+        node.data = variable;
+
+        // These were replaced in the maps above, so release the old arrays.
+        oldVariable.dispose();
+        oldCache.dispose();
+      });
+    });
+
+    this.variableGradients.dispose();
+    this.variableGradients = new TensorArrayMap();
+  }
+
+  /** Disposes the base class scalars, `eps`, `g` and every cached array. */
+  dispose() {
+    super.dispose();
+    this.eps.dispose();
+    this.g.dispose();
+    this.accumulatedSquaredGradients.dispose();
+  }
+
+  private accumulatedSquaredGradients = new TensorArrayMap();
+  private eps: Scalar;
+  private g: Scalar;
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -854,4 +854,4 @@ export class ModelBuilder extends ModelBuilderPolymer { @@
       }
     }
-    document.registerElement(ModelBuilder.prototype.is, ModelBuilder);
+    document.registerElement(ModelBuilder.prototype.is, ModelBuilder);