diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
index a41302c33..3bd1acb4d 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
@@ -6,12 +6,9 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdadelta
 
@@ -51,9 +48,6 @@ public class AdaDelta(
     private val epsilon: Float = 1e-8f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var rhoConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -67,18 +61,16 @@ public class AdaDelta(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
-
-        rhoConst = tf.constant(rho, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val targets = mutableListOf<Operand<Float>>()
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
+        val rhoConst = tf.constant(rho, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
-            val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
-            val accumUpdateSlot: Variable<Float> = getSlot(varName, ACCUMULATOR_UPDATE)
+        for ((i, variable) in weights.withIndex()) {
+            val output = variable.asOutput()
+            val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val accumUpdateSlot = createSlot(ACCUMULATOR_UPDATE, output, tf, graph)
 
             targets.add(
                 tf.train.applyAdadelta(
@@ -95,24 +87,6 @@ public class AdaDelta(
         return targets
     }
 
-    private fun createAdaDeltaSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumulatorInitializer = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumulatorInitializer)
-
-        val accumUpdateInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR_UPDATE))
-        val updateInitializer: Operand<Float> = tf.withName(accumUpdateInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR_UPDATE, updateInitializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createAdaDeltaSlot(graph, tf, v.asOutput())
-        }
-    }
-
     override val optimizerName: String get() = "Adadelta"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
index cd6293b88..fa861af2d 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
@@ -6,12 +6,9 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdagrad
 
@@ -44,8 +41,6 @@ public class AdaGrad(
     private val initialAccumulatorValue: Float = 0.01f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var initialAccumulatorValueConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -58,17 +53,12 @@ public class AdaGrad(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
-        initialAccumulatorValueConstant = tf.constant(initialAccumulatorValue, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
-
-            val slot: Variable<Float> = getSlot(varName, ACCUMULATOR)
+        for ((i, variable) in weights.withIndex()) {
+            val slot = createSlot(ACCUMULATOR, variable.asOutput(), tf, graph, initialValue = initialAccumulatorValue)
 
             targets.add(
                 tf.train.applyAdagrad(
@@ -84,20 +74,6 @@ public class AdaGrad(
         return targets
     }
 
-    private fun createAdaGradSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-
-        val initializer: Operand<Float> = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createAdaGradSlot(graph, tf, v.asOutput())
-        }
-    }
-
     override val optimizerName: String get() = "Adagrad"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
index 310de4987..3f0db4248 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
@@ -11,11 +11,9 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdagradDa
@@ -52,10 +50,6 @@ public class AdaGradDA(
     private val l2Strength: Float = 0.01f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var l1StrengthConst: Constant<Float>
-    private lateinit var l2StrengthConst: Constant<Float>
-    private lateinit var globalStep: Variable<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -70,19 +64,22 @@ public class AdaGradDA(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
-
-        learningRateConst = tf.constant(learningRate, getDType())
-        l1StrengthConst = tf.constant(l1Strength, getDType())
-        l2StrengthConst = tf.constant(l2Strength, getDType())
+        val targets = mutableListOf<Operand<Float>>()
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val l1StrengthConst = tf.constant(l1Strength, getDType())
+        val l2StrengthConst = tf.constant(l2Strength, getDType())
 
-            val gradSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
-            val gradSquaredSlot: Variable<Float> = getSlot(varName, SQUARED_ACCUMULATOR)
+        val globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
+        val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
+        val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
+            .assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
+        graph.addOptimizerVariableInitializer(globalStepInit)
 
+        for ((i, variable) in weights.withIndex()) {
+            val output = variable.asOutput()
+            val gradSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val gradSquaredSlot = createSlot(SQUARED_ACCUMULATOR, output, tf, graph)
             targets.add(
                 tf.train.applyAdagradDa(
                     variable,
@@ -104,30 +101,6 @@ public class AdaGradDA(
         return targets
     }
 
-    private fun createAdaGradDASlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val accumulatorInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumInitializer: Operand<Float> = tf.withName(accumulatorInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
-
-        val squareAccumInitializerName = defaultInitializerOpName(createName(v, SQUARED_ACCUMULATOR))
-        val sqInitializer: Operand<Float> = tf.withName(squareAccumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-
-        createSlot(graph, tf, v.asOutput(), SQUARED_ACCUMULATOR, sqInitializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createAdaGradDASlot(graph, tf, v.asOutput())
-        }
-        globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
-        val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
-        val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
-            .assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
-        graph.addOptimizerVariableInitializer(globalStepInit)
-    }
-
     override val optimizerName: String get() = "AdaGradDA"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
index b8a05f248..c11a83bb6 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -11,11 +11,9 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdam
@@ -53,13 +51,6 @@ public class Adam(
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
 
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var betaOneConst: Constant<Float>
-    private lateinit var betaTwoConst: Constant<Float>
-    private lateinit var betaOnePower: Variable<Float>
-    private lateinit var betaTwoPower: Variable<Float>
-
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
         require(beta1 > 0.0f && beta1 < 1.0f) { "Beta1 $beta1 should be in range (0.0; 1.0)." }
@@ -73,22 +64,35 @@ public class Adam(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
-
-        betaOneConst = tf.constant(beta1, getDType())
-        betaTwoConst = tf.constant(beta2, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val targets = mutableListOf<Operand<Float>>()
 
-        for (i in weights.indices) {
+        val betaOneConst = tf.constant(beta1, getDType())
+        val betaTwoConst = tf.constant(beta2, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
+        val betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
+        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
+            .assign(
+                betaOnePower,
+                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
+            )
+        graph.addOptimizerVariableInitializer(betaOnePowerInit)
 
-            val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
-            val secondMomentSlot: Variable<Float> = getSlot(varName, SECOND_MOMENT)
+        val betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
+        val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
+            .assign(
+                betaTwoPower,
+                tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
+            )
+        graph.addOptimizerVariableInitializer(betaTwoPowerInit)
 
+        for ((i, variable) in weights.withIndex()) {
+            val output = variable.asOutput()
+            val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
+            val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
             targets.add(
                 tf.train.applyAdam(
                     variable,
@@ -122,44 +126,6 @@ public class Adam(
         return targets
     }
 
-    private fun createAdamSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
-        val firstMomentInitializer =
-            tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
-
-        val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
-        val secondMomentInitializer =
-            tf.withName(secondMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createAdamSlot(graph, tf, v.asOutput())
-        }
-        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
-
-        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
-        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
-            .assign(
-                betaOnePower,
-                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
-            )
-        graph.addOptimizerVariableInitializer(betaOnePowerInit)
-
-
-        betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
-
-        val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
-        val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
-            .assign(
-                betaTwoPower,
-                tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
-            )
-        graph.addOptimizerVariableInitializer(betaTwoPowerInit)
-    }
-
     override val optimizerName: String get() = "Adam"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
index 33954e229..1d8cbfc58 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -11,12 +11,10 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.Scope
 import org.tensorflow.op.core.Assign
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdaMax
@@ -53,12 +51,6 @@ public class Adamax(
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
 
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var betaOneConst: Constant<Float>
-    private lateinit var betaTwoConst: Constant<Float>
-    private lateinit var betaOnePower: Variable<Float>
-
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
         require(beta1 > 0.0f && beta1 < 1.0f) { "Beta1 $beta1 should be in range (0.0; 1.0)." }
@@ -72,23 +64,28 @@ public class Adamax(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
-
-        betaOneConst = tf.constant(beta1, getDType())
-        betaTwoConst = tf.constant(beta2, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val targets = mutableListOf<Operand<Float>>()
 
-        val scope = Scope(graph.tfGraph)
+        val betaOneConst = tf.constant(beta1, getDType())
+        val betaTwoConst = tf.constant(beta2, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
+        val betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
+        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
+            .assign(
+                betaOnePower,
+                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
+            )
+        graph.addOptimizerVariableInitializer(betaOnePowerInit)
 
-            val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
-            val secondMomentSlot: Variable<Float> = getSlot(varName, SECOND_MOMENT)
+        val scope = Scope(graph.tfGraph)
+        for ((i, variable) in weights.withIndex()) {
+            val output = variable.asOutput()
+            val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
+            val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
             targets.add(
                 ApplyAdaMax.create(
                     scope,
@@ -106,42 +103,14 @@ public class Adamax(
             )
         }
 
-        val betaOnePowerInit = tf
-            .assign(betaOnePower, tf.math.mul(betaOnePower, betaOneConst))
+        val betaOnePowerInit2 = tf.assign(betaOnePower, tf.math.mul(betaOnePower, betaOneConst))
 
-        graph.addOptimizerVariableInitializer(betaOnePowerInit)
+        graph.addOptimizerVariableInitializer(betaOnePowerInit2)
         graph.addOptimizerVariable(betaOnePower)
 
         return targets
     }
 
-    private fun createAdamaxSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
-        val firstMomentInitializer =
-            tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
-
-        val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
-        val secondMomentInitializer = tf.withName(secondMomentInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createAdamaxSlot(graph, tf, v.asOutput())
-        }
-        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
-        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
-
-        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
-            .assign(
-                betaOnePower,
-                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
-            )
-        graph.addOptimizerVariableInitializer(betaOnePowerInit)
-    }
-
     override val optimizerName: String get() = "Adamax"
 
     override val isRunningOnGPU: Boolean get() = false
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
index 23eb07204..e003b68ea 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -6,12 +6,9 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyFtrl
 
@@ -60,12 +57,6 @@ public class Ftrl(
     private var initialAccumulatorValue: Float = 0.0f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    /** */
-    private lateinit var learningRatePowerConst: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var l1RegularizationStrengthConst: Constant<Float>
-    private lateinit var l2RegularizationStrengthConst: Constant<Float>
-    private lateinit var l2ShrinkageRegularizationStrengthConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -82,22 +73,19 @@ public class Ftrl(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
-        l1RegularizationStrengthConst = tf.constant(l1RegularizationStrength, getDType())
-        l2RegularizationStrengthConst = tf.constant(l2RegularizationStrength, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        l2ShrinkageRegularizationStrengthConst = tf.constant(l2ShrinkageRegularizationStrength, getDType())
-        learningRatePowerConst = tf.constant(learningRatePower, getDType())
+        val l1RegularizationStrengthConst = tf.constant(l1RegularizationStrength, getDType())
+        val l2RegularizationStrengthConst = tf.constant(l2RegularizationStrength, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val l2ShrinkageRegularizationStrengthConst = tf.constant(l2ShrinkageRegularizationStrength, getDType())
+        val learningRatePowerConst = tf.constant(learningRatePower, getDType())
 
-        for (i in weights.indices) {
+        for ((i, variable) in weights.withIndex()) {
+            val output = variable.asOutput()
+            val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val linearSlot = createSlot(LINEAR_ACCUMULATOR, output, tf, graph)
 
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
-
-            val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
-            val linearSlot: Variable<Float> = getSlot(varName, LINEAR_ACCUMULATOR)
             val options = ApplyFtrl.useLocking(true)
 
             targets.add(
@@ -119,24 +107,6 @@ public class Ftrl(
         return targets
     }
 
-    private fun createFtrlSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumInitializer = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
-
-        val linearAccumInitializerName = defaultInitializerOpName(createName(v, LINEAR_ACCUMULATOR))
-        val linearAccumInitializer = tf.withName(linearAccumInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        createSlot(graph, tf, v.asOutput(), LINEAR_ACCUMULATOR, linearAccumInitializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createFtrlSlot(graph, tf, v.asOutput())
-        }
-    }
-
     override val optimizerName: String get() = "Ftrl"
 
     override val isRunningOnGPU: Boolean get() = false
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
index 34dd2b934..4596ca2b0 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -6,11 +6,8 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyMomentum
 
@@ -30,8 +27,6 @@ public class Momentum(
     private val useNesterov: Boolean = true,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var momentumConst: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -44,16 +39,13 @@ public class Momentum(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
-        learningRateConst = tf.constant(learningRate)
-        momentumConst = tf.constant(momentum)
+        val learningRateConst = tf.constant(learningRate)
+        val momentumConst = tf.constant(momentum)
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-
-            val slot = getSlot(variable.ref().op().name(), MOMENTUM)
+        for ((i, variable) in weights.withIndex()) {
+            val slot = createSlot(MOMENTUM, variable.asOutput(), tf, graph)
 
             targets.add(
                 tf.train.applyMomentum(
@@ -70,19 +62,6 @@ public class Momentum(
         return targets
     }
 
-    private fun createMomentumSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
-        val initializer: Operand<Float> = tf.withName(momentumInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        createSlot(graph, tf, v.asOutput(), MOMENTUM, initializer)
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createMomentumSlot(graph, tf, v.asOutput())
-        }
-    }
-
     override val optimizerName: String get() = "Momentum"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
index 5cc95cfb7..ab0683dac 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
@@ -7,12 +7,12 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.defaultAssignOpName
+import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Assign
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 
@@ -22,11 +22,6 @@ import org.tensorflow.op.core.Variable
  * @property [clipGradient] Strategy of gradient clipping as subclass of [ClipGradientAction].
  */
 public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
-    /**
-     * Top level map key is the variable name, lower level map key is the slot name.
-     */
-    private lateinit var slots: MutableMap<String, MutableMap<String, Variable<Float>>>
-
     /**
      * Prepares targets for optimization process.
      *
@@ -43,26 +38,10 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
         tf: Ops,
         loss: Operand<Float>
     ): List<Operand<Float>> {
-        slots = mutableMapOf()
-
         val gradients: Gradients = computeGradients(tf, loss, weights)
-
-        val variableOutputs = variablesToOutputs(weights)
-
-        createSlots(graph, tf, variableOutputs) // empty action if not overridden
-
         return applyGradients(graph, tf, weights, gradients)
     }
 
-    private fun variablesToOutputs(variables: List<Variable<Float>>): List<Output<Float>> {
-        val variableOutputs: MutableList<Output<Float>> = mutableListOf()
-        for (i in variables.indices) {
-            variableOutputs.add(i, variables[i].asOutput())
-        }
-
-        return variableOutputs
-    }
-
     /**
      * Applies gradients to weights.
      *
@@ -88,70 +67,39 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
         return tf.gradients(loss, weights)
     }
 
-    /**
-     * No-op slot creation method.
-     *
-     * @param variables The variables to create slots for.
-     */
-    protected open fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-
-    }
-
     /** Returns optimizer name. */
     public abstract val optimizerName: String
 
     /**
      * Creates a slot in the graph for the specified variable with the specified name. Adds the slot's
-     * initializer to the graph's initializers, and the slot to the optimiser's slot map.
+     * initializer to the graph's initializers.
      *
-     * @param [graph] KGraph to be updated.
-     * @param [tf] TensorFlow graph API for building operations.
-     * @param [variable] The variable to create the slot for.
      * @param [slotName] The name of the slot.
-     * @param [initializer] The initializer for the slot.
+     * @param [variable] The variable to create the slot for.
+     * @param [tf] TensorFlow graph API for building operations.
+     * @param [graph] KGraph to be updated.
+     * @param [initialValue] The initial value to use.
      */
-    protected open fun createSlot(
-        graph: KGraph,
-        tf: Ops,
-        variable: Output<Float>,
-        slotName: String,
-        initializer: Operand<Float>
-    ) {
-        val createName: String = createName(variable, slotName)
-        val slot: Variable<Float> = tf.withName(createName).variable(variable.shape(), getDType())
-
-        val assignName = defaultAssignOpName(createName(variable, slotName))
-        val slotInit: Assign<Float> = tf.withName(assignName).assign(slot, initializer)
-
-        graph.addOptimizerVariableInitializer(slotInit)
-        graph.addOptimizerVariable(slot)
+    protected fun createSlot(slotName: String,
+                             variable: Output<Float>,
+                             tf: Ops,
+                             graph: KGraph,
+                             initialValue: Float = 0.0f
+    ): Variable<Float> {
+        val slotVariableName = defaultOptimizerVariableName(variable.op().name() + "-" + slotName)
+        val slot = tf.withName(slotVariableName).variable(variable.shape(), getDType())
 
-        val varName = variable.op().name()
+        val initializerOpName = defaultInitializerOpName(slotVariableName)
+        val initializerOp = tf.withName(initializerOpName)
+            .fill(tf.shape(variable), tf.dtypes.cast(tf.constant(initialValue), getDType()))
 
-        val variables: MutableMap<String, Variable<Float>> = slots.computeIfAbsent(slotName) { mutableMapOf() }
-        variables[varName] = slot
-    }
+        val assignOpName = defaultAssignOpName(slotVariableName)
+        val assignOp = tf.withName(assignOpName).assign(slot, initializerOp)
 
-    /**
-     * Gets the slot associated with the specified variable and slot name.
-     *
-     * @param [varName] The variable to lookup.
-     * @param [slotName] The slot name.
-     * @return The slot.
-     */
-    protected fun getSlot(
-        varName: String,
-        slotName: String
-    ): Variable<Float> {
-        val variables: MutableMap<String, Variable<Float>> = slots[slotName]!!
-        return variables[varName]!!
-    }
+        graph.addOptimizerVariableInitializer(assignOp)
+        graph.addOptimizerVariable(slot)
 
-    /**
-     * Creates name for [variable] used in slot with name [slotName].
-     */
-    internal open fun createName(variable: Output<Float>, slotName: String): String {
-        return defaultOptimizerVariableName(variable.op().name() + "-" + slotName)
+        return slot
     }
 
     /** True, if optimizer is implemented for GPU. */
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
index 5fea565ee..f313d85ed 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -6,12 +6,9 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyCenteredRmsProp
 
@@ -39,11 +36,6 @@ public class RMSProp(
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
 
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var decayConst: Constant<Float>
-    private lateinit var momentumConst: Constant<Float>
-
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
         require(momentum >= 0.0f) { "Momentum $momentum should be >= 0.0." }
@@ -57,23 +49,20 @@ public class RMSProp(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
-
-        decayConst = tf.constant(decay, getDType())
-        momentumConst = tf.constant(momentum, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val targets = mutableListOf<Operand<Float>>()
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-            val varName = variable.ref().op().name()
+        val decayConst = tf.constant(decay, getDType())
+        val momentumConst = tf.constant(momentum, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
-            val rmsSlot: Variable<Float> = getSlot(varName, RMS)
-            val momentumSlot: Variable<Float> = getSlot(varName, MOMENTUM)
+        for ((i, variable) in weights.withIndex()) {
+            val output = variable.asOutput()
+            val rmsSlot = createSlot(RMS, output, tf, graph)
+            val momentumSlot = createSlot(MOMENTUM, output, tf, graph)
 
             if (centered) {
-                val mgSlot: Variable<Float> = getSlot(varName, MG)
+                val mgSlot = createSlot(MG, output, tf, graph)
                 targets.add(
                     tf.train.applyCenteredRmsProp(
                         variable,
@@ -107,35 +96,6 @@ public class RMSProp(
         return targets
     }
 
-    private fun createRMSPropSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
-        val rmsInitializerName = defaultInitializerOpName(createName(v, RMS))
-
-        val rmsInitializer: Operand<Float> = tf.withName(rmsInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(1.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), RMS, rmsInitializer)
-
-        val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
-        val momentumInitializer: Operand<Float> = tf.withName(momentumInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), MOMENTUM, momentumInitializer)
-
-        if (centered) {
-            val mgInitializerName = defaultInitializerOpName(createName(v, MG))
-            val mgInitializer: Operand<Float> = tf.withName(mgInitializerName)
-                .fill(
-                    tf.shape(v),
-                    tf.constant(0.0f)
-                )
-            createSlot(graph, tf, v.asOutput(), MG, mgInitializer)
-        }
-    }
-
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
-        for (v in variables) {
-            createRMSPropSlot(graph, tf, v.asOutput())
-        }
-    }
-
     override val optimizerName: String get() = "RMSProp"
 
     override val isRunningOnGPU: Boolean get() = true
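
For reference, here is a minimal sketch of how a custom optimizer would look after this change: slot creation happens inline through the shared createSlot helper instead of a createSlots override plus getSlot lookups. It assumes applyGradients keeps the signature visible in the hunks above and that ClipGradientAction.clipGradient(tf, gradient) is available as used by the existing optimizers; the class name ToySgdWithMomentum and its slot name are purely illustrative and are not part of this change set.

package org.jetbrains.kotlinx.dl.api.core.optimizer

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Gradients
import org.tensorflow.op.core.Variable

// Hypothetical optimizer built on the new createSlot helper (not part of this diff).
public class ToySgdWithMomentum(
    private val learningRate: Float = 0.001f,
    private val momentum: Float = 0.9f,
    clipGradient: ClipGradientAction = NoClipGradient()
) : Optimizer(clipGradient) {

    override fun applyGradients(
        graph: KGraph,
        tf: Ops,
        weights: List<Variable<Float>>,
        gradients: Gradients
    ): List<Operand<Float>> {
        val targets = mutableListOf<Operand<Float>>()

        val learningRateConst = tf.constant(learningRate)
        val momentumConst = tf.constant(momentum)

        for ((i, variable) in weights.withIndex()) {
            // One call creates the slot variable and its fill-based initializer,
            // and registers both with the KGraph; no separate createSlots pass.
            val slot = createSlot("toy-momentum", variable.asOutput(), tf, graph)

            targets.add(
                tf.train.applyMomentum(
                    variable,
                    slot,
                    learningRateConst,
                    clipGradient.clipGradient(tf, gradients.dy(i)),
                    momentumConst
                )
            )
        }
        return targets
    }

    override val optimizerName: String get() = "ToySgdWithMomentum"

    override val isRunningOnGPU: Boolean get() = false
}

The default initialValue of 0.0f matches the zero-filled initializers most of the old createXxxSlot helpers produced; optimizers that need a different starting value, such as AdaGrad's accumulator, pass it explicitly as shown in the AdaGrad hunk above.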