@@ -63,12 +63,12 @@ def softmax(x, axis=-1):
    The input values are the log-odds of the resulting probability.

    Args:
-        x: Input tensor.
-        axis: Integer, axis along which the softmax normalization is applied.
+        x: Input tensor.
+        axis: Integer, axis along which the softmax normalization is applied.

    Returns:
-        Tensor, output of softmax transformation (all values are non-negative
-            and sum to 1).
+        Tensor, output of softmax transformation (all values are non-negative
+            and sum to 1).

    Examples:

@@ -84,7 +84,22 @@ def softmax(x, axis=-1):
    >>> layer = tf.keras.layers.Dense(32,
    ...                               activation=tf.keras.activations.softmax)
    """
-    return backend.softmax(x, axis)
+    if x.shape.rank <= 1:
+        raise ValueError(
+            f"Cannot apply softmax to a tensor that is 1D. Received input: {x}"
+        )
+
+    if isinstance(axis, int):
+        output = tf.nn.softmax(x, axis=axis)
+    else:
+        # nn.softmax does not support tuple axis.
+        numerator = tf.exp(x - tf.reduce_max(x, axis=axis, keepdims=True))
+        denominator = tf.reduce_sum(numerator, axis=axis, keepdims=True)
+        output = numerator / denominator
+
+    # Cache the logits to use for crossentropy loss.
+    output._keras_logits = x
+    return output


@keras_export("keras.activations.elu")
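# Illustrative sketch, not part of the diff: the tuple-axis branch added above
# is the standard numerically stable softmax. A NumPy rendering for clarity
# (shapes and values here are made up for demonstration):
import numpy as np

def softmax_over_axes(x, axes):
    # Subtracting the per-slice max before exponentiating avoids overflow and
    # leaves the result unchanged, since softmax is shift-invariant.
    shifted = x - x.max(axis=axes, keepdims=True)
    numerator = np.exp(shifted)
    return numerator / numerator.sum(axis=axes, keepdims=True)

x = np.random.randn(2, 3, 4).astype("float32")
out = softmax_over_axes(x, axes=(1, 2))
assert np.allclose(out.sum(axis=(1, 2)), 1.0)  # each (3, 4) slice sums to 1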
@@ -123,11 +138,11 @@ def elu(x, alpha=1.0):
    Args:
        x: Input tensor.
        alpha: A scalar, slope of negative section. `alpha` controls the value
-            to which an ELU saturates for negative net inputs.
+            to which an ELU saturates for negative net inputs.

    Returns:
        The exponential linear unit (ELU) activation function: `x` if `x > 0`
-        and `alpha * (exp(x) - 1)` if `x < 0`.
+        and `alpha * (exp(x) - 1)` if `x < 0`.


    Reference:
@@ -181,9 +196,9 @@ def selu(x):

    Notes:
        - To be used together with the
-            `tf.keras.initializers.LecunNormal` initializer.
+            `tf.keras.initializers.LecunNormal` initializer.
        - To be used together with the dropout variant
-            `tf.keras.layers.AlphaDropout` (not regular dropout).
+            `tf.keras.layers.AlphaDropout` (not regular dropout).

    References:
        - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
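# Illustrative sketch, not part of the diff: the pairing the notes describe,
# selu together with LecunNormal initialization and AlphaDropout (rather than
# regular Dropout) so the self-normalizing property is preserved.
import tensorflow as tf

dense = tf.keras.layers.Dense(
    64,
    activation="selu",
    kernel_initializer=tf.keras.initializers.LecunNormal(),
)
dropout = tf.keras.layers.AlphaDropout(0.1)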
@@ -260,7 +275,7 @@ def swish(x):
        The swish activation applied to `x` (see reference paper for details).

    Reference:
-        - [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941)
+        - [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941)
    """
    return tf.nn.silu(x)

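# Illustrative sketch, not part of the diff: swish (also called SiLU) is
# x * sigmoid(x), which is exactly what tf.nn.silu computes.
import numpy as np
import tensorflow as tf

x = tf.constant([-2.0, 0.0, 2.0])
assert np.allclose(
    tf.keras.activations.swish(x).numpy(), (x * tf.sigmoid(x)).numpy()
)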
@@ -292,16 +307,16 @@ def relu(x, alpha=0.0, max_value=None, threshold=0.0):
    Args:
        x: Input `tensor` or `variable`.
        alpha: A `float` that governs the slope for values lower than the
-            threshold.
+            threshold.
        max_value: A `float` that sets the saturation threshold (the largest
-            value the function will return).
+            value the function will return).
        threshold: A `float` giving the threshold value of the activation
-            function below which values will be damped or set to zero.
+            function below which values will be damped or set to zero.

    Returns:
-        A `Tensor` representing the input tensor, transformed by the relu
-        activation function. Tensor will be of the same shape and dtype of
-        input `x`.
+        A `Tensor` representing the input tensor,
+        transformed by the relu activation function.
+        Tensor will be of the same shape and dtype of input `x`.
    """
    return backend.relu(
        x, alpha=alpha, max_value=max_value, threshold=threshold
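# Illustrative sketch, not part of the diff: how the three arguments documented
# above compose. Below the threshold the slope is alpha (applied to
# x - threshold), in between the input passes through, and the output is
# capped at max_value. The sample values are made up for demonstration.
import tensorflow as tf

x = tf.constant([-10.0, -1.0, 0.5, 2.0, 10.0])
y = tf.keras.activations.relu(x, alpha=0.1, max_value=6.0, threshold=1.0)
print(y.numpy())  # approximately [-1.1, -0.2, -0.05, 2.0, 6.0]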
@@ -343,8 +358,8 @@ def gelu(x, approximate=False):
        if `approximate` is `False`.

    Reference:
-        - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415)
-    """  # noqa: E501
+    - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415)
+    """
    return tf.nn.gelu(x, approximate)


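# Illustrative sketch, not part of the diff: the exact (erf-based) gelu versus
# the tanh approximation selected by the `approximate` flag.
import numpy as np
import tensorflow as tf

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
exact = tf.nn.gelu(x, approximate=False)
approx = tf.nn.gelu(x, approximate=True)
# The two forms agree to within roughly 1e-3 on this range.
print(np.max(np.abs(exact.numpy() - approx.numpy())))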
@@ -397,7 +412,10 @@ def sigmoid(x):
    Returns:
        Tensor with the sigmoid activation: `1 / (1 + exp(-x))`.
    """
-    return backend.sigmoid(x)
+    output = tf.sigmoid(x)
+    # Cache the logits to use for crossentropy loss.
+    output._keras_logits = x
+    return output


@keras_export("keras.activations.exponential")
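# Illustrative sketch, not part of the diff: why the patch caches the logits.
# In float32 sigmoid saturates to exactly 1.0 for large inputs, so a
# crossentropy computed from the probabilities blows up, while the
# logits-based formulation stays finite.
import tensorflow as tf

logits = tf.constant([[-20.0, 20.0]])
labels = tf.constant([[1.0, 0.0]])
probs = tf.sigmoid(logits)

naive = -(labels * tf.math.log(probs) + (1.0 - labels) * tf.math.log(1.0 - probs))
stable = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
print(naive.numpy())   # second entry overflows to inf
print(stable.numpy())  # both entries finite, roughly 20.0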
@@ -441,11 +459,11 @@ def hard_sigmoid(x):
        x: Input tensor.

    Returns:
-        The hard sigmoid activation, defined as:
+        The hard sigmoid activation, defined as:

-        - `if x < -2.5: return 0`
-        - `if x > 2.5: return 1`
-        - `if -2.5 <= x <= 2.5: return 0.2 * x + 0.5`
+        - `if x < -2.5: return 0`
+        - `if x > 2.5: return 1`
+        - `if -2.5 <= x <= 2.5: return 0.2 * x + 0.5`
    """
    return backend.hard_sigmoid(x)

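# Illustrative sketch, not part of the diff: the piecewise definition above is
# equivalent to clipping the linear segment, checked numerically here against
# the built-in activation.
import numpy as np
import tensorflow as tf

x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0])
reference = np.clip(0.2 * x.numpy() + 0.5, 0.0, 1.0)
assert np.allclose(tf.keras.activations.hard_sigmoid(x).numpy(), reference)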
@@ -517,8 +535,6 @@ def serialize(activation, use_legacy_format=False):

    Args:
        activation: Function object.
-        use_legacy_format: Boolean, whether to use the legacy format for
-            serialization. Defaults to False.

    Returns:
        String denoting the name attribute of the input function
@@ -592,11 +608,9 @@ def deserialize(name, custom_objects=None, use_legacy_format=False):
    """Returns activation function given a string identifier.

    Args:
-        name: The name of the activation function.
-        custom_objects: Optional `{function_name: function_obj}`
-            dictionary listing user-provided activation functions.
-        use_legacy_format: Boolean, whether to use the legacy format for
-            deserialization. Defaults to False.
+        name: The name of the activation function.
+        custom_objects: Optional `{function_name: function_obj}`
+            dictionary listing user-provided activation functions.

    Returns:
        Corresponding activation function.
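# Illustrative sketch, not part of the diff: the usual round trip through the
# two helpers whose docstrings are trimmed above. The exact serialized form
# (plain name vs. config dict) depends on the Keras version in use.
import tensorflow as tf

serialized = tf.keras.activations.serialize(tf.keras.activations.softmax)
restored = tf.keras.activations.deserialize(serialized)
assert restored.__name__ == "softmax"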