Turn down gain on GAIL discriminator output (#4762)

andrewcoh · web-flow · commit 0ff8f7782a81 · 2020-12-17T12:58:47.000-05:00
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
@@ -80,7 +80,7 @@ def test_reward_decreases(
     init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
     init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
 
-    for _ in range(10):
+    for _ in range(20):
         gail_rp.update(buffer_policy)
         reward_expert = gail_rp.evaluate(buffer_expert)[0]
         reward_policy = gail_rp.evaluate(buffer_policy)[0]
diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
@@ -103,7 +103,7 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
             )
 
         self._estimator = torch.nn.Sequential(
-            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
+            linear_layer(estimator_input_size, 1, kernel_gain=0.2), torch.nn.Sigmoid()
         )
 
     def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:

Original file line number	Diff line number	Diff line change
`@@ -103,7 +103,7 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:`
`103`	`103`	`)`
`104`	`104`
`105`	`105`	`self._estimator = torch.nn.Sequential(`
`106`		`- linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()`
	`106`	`+ linear_layer(estimator_input_size, 1, kernel_gain=0.2), torch.nn.Sigmoid()`
`107`	`107`	`)`
`108`	`108`
`109`	`109`	`def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:`