Skip to content

Commit 93602fa

Browse files
Alena KastsiukavetsHelen1987
Alena Kastsiukavets
authored and committed
# Fix Windows silent cleanup issue
# Why is this change needed? Customers are suffering from the "Agent occasionally stops working" issue. # How does it address the issue? The issue is caused by Windows silently cleaning up the %WinDir%\Temp folder. This change moves the certificate location to %PROGRAMDATA%. # How was this tested? bb release + Windows integration tests. CR: https://code.amazon.com/reviews/CR-20016442
1 parent 940e774 commit 93602fa

File tree

8 files changed

+192
-26
lines changed

8 files changed

+192
-26
lines changed

bin/update

+2-1
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,8 @@ EOF
326326
end
327327

328328
def setup_windows_certificates
329-
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\certs'))
329+
app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
330+
cert_dir = File.expand_path(File.join(app_root_folder, 'certs'))
330331
@log.info("Setting up windows certificates from cert directory #{cert_dir}")
331332
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'ca-bundle.crt')
332333
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'ca-bundle.crt')

features/step_definitions/step_constants.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ def self.current_aws_account
1111
end
1212

1313
def self.configure_windows_certificate
14-
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\..\certs'))
14+
app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
15+
cert_dir = File.expand_path(File.join(app_root_folder, 'certs'))
1516
Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'windows-ca-bundle.crt')
1617
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'windows-ca-bundle.crt')
1718
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'windows-ca-bundle.crt')

lib/aws/codedeploy/local/deployer.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ def initialize(configuration_file_location = CONF_DEFAULT_LOCATION)
5858
end
5959

6060
def self.configure_windows_certificate
61-
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\..\..\..\certs'))
61+
app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
62+
cert_dir = File.join(app_root_folder, 'certs')
6263
Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'windows-ca-bundle.crt')
6364
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'windows-ca-bundle.crt')
6465
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'windows-ca-bundle.crt')

lib/instance_agent/agent/base.rb

+5-3
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ def run
2828
perform
2929
@error_count = 0
3030
rescue Seahorse::Client::NetworkingError => e
31-
log(:error, "Failed to execute the command. Your certificates might have been deleted" )
32-
# TODO: verify error message is "certificate verify failed"
33-
raise e
31+
log(:error, "Network error: " + e.inspect)
32+
if e.message.include? "certificate verify failed"
33+
log(:error, "Failed to execute the command. Your certificates might have been deleted" )
34+
end
35+
@error_count = @error_count.to_i + 1
3436
rescue Aws::Errors::MissingCredentialsError
3537
log(:error, "Missing credentials - please check if this instance was started with an IAM instance profile")
3638
@error_count = @error_count.to_i + 1

lib/winagent.rb

+9-20
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def initialize
2121
@app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
2222
InstanceAgent::Platform.util = InstanceAgent::WindowsUtil
2323

24-
cert_dir = File.expand_path(File.join(File.dirname(__FILE__), '..\certs'))
24+
cert_dir = File.join(@app_root_folder, 'certs')
2525
Aws.config[:ssl_ca_bundle] = File.join(cert_dir, 'ca-bundle.crt')
2626
ENV['AWS_SSL_CA_DIRECTORY'] = File.join(cert_dir, 'ca-bundle.crt')
2727
ENV['SSL_CERT_FILE'] = File.join(cert_dir, 'ca-bundle.crt')
@@ -34,10 +34,9 @@ def description
3434

3535
def service_main
3636
read_config
37-
@attempt_count = 0
3837
log(:info, 'started')
39-
shutdown_flag = false
40-
while running? && !shutdown_flag
38+
shutdown_flag = false
39+
while running? && !shutdown_flag
4140
with_error_handling do
4241
# Initialize the poller only once
4342
begin
@@ -61,15 +60,15 @@ def service_stop
6160
log(:info, 'stopping the agent')
6261
@polling_mutex.synchronize do
6362
@runner.graceful_shutdown
64-
log(:info, 'agent exiting now')
63+
log(:info, 'command execution threads shutdown, agent exiting now')
6564
end
6665
end
6766

6867
def log(severity, message)
69-
raise ArgumentError, "Unknown severity #{severity.inspect}" unless InstanceAgent::Log::SEVERITIES.include?(severity.to_s)
70-
InstanceAgent::Log.send(severity.to_sym, "#{description}: #{message}")
68+
raise ArgumentError, "Unknown severity #{severity.inspect}" unless InstanceAgent::Log::SEVERITIES.include?(severity.to_s)
69+
InstanceAgent::Log.send(severity.to_sym, "#{description}: #{message}")
7170
end
72-
71+
7372
def expand_conf_path(key)
7473
tmp = InstanceAgent::Config.config[key.to_sym]
7574
InstanceAgent::Config.config(key.to_sym => File.join(ENV['PROGRAMDATA'], tmp)) unless Pathname.new(tmp).absolute?
@@ -78,7 +77,7 @@ def expand_conf_path(key)
7877
def read_config
7978
default_config = File.join(@app_root_folder, "conf.yml")
8079
InstanceAgent::Config.config({:config_file => default_config,
81-
:on_premises_config_file => File.join(default_root, "conf.onpremises.yml")})
80+
:on_premises_config_file => File.join(@app_root_folder, "conf.onpremises.yml")})
8281
InstanceAgent::Config.load_config
8382

8483
expand_conf_path(:root_dir)
@@ -89,23 +88,13 @@ def read_config
8988

9089
def with_error_handling
9190
yield
92-
rescue Seahorse::Client::NetworkingError => e
93-
@attempt_count = @attempt_count + 1
94-
if @attempt_count > 3
95-
log(:error, "Failed to recover after certificate issue:" + e.inspect)
96-
exit
97-
end
98-
log(:error, "Custom:" + e.inspect)
99-
# try to copy certs from application root folder
100-
@certs_backup_folder = File.join(@app_root_folder, "certs/.")
101-
FileUtils.cp_r(@certs_backup_folder, @cert_dir)
10291
rescue SocketError => e
10392
log(:info, "#{description}: failed to run as the connection failed! #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
10493
sleep InstanceAgent::Config.config[:wait_after_connection_problem]
10594
rescue Exception => e
10695
if (e.message.to_s.match(/throttle/i) || e.message.to_s.match(/rateexceeded/i) rescue false)
10796
log(:error, "#{description}: ran into throttling - waiting for #{InstanceAgent::Config.config[:wait_after_throttle_error]}s until retrying")
108-
sleep InstanceAgent::Config.config[:wait_after_throttle_error]
97+
sleep InstanceAgent::Config.config[:wait_after_throttle_error]
10998
else
11099
log(:error, "#{description}: error during start or run: #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
111100
sleep 5

test/instance_agent/agent/base_test.rb

+23
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,28 @@ class InstanceAgentBaseTest < InstanceAgentTestCase
5858
assert_nothing_raised { @base.run }
5959
end
6060
end
61+
62+
context 'when certificates are lost' do

  should 'continue to execute' do
    # A local variable is used instead of a constant: a constant assigned inside
    # a block would be defined on the enclosing class and warn on redefinition.
    cert_verification_failed_message = 'SSL_connect returned=1 errno=0 state=error: certificate verify failed'
    @base.stubs(:perform).raises(Seahorse::Client::NetworkingError.new(Exception.new(cert_verification_failed_message)))

    # The networking error must be handled inside #run, not propagated.
    assert_nothing_raised { @base.run }
  end

end

context 'when network error is thrown' do

  should 'continue to execute' do
    @base.stubs(:perform).raises(Seahorse::Client::NetworkingError.new(Exception.new('random message')))

    # Any networking error, not only certificate failures, must be handled inside #run.
    assert_nothing_raised { @base.run }
  end

end
6183
end
84+
6285
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
require 'test_helper'
2+
3+
# Minimal stand-in for the service base class that InstanceAgentService
# inherits from, so the wrapper can be loaded without the Windows-only library.
class Daemon
  # Always reports the service as running.
  def running?
    true
  end

  # Accepts and ignores any arguments or block and returns false without
  # entering a service loop.
  def self.mainloop(*_args, &_block)
    false
  end
end
12+
13+
require 'wrapper/test_wrapper_winagent'
14+
15+
# Exercises InstanceAgentService#service_main with the poller runner, the
# logger and the configuration replaced by mocks.
class WinAgentTestClass < InstanceAgentTestCase
  context 'Win agent shell try to start agent' do

    setup do
      # Every ENV lookup yields an empty string, so the test does not depend
      # on the real environment.
      ENV.expects(:[]).at_least_once.returns("")

      @fake_runner = mock()
      InstanceAgent::Plugins::CodeDeployPlugin::CommandPoller.stubs(:runner).returns(@fake_runner)

      logger_mock = mock()
      ::ProcessManager::Log.stubs(:init).returns(logger_mock)

      InstanceAgent::Config.expects(:load_config)
      # "0" is converted with to_i by the service, so the pause between runs is zero seconds.
      InstanceAgent::Config.config.expects(:[]).with(:wait_between_runs).at_most(5).returns("0")
      InstanceAgent::Config.config.expects(:[]).at_least_once.returns("")
    end

    should 'starts succesfully' do
      # Exactly two polling runs are expected; `expects` states the strict
      # cardinality directly instead of tightening a lenient stub.
      @fake_runner.expects(:run).times(2)
      FileUtils.expects(:cp_r).never
      @fake_runner.expects(:graceful_shutdown).never

      agent = InstanceAgentService.new
      # Two loop iterations, then the third check ends the loop.
      agent.expects(:running?).times(3).returns(true, true, false)

      agent.service_main
    end

  end
end

test/wrapper/test_wrapper_winagent.rb

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# We can't test cross-platform, so references to Windows-only libraries are removed for tests; see https://github.com/chefspec/chefspec/issues/405
2+
#require 'win32/daemon'
3+
require 'core_ext'
4+
require 'aws-sdk-core'
5+
require 'process_manager'
6+
7+
# There's something strange about how Ocra handles implicit requires.
8+
# We have to explicitly require everything in advance or we'll get uninitialized constant failures.
9+
require 'instance_agent/agent/base'
10+
require 'instance_agent/config'
11+
require 'instance_agent/log'
12+
require 'instance_agent/platform'
13+
require 'instance_agent/platform/windows_util'
14+
require 'instance_agent/plugins/codedeploy/register_plugin'
15+
require 'pathname'
16+
17+
#include Win32
18+
19+
# Test copy of the Windows service wrapper for the CodeDeploy instance agent.
# It polls for commands in a loop until the service stops running or the
# poller requests an exit.
class InstanceAgentService < Daemon

  # Configures the platform utilities, points the AWS SDK and the SSL
  # environment variables at the CA bundle under
  # %PROGRAMDATA%/Amazon/CodeDeploy/certs, and creates the mutex that
  # serializes polling with shutdown.
  def initialize
    @app_root_folder = File.join(ENV['PROGRAMDATA'], "Amazon/CodeDeploy")
    InstanceAgent::Platform.util = InstanceAgent::WindowsUtil
    ca_bundle = File.join(@app_root_folder, 'certs', 'ca-bundle.crt')
    Aws.config[:ssl_ca_bundle] = ca_bundle
    ENV['AWS_SSL_CA_DIRECTORY'] = ca_bundle
    ENV['SSL_CERT_FILE'] = ca_bundle
    @polling_mutex = Mutex.new
  end

  # Human-readable service name, also used as the prefix of every log line.
  def description
    "CodeDeploy Instance Agent Service"
  end

  # Main service loop: loads the configuration, then runs the command poller
  # repeatedly while the service is running. A SystemExit raised by the poller
  # triggers a graceful stop followed by an immediate process exit.
  def service_main
    read_config
    log(:info, 'started')
    shutdown_flag = false
    while running? && !shutdown_flag
      with_error_handling do
        # Initialize the poller only once
        begin
          @polling_mutex.synchronize do
            @runner ||= InstanceAgent::Plugins::CodeDeployPlugin::CommandPoller.runner
            @runner.run
          end
        rescue SystemExit
          service_stop
          shutdown_flag = true
        end
        # Once shutdown has been requested there is nothing left to wait for,
        # so the pause between runs is skipped.
        sleep InstanceAgent::Config.config[:wait_between_runs].to_i unless shutdown_flag
      end
    end
    exit! if shutdown_flag
  end

  # Shuts the poller down gracefully while holding the polling mutex, so a
  # polling run and the shutdown never overlap.
  def service_stop
    log(:info, 'stopping the agent')
    @polling_mutex.synchronize do
      # @runner is only assigned inside the polling loop, so it can still be
      # nil if a stop is requested before the first run.
      @runner.graceful_shutdown if @runner
      log(:info, 'command execution threads shutdown, agent exiting now')
    end
  end

  # Writes a message, prefixed with the service description, to the agent log.
  #
  # Raises ArgumentError when severity is not listed in
  # InstanceAgent::Log::SEVERITIES.
  def log(severity, message)
    raise ArgumentError, "Unknown severity #{severity.inspect}" unless InstanceAgent::Log::SEVERITIES.include?(severity.to_s)
    InstanceAgent::Log.send(severity.to_sym, "#{description}: #{message}")
  end

  # Re-roots the configured path for key under %PROGRAMDATA% unless it is
  # already absolute.
  def expand_conf_path(key)
    tmp = InstanceAgent::Config.config[key.to_sym]
    InstanceAgent::Config.config(key.to_sym => File.join(ENV['PROGRAMDATA'], tmp)) unless Pathname.new(tmp).absolute?
  end

  # Registers the default and on-premises config file locations under the
  # application root, loads the configuration, expands the root and log
  # directories, and initializes the log file inside the log directory.
  def read_config
    default_config = File.join(@app_root_folder, "conf.yml")
    InstanceAgent::Config.config({:config_file => default_config,
      :on_premises_config_file => File.join(@app_root_folder, "conf.onpremises.yml")})
    InstanceAgent::Config.load_config

    expand_conf_path(:root_dir)
    expand_conf_path(:log_dir)

    InstanceAgent::Log.init(File.join(InstanceAgent::Config.config[:log_dir], "codedeploy-agent-log.txt"))
  end

  # Runs the given block and turns failures into a log entry plus a pause:
  # connection failures wait :wait_after_connection_problem, throttling errors
  # wait :wait_after_throttle_error, and anything else waits five seconds.
  #
  # NOTE(review): rescuing Exception also traps signals and exit requests that
  # escape the inner SystemExit handler - presumably deliberate so the service
  # loop keeps going; confirm.
  def with_error_handling
    yield
  rescue SocketError => e
    log(:info, "#{description}: failed to run as the connection failed! #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
    sleep InstanceAgent::Config.config[:wait_after_connection_problem]
  rescue Exception => e
    # The inline rescue keeps a failure while reading the message from hiding
    # the original error.
    if (e.message.to_s.match(/throttle/i) || e.message.to_s.match(/rateexceeded/i) rescue false)
      log(:error, "#{description}: ran into throttling - waiting for #{InstanceAgent::Config.config[:wait_after_throttle_error]}s until retrying")
      sleep InstanceAgent::Config.config[:wait_after_throttle_error]
    else
      log(:error, "#{description}: error during start or run: #{e.class} - #{e.message} - #{e.backtrace.join("\n")}")
      sleep 5
    end
  end
end
104+
105+
# Enter the service loop when this file is loaded, unless the Ocra constant is
# defined (presumably while the OCRA packager is building the executable - confirm).
InstanceAgentService.mainloop unless defined?(Ocra)

0 commit comments

Comments
 (0)