-
-
Notifications
You must be signed in to change notification settings - Fork 31.9k
gh-87597: Decode subprocess output in text mode when timeout is hit #95579
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fa8d19c
aecc55b
4b4b7eb
c4b1012
a2c7e06
9d2168e
98bff39
39393a8
34dfe5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1245,11 +1245,39 @@ def _check_timeout(self, endtime, orig_timeout, stdout_seq, stderr_seq, | |
"""Convenience for checking if a timeout has expired.""" | ||
if endtime is None: | ||
return | ||
|
||
def translate_newlines_partial_output(data, encoding, errors): | ||
# Handle decoding the data considering it may be truncated | ||
# mid-codepoint (ignore the trailing partial codepoint). | ||
# See https://github.com/python/cpython/issues/87597 | ||
try: | ||
output = self._translate_newlines(data, encoding, errors) | ||
except UnicodeDecodeError as exc: | ||
if exc.end == len(data): | ||
output = self._translate_newlines(data[:exc.start], | ||
encoding, | ||
errors) | ||
else: | ||
raise | ||
return output | ||
|
||
if skip_check_and_raise or _time() > endtime: | ||
if stdout_seq is not None: | ||
stdout = b''.join(stdout_seq) | ||
if self.text_mode: | ||
stdout = translate_newlines_partial_output( | ||
stdout, self.stdout.encoding, self.stdout.errors) | ||
else: | ||
stdout = None | ||
Comment on lines
+1265
to
+1271
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this whole pattern could go into the helper function? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the if/else around the None makes sense here as is, but the join and text_mode conditional could be done in-function. basically def join_and_maybe_decode(output_seq, data, encoding, errors):
output = b''.join(output_seq)
if self.text_mode:
try:
... # existing translate_newlines_partial_output code and here you'd have things like if stdout_seq is not None:
stdout = join_and_maybe_decode(stdout_seq, data, encoding, errors)
else:
stdout = None
# and the same for stderr |
||
if stderr_seq is not None: | ||
stderr = b''.join(stderr_seq) | ||
if self.text_mode: | ||
stderr = translate_newlines_partial_output( | ||
stderr, self.stderr.encoding, self.stderr.errors) | ||
else: | ||
stderr = None | ||
raise TimeoutExpired( | ||
self.args, orig_timeout, | ||
output=b''.join(stdout_seq) if stdout_seq else None, | ||
stderr=b''.join(stderr_seq) if stderr_seq else None) | ||
self.args, orig_timeout, output=stdout, stderr=stderr) | ||
|
||
|
||
def wait(self, timeout=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1129,6 +1129,37 @@ def test_universal_newlines_communicate_encodings(self): | |
stdout, stderr = popen.communicate(input='') | ||
self.assertEqual(stdout, '1\n2\n3\n4') | ||
|
||
@unittest.skipIf(mswindows, "behavior currently not supported on Windows") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is needed for this to work on Windows? having inconsistent behavior in the returned stderr/stdout types on Timeout between the two platforms will make people's code difficult. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hah, I didn't even notice this. I'm guessing this should actually be "platforms that don't default to UTF-8 don't trigger the decoding error", which luckily is easily resolved by explicitly specifying that the encoding should be utf-8. |
||
def test_universal_newlines_timeout(self): | ||
with self.assertRaises(subprocess.TimeoutExpired) as c: | ||
p = subprocess.run( | ||
[ | ||
sys.executable, "-c", | ||
"import sys, time;" | ||
r"sys.stderr.buffer.write(b'foo \xc2\xa4 bar');" | ||
"sys.stderr.buffer.flush();" | ||
r"sys.stdout.buffer.write(b'foo \xc2');" | ||
"sys.stdout.buffer.flush();" | ||
"time.sleep(10);" | ||
], | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE, | ||
universal_newlines=True, | ||
timeout=3) | ||
self.assertEqual(c.exception.stdout, "foo ") | ||
self.assertEqual(c.exception.stderr, "foo ¤ bar") | ||
|
||
@unittest.skipIf(mswindows, "behavior currently not supported on Windows") | ||
def test_no_output_timeout(self): | ||
with self.assertRaises(subprocess.TimeoutExpired) as c: | ||
p = subprocess.run( | ||
[sys.executable, "-c", "import time; time.sleep(10)"], | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE, | ||
timeout=0.1) | ||
self.assertEqual(c.exception.stdout, b"") | ||
self.assertEqual(c.exception.stderr, b"") | ||
|
||
def test_communicate_errors(self): | ||
for errors, expected in [ | ||
('ignore', ''), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Store decoded output from ``subprocess.run()`` on ``TimeoutExpired`` exception when using ``text=True`` mode. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This exception would be raised instead of the timeout error, which isn't going to be helpful. I considered three alternatives, but I think the last is the best:
from
itTimeoutExpired
class (so it learns to keep encoding/errors around and do the decode on demand)exc.start
here, then decode the rest with thereplace
error handler and concatenate itThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, I like that latter strategy. I think it is important to have the
TimeoutExpired
be the primary error. Otherwise the caller could wrongly interpret theUnicodeDecodeError
as the process exiting naturally while producing undecodable output. If the process did emit undecodable data on its own, that isn't important in this situation.