|
| 1 | +# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
| 2 | +# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt |
| 3 | + |
| 4 | +"""Raw data collector for coverage.py.""" |
| 5 | + |
| 6 | +from __future__ import annotations |
| 7 | + |
| 8 | +import atexit |
| 9 | +import dataclasses |
| 10 | +import dis |
| 11 | +import inspect |
| 12 | +import os |
| 13 | +import os.path |
| 14 | +import re |
| 15 | +import sys |
| 16 | +import threading |
| 17 | +import traceback |
| 18 | + |
| 19 | +from types import CodeType, FrameType, ModuleType |
| 20 | +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, cast |
| 21 | + |
| 22 | +#from coverage.debug import short_stack |
| 23 | +from coverage.types import ( |
| 24 | + TArc, TFileDisposition, TLineNo, TTraceData, TTraceFileData, TTraceFn, |
| 25 | + TTracer, TWarnFn, |
| 26 | +) |
| 27 | + |
| 28 | +# When running meta-coverage, this file can try to trace itself, which confuses |
| 29 | +# everything. Don't trace ourselves. |
| 30 | + |
# Path of this module with any trailing "c"/"o" characters stripped
# (presumably so a compiled ".pyc"/".pyo" path maps back to the ".py" source).
THIS_FILE = __file__.rstrip("co")

# Shortened thread tags that log() has already seen.
seen_threads = set()
| 34 | + |
# Debug logging switch.  Flip to True to get the /tmp/pan*.out files written.
LOG_ENABLED = False


def log(msg):
    """Append a debugging message to the /tmp/pan*.out log files.

    Does nothing unless LOG_ENABLED is true.  When enabled, the message is
    appended to three files: one shared, one per process, and one per
    process+thread.  The first message seen from a thread is preceded by a
    stack dump showing where that thread came from.
    """
    if not LOG_ENABLED:
        return
    # Thread ids are reused across processes? Make a shorter number more likely
    # to be unique.
    pid = os.getpid()
    tid = (pid * threading.current_thread().ident) % 9_999_991
    tid = f"{tid:07d}"
    if tid not in seen_threads:
        # Record the tag before recursing so the nested call doesn't loop.
        seen_threads.add(tid)
        log(f"New thread {tid}:\n{short_stack(full=True)}")
    for filename in [
        "/tmp/pan.out",
        f"/tmp/pan-{pid}.out",
        f"/tmp/pan-{pid}-{tid}.out",
    ]:
        with open(filename, "a") as f:
            print(f"{pid}:{tid}: {msg}", file=f, flush=True)
| 52 | + |
# (regex, replacement) pairs applied to file names before the prefix table.
FILENAME_REGEXES = [
    (r"/private/var/folders/.*/pytest-of-.*/pytest-\d+", "tmp:"),
]
# (prefix, replacement) pairs, filled in lazily by fname_repr().
FILENAME_SUBS = []


def fname_repr(filename):
    """Return repr() of `filename` with well-known directories abbreviated.

    Every sys.path directory is replaced by "syspath:" and the coverage
    package directory by "cov:"; FILENAME_REGEXES are applied first.  The
    substitution table is built on first use and sorted longest-first so the
    most specific directory wins.  `filename` may be None, giving "None".
    """
    if not FILENAME_SUBS:
        # Import before touching the table so a failed import can't leave it
        # half-built.
        import coverage
        # Skip empty (and non-str) sys.path entries: str.replace("", x) would
        # insert x between every character of the file name.
        FILENAME_SUBS.extend(
            (pathdir, "syspath:")
            for pathdir in sys.path
            if isinstance(pathdir, str) and pathdir
        )
        FILENAME_SUBS.append((os.path.dirname(coverage.__file__), "cov:"))
        FILENAME_SUBS.sort(key=(lambda pair: len(pair[0])), reverse=True)
    if filename is not None:
        for pat, sub in FILENAME_REGEXES:
            filename = re.sub(pat, sub, filename)
        for before, after in FILENAME_SUBS:
            filename = filename.replace(before, after)
    return repr(filename)
| 71 | + |
def arg_repr(arg):
    """Return a compact, log-friendly description of `arg`.

    Code objects are shown as their name, abbreviated file name and first
    line number; anything else is shown as its plain repr().
    """
    if not isinstance(arg, CodeType):
        return repr(arg)
    return f"<name={arg.co_name}, file={fname_repr(arg.co_filename)}#{arg.co_firstlineno}>"
| 78 | + |
def short_stack(full=True):
    """Return the current call stack as text, one frame per line.

    Frames are listed outermost first, so the last line is this function
    itself.  Each line shows the function name (right-aligned to 30 columns),
    the id of the frame object in hex, and the file name and line number.

    `full` is accepted for call compatibility but is currently ignored.
    """
    # inspect.stack() is innermost-first; reverse it to read top-down.
    stack: List[inspect.FrameInfo] = inspect.stack()[::-1]
    return "\n".join(
        f"{fi.function:>30s} : 0x{id(fi.frame):x} {fi.filename}:{fi.lineno}"
        for fi in stack
    )
| 82 | + |
def panopticon(*names):
    """Decorator factory for monitoring callbacks: report failures, then re-raise.

    `names` labels the wrapped method's positional arguments for the call
    logging below, which is currently commented out; a name of None means
    "don't log this argument".

    If the wrapped method raises, the exception is logged, its traceback is
    appended to /tmp/pan.out, all events for the coverage tool id are turned
    off, and the original exception is re-raised.
    """
    import functools

    def _decorator(meth):
        @functools.wraps(meth)
        def _wrapped(self, *args):
            try:
                # log("stack:\n" + short_stack())
                # args_reprs = []
                # for name, arg in zip(names, args):
                #     if name is None:
                #         continue
                #     args_reprs.append(f"{name}={arg_repr(arg)}")
                # log(f"{id(self)}:{meth.__name__}({', '.join(args_reprs)})")
                ret = meth(self, *args)
                # log(f" end {id(self)}:{meth.__name__}({', '.join(args_reprs)})")
                return ret
            except Exception as exc:
                try:
                    log(f"{exc.__class__.__name__}: {exc}")
                    with open("/tmp/pan.out", "a") as f:
                        traceback.print_exception(exc, file=f)
                except OSError:
                    # Diagnostics are best-effort: an unwritable log file must
                    # not hide the real error or skip the shutdown below.
                    pass
                sys.monitoring.set_events(sys.monitoring.COVERAGE_ID, 0)
                # Bare raise: propagate the original exception unchanged.
                raise
        return _wrapped
    return _decorator
| 105 | + |
| 106 | + |
@dataclasses.dataclass
class CodeInfo:
    """What the tracer has decided and cached about one code object."""

    # Whether lines in this code object are being recorded.
    tracing: bool
    # The collection that measurements for this code's file are added to,
    # or None when the code is not traced.
    file_data: Optional[TTraceFileData]
    # Instruction offset -> source line number, or None when not traced.
    byte_to_line: Optional[Dict[int, int]]
| 112 | + |
| 113 | + |
def bytes_to_lines(code):
    """Map each instruction offset in `code` to its source line number.

    Returns a dict keyed by the offset of every instruction that
    dis.get_instructions() yields.  Instructions that don't start a new line
    get the line of the most recent instruction that did; instructions before
    any line start map to None.
    """
    b2l = {}
    cur_line = None
    for inst in dis.get_instructions(code):
        starts_line = inst.starts_line
        if isinstance(starts_line, bool):
            # Python 3.13+: starts_line is only a flag, and the line number
            # itself is in line_number (which can be None).
            if starts_line and inst.line_number is not None:
                cur_line = inst.line_number
        elif starts_line is not None:
            # Python 3.12 and earlier: starts_line is the line number, or None.
            cur_line = starts_line
        b2l[inst.offset] = cur_line
    log(f" --> bytes_to_lines: {b2l!r}")
    return b2l
| 123 | + |
class Pep669Tracer(TTracer):
    """Python implementation of the raw data tracer for PEP669 implementations.

    Measurement is driven by sys.monitoring callbacks: function-level events
    are enabled globally, and line/branch/jump events are enabled per code
    object once should_trace() has said the file is to be measured.
    """
    # One of these will be used across threads. Be careful.

    def __init__(self) -> None:
        log(f"Pep669Tracer.__init__: @{id(self)}\n{short_stack()}")
        # pylint: disable=super-init-not-called
        # Attributes set from the collector:
        self.data: TTraceData
        self.trace_arcs = False
        self.should_trace: Callable[[str, FrameType], TFileDisposition]
        self.should_trace_cache: Dict[str, Optional[TFileDisposition]]
        self.should_start_context: Optional[Callable[[FrameType], Optional[str]]] = None
        self.switch_context: Optional[Callable[[Optional[str]], None]] = None
        self.warn: TWarnFn

        # The threading module to use, if any.
        self.threading: Optional[ModuleType] = None

        # Per-code-object decisions, filled in by sysmon_py_start.
        self.code_infos: Dict[CodeType, CodeInfo] = {}
        # The last line seen in each live frame, for building arcs.
        self.last_lines: Dict[FrameType, int] = {}
        self.stats = {
            "starts": 0,
        }

        self.thread: Optional[threading.Thread] = None
        self.stopped = False
        self._activity = False

        # The sys.monitoring tool id; None until start() has claimed it.
        self.myid: Optional[int] = None

        self.in_atexit = False
        # On exit, self.in_atexit = True
        atexit.register(setattr, self, "in_atexit", True)

    def __repr__(self) -> str:
        me = id(self)
        points = sum(len(v) for v in self.data.values())
        files = len(self.data)
        return f"<Pep669Tracer at 0x{me:x}: {points} data points in {files} files>"

    def start(self) -> TTraceFn:    # TODO: wrong return type
        """Start this Tracer.

        Claims the coverage tool id, enables the function-level events
        globally, and registers every callback.  Line, branch and jump events
        are only switched on later, per code object, by sysmon_py_start.

        Raises RuntimeError if the tracer is re-started from a thread other
        than the one that first started it.
        """
        self.stopped = False
        if self.threading:
            if self.thread is None:
                self.thread = self.threading.current_thread()
            elif self.thread.ident != self.threading.current_thread().ident:
                # Re-starting from a different thread!? This isn't supported,
                # so fail loudly instead of carrying on in an unknown state.
                raise RuntimeError("Pep669Tracer re-started from a different thread")

        events = sys.monitoring.events
        self.myid = sys.monitoring.COVERAGE_ID
        sys.monitoring.use_tool_id(self.myid, "coverage.py")
        sys.monitoring.set_events(
            self.myid,
            events.PY_START | events.PY_RETURN | events.PY_RESUME | events.PY_YIELD | events.PY_UNWIND,
        )
        for event, callback in [
            (events.PY_START, self.sysmon_py_start),
            (events.PY_RESUME, self.sysmon_py_resume),
            (events.PY_RETURN, self.sysmon_py_return),
            (events.PY_YIELD, self.sysmon_py_yield),
            (events.PY_UNWIND, self.sysmon_py_unwind),
            (events.LINE, self.sysmon_line),
            (events.BRANCH, self.sysmon_branch),
            (events.JUMP, self.sysmon_jump),
        ]:
            sys.monitoring.register_callback(self.myid, event, callback)

    def stop(self) -> None:
        """Stop this Tracer.

        Turns off the global events and releases the tool id.  Does nothing
        if start() was never called.
        """
        if self.myid is None:
            return
        sys.monitoring.set_events(self.myid, 0)
        sys.monitoring.free_tool_id(self.myid)
        self.stopped = True

    def activity(self) -> bool:
        """Has there been any activity?"""
        return self._activity

    def reset_activity(self) -> None:
        """Reset the activity() flag."""
        self._activity = False

    def get_stats(self) -> Optional[Dict[str, int]]:
        """Return a dictionary of statistics, or None."""
        # Statistics are switched off for now.  The code after this return is
        # unreachable and is kept to show what would be reported.
        return None
        return self.stats | {
            "codes": len(self.code_infos),
            "codes_tracing": sum(1 for ci in self.code_infos.values() if ci.tracing),
        }

    def callers_frame(self) -> FrameType:
        """Return the frame three levels up from this method.

        When called directly from a sysmon_* callback, the hops are: the
        callback itself, the panopticon wrapper around it, and then whatever
        invoked the wrapper -- expected to be the frame of the code being
        monitored.
        """
        return inspect.currentframe().f_back.f_back.f_back

    @panopticon("code", "@")
    def sysmon_py_start(self, code, instruction_offset: int):
        """Handle PY_START: decide whether to trace `code`, and note the entry."""
        # Entering a new frame. Decide if we should trace in this file.
        self._activity = True
        self.stats["starts"] += 1

        code_info = self.code_infos.get(code)
        if code_info is not None:
            tracing_code = code_info.tracing
            file_data = code_info.file_data
        else:
            tracing_code = file_data = None

        if tracing_code is None:
            # First time we've seen this code object: make and cache the decision.
            filename = code.co_filename
            disp = self.should_trace_cache.get(filename)
            if disp is None:
                # Two hops: the panopticon wrapper, then the frame that invoked it.
                frame = inspect.currentframe().f_back.f_back
                disp = self.should_trace(filename, frame)
                self.should_trace_cache[filename] = disp

            tracing_code = disp.trace
            if tracing_code:
                tracename = disp.source_filename
                assert tracename is not None
                if tracename not in self.data:
                    self.data[tracename] = set()    # type: ignore[assignment]
                file_data = self.data[tracename]
                b2l = bytes_to_lines(code)
            else:
                file_data = None
                b2l = None

            self.code_infos[code] = CodeInfo(
                tracing=tracing_code,
                file_data=file_data,
                byte_to_line=b2l,
            )

            if tracing_code:
                # Only code we are measuring gets the finer-grained events.
                log(f"set_local_events(code={arg_repr(code)})")
                sys.monitoring.set_local_events(
                    self.myid,
                    code,
                    sys.monitoring.events.LINE |
                    sys.monitoring.events.BRANCH |
                    sys.monitoring.events.JUMP,
                )

        if tracing_code:
            # A negative first line number marks "entered the code object".
            frame = self.callers_frame()
            self.last_lines[frame] = -code.co_firstlineno
            log(f" {file_data=}")

    @panopticon("code", "@")
    def sysmon_py_resume(self, code, instruction_offset: int):
        """Handle PY_RESUME: note the line at which the frame picks up again."""
        frame = self.callers_frame()
        self.last_lines[frame] = frame.f_lineno

    @panopticon("code", "@", None)
    def sysmon_py_return(self, code, instruction_offset: int, retval: object):
        """Handle PY_RETURN: record the exit arc and forget the frame."""
        frame = self.callers_frame()
        code_info = self.code_infos.get(code)
        if code_info is not None and code_info.file_data is not None:
            if self.trace_arcs:
                # A frame that was already running when tracing started has no
                # recorded last line, so there is no arc to add for it.
                last_line = self.last_lines.get(frame)
                if last_line is not None:
                    arc = (last_line, -code.co_firstlineno)
                    cast(Set[TArc], code_info.file_data).add(arc)
                    log(f" add1({arc=})")

        # Leaving this function, no need for the frame any more.
        log(f" popping frame 0x{id(frame):x}")
        self.last_lines.pop(frame, None)

    @panopticon("code", "@", None)
    def sysmon_py_yield(self, code, instruction_offset: int, retval: object):
        """Handle PY_YIELD: nothing is recorded yet."""
        pass

    @panopticon("code", "@", None)
    def sysmon_py_unwind(self, code, instruction_offset: int, exception):
        """Handle PY_UNWIND: record the exit arc and forget the frame."""
        frame = self.callers_frame()
        # PY_UNWIND is enabled globally, so it can arrive for code whose
        # PY_START we never saw: look the code up leniently, as
        # sysmon_py_return does.
        code_info = self.code_infos.get(code)
        if code_info is not None and code_info.file_data is not None:
            if self.trace_arcs:
                last_line = self.last_lines.get(frame)
                if last_line is not None:
                    arc = (last_line, -code.co_firstlineno)
                    cast(Set[TArc], code_info.file_data).add(arc)
                    log(f" add3({arc=})")

        # Leaving this function.
        self.last_lines.pop(frame, None)

    @panopticon("code", "line")
    def sysmon_line(self, code, line_number: int):
        """Handle LINE: record the line, or the arc from the previous line."""
        frame = self.callers_frame()
        code_info = self.code_infos.get(code)
        if code_info is not None and code_info.file_data is not None:
            if self.trace_arcs:
                # No previous line is known for frames that were already
                # running when tracing started: skip the arc but still
                # remember this line for the next one.
                last_line = self.last_lines.get(frame)
                if last_line is not None:
                    arc = (last_line, line_number)
                    cast(Set[TArc], code_info.file_data).add(arc)
                    log(f" add4({arc=})")
            else:
                cast(Set[TLineNo], code_info.file_data).add(line_number)
                log(f" add5({line_number=})")
            self.last_lines[frame] = line_number

    @panopticon("code", "from@", "to@")
    def sysmon_branch(self, code, instruction_offset: int, destination_offset: int):
        """Handle BRANCH: placeholder, nothing is recorded yet."""
        ...

    @panopticon("code", "from@", "to@")
    def sysmon_jump(self, code, instruction_offset: int, destination_offset: int):
        """Handle JUMP: placeholder, nothing is recorded yet."""
        ...
0 commit comments