@@ -12,7 +12,7 @@ module Timings
1212
1313using .. Core
1414using .. Compiler: - , + , :, Vector, length, first, empty!, push!, pop!, @inline ,
15- @inbounds , copy, backtrace
15+ @inbounds , copy, backtrace, _time_ns
1616
1717# What we record for any given frame we infer during type inference.
1818struct InferenceFrameInfo
5353Timing (mi_info, start_time, cur_start_time, time, children) = Timing (mi_info, start_time, cur_start_time, time, children, nothing )
5454Timing (mi_info, start_time) = Timing (mi_info, start_time, start_time, UInt64 (0 ), Timing[])
5555
56- _time_ns () = ccall (:jl_hrtime , UInt64, ())
57-
5856# We keep a stack of the Timings for each of the MethodInstances currently being timed.
5957# Since type inference currently operates via a depth-first search (during abstract
6058# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -93,7 +91,7 @@ If set to `true`, record per-method-instance timings within type inference in th
9391__set_measure_typeinf (onoff:: Bool ) = __measure_typeinf__[] = onoff
9492const __measure_typeinf__ = RefValue {Bool} (false )
9593
96- function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt )
94+ function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt , time_before :: UInt64 )
9795 result = caller. result
9896 opt = result. src
9997 if opt isa OptimizationState
@@ -139,9 +137,12 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
139137 if ! @isdefined di
140138 di = DebugInfo (result. linfo)
141139 end
142- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
140+ time_now = _time_ns ()
141+ time_self_ns = caller. time_self_ns + (time_now - time_before)
142+ time_total = (time_now - caller. time_start - caller. time_paused) * 1e-9
143+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
143144 ci, inferred_result, const_flag, first (result. valid_worlds), last (result. valid_worlds), encode_effects (result. ipo_effects),
144- result. analysis_results, di, edges)
145+ result. analysis_results, time_total, caller . time_caches, time_self_ns * 1e-9 , di, edges)
145146 engine_reject (interp, ci)
146147 if ! discard_src && isdefined (interp, :codegen ) && uncompressed isa CodeInfo
147148 # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
@@ -182,8 +183,8 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
182183 end
183184 ccall (:jl_fill_codeinst , Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
184185 ci, rettype, exctype, nothing , const_flags, min_world, max_world, ipo_effects, nothing , di, edges)
185- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
186- ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , di, edges)
186+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
187+ ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , 0.0 , 0.0 , 0.0 , di, edges)
187188 code_cache (interp)[mi] = ci
188189 if isdefined (interp, :codegen )
189190 interp. codegen[ci] = src
@@ -192,14 +193,14 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
192193 return nothing
193194end
194195
195- function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState )
196+ function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState , time_before :: UInt64 )
196197 finishinfer! (frame, frame. interp, frame. cycleid)
197198 opt = frame. result. src
198199 if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
199200 optimize (frame. interp, opt, frame. result)
200201 end
201202 validation_world = get_world_counter ()
202- finish! (frame. interp, frame, validation_world)
203+ finish! (frame. interp, frame, validation_world, time_before )
203204 if isdefined (frame. result, :ci )
204205 # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
205206 # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
@@ -214,7 +215,7 @@ function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
214215 return nothing
215216end
216217
217- function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int )
218+ function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int , time_before :: UInt64 )
218219 cycle_valid_worlds = WorldRange ()
219220 cycle_valid_effects = EFFECTS_TOTAL
220221 for frameid = cycleid: length (frames)
@@ -231,23 +232,45 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cyclei
231232 caller = frames[frameid]:: InferenceState
232233 adjust_cycle_frame! (caller, cycle_valid_worlds, cycle_valid_effects)
233234 finishinfer! (caller, caller. interp, cycleid)
235+ time_now = _time_ns ()
236+ caller. time_self_ns += (time_now - time_before)
237+ time_before = time_now
234238 end
239+ time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
240+ time_paused = UInt64 (0 )
235241 for frameid = cycleid: length (frames)
236242 caller = frames[frameid]:: InferenceState
237243 opt = caller. result. src
238244 if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
239245 optimize (caller. interp, opt, caller. result)
246+ time_now = _time_ns ()
247+ caller. time_self_ns += (time_now - time_before)
248+ time_before = time_now
240249 end
250+ time_caches += caller. time_caches
251+ time_paused += caller. time_paused
252+ caller. time_paused = UInt64 (0 )
253+ caller. time_caches = 0.0
241254 end
255+ cycletop = frames[cycleid]:: InferenceState
256+ time_start = cycletop. time_start
242257 validation_world = get_world_counter ()
243258 cis = CodeInstance[]
244259 for frameid = cycleid: length (frames)
245260 caller = frames[frameid]:: InferenceState
246- finish! (caller. interp, caller, validation_world)
261+ caller. time_start = time_start
262+ caller. time_caches = time_caches
263+ caller. time_paused = time_paused
264+ finish! (caller. interp, caller, validation_world, time_before)
247265 if isdefined (caller. result, :ci )
248266 push! (cis, caller. result. ci)
249267 end
250268 end
269+ if cycletop. parentid != 0
270+ parent = frames[cycletop. parentid]
271+ parent. time_caches += time_caches
272+ parent. time_paused += time_paused
273+ end
251274 # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
252275 # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
253276 # validity.
@@ -778,9 +801,10 @@ function return_cached_result(interp::AbstractInterpreter, method::Method, codei
778801 rt = cached_return_type (codeinst)
779802 exct = codeinst. exctype
780803 effects = ipo_effects (codeinst)
781- edge = codeinst
782804 update_valid_age! (caller, WorldRange (min_world (codeinst), max_world (codeinst)))
783- return Future (MethodCallResult (interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited))
805+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_total)
806+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_cache_saved)
807+ return Future (MethodCallResult (interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited))
784808end
785809
786810function MethodCallResult (:: AbstractInterpreter , sv:: AbsIntState , method:: Method ,
@@ -876,7 +900,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
876900 if frame === false
877901 # completely new, but check again after reserving in the engine
878902 if cache_mode == CACHE_MODE_GLOBAL
903+ reserve_start = _time_ns () # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
879904 ci_from_engine = engine_reserve (interp, mi)
905+ caller. time_paused += (_time_ns () - reserve_start)
880906 edge_ci = ci_from_engine
881907 codeinst = get (code_cache (interp), mi, nothing )
882908 if codeinst isa CodeInstance # return existing rettype if the code is already inferred
0 commit comments