Mon May 13 15:48:13 2024    cprofile_output

         67649 function calls in 20.692 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1024   12.881    0.013   12.881    0.013 _internals.py:328(get_logits)
     1024    3.855    0.004   18.710    0.018 llama.py:542(eval)
     1024    1.963    0.002    1.963    0.002 _internals.py:314(decode)
     1024    0.619    0.001   20.166    0.020 llama.py:649(generate)
     1025    0.521    0.001    0.522    0.001 _internals.py:192(detokenize)
     1024    0.327    0.000    0.327    0.000 _internals.py:346(sample_repetition_penalties)
     1024    0.225    0.000    0.228    0.000 _internals.py:563(copy_logits)
     1024    0.150    0.000    0.150    0.000 _internals.py:373(sample_top_k)
     1024    0.042    0.000    0.042    0.000 {method 'resize' of 'numpy.ndarray' objects}
     1024    0.021    0.000    0.021    0.000 {built-in method numpy.arange}
     1024    0.013    0.000    0.813    0.001 _internals.py:723(sample)
     1024    0.009    0.000    0.837    0.001 llama.py:575(sample)
     1024    0.009    0.000    0.083    0.000 _internals.py:546(__init__)
     1024    0.006    0.000    0.006    0.000 _internals.py:286(kv_cache_seq_rm)
     1024    0.006    0.000    0.006    0.000 {built-in method numpy.zeros}
     1024    0.005    0.000    0.005    0.000 {method 'tolist' of 'numpy.ndarray' objects}
     1024    0.005    0.000    0.010    0.000 llama.py:480(eval_tokens)
     1024    0.004    0.000    0.004    0.000 _internals.py:379(sample_top_p)
        1    0.004    0.004   20.693   20.693 llama.py:929(_create_completion)
     2048    0.003    0.000    0.003    0.000 __init__.py:517(cast)
     1024    0.002    0.000    0.002    0.000 _internals.py:518(set_batch)
     1024    0.002    0.000    0.002    0.000 {built-in method numpy.array}
     1024    0.002    0.000    0.002    0.000 <string>:2(__init__)
     2048    0.002    0.000    0.002    0.000 _internal.py:250(__init__)
     1024    0.001    0.000    0.001    0.000 _internals.py:459(sample_token)
     1024    0.001    0.000    0.001    0.000 _internals.py:391(sample_tail_free)
     1024    0.001    0.000    0.001    0.000 {method 'reshape' of 'numpy.ndarray' objects}
     1024    0.001    0.000    0.001    0.000 _internals.py:68(n_vocab)
     2048    0.001    0.000    0.004    0.000 _internal.py:267(data_as)
     8192    0.001    0.000    0.001    0.000 {built-in method _ctypes.byref}
     3071    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
    10251    0.001    0.000    0.001    0.000 {built-in method builtins.len}
     1024    0.001    0.000    0.001    0.000 _internals.py:407(sample_temp)
     1024    0.001    0.000    0.001    0.000 llama.py:476(_scores)
     2048    0.001    0.000    0.001    0.000 {built-in method builtins.min}
     1024    0.001    0.000    0.001    0.000 _internals.py:399(sample_typical)
     1024    0.001    0.000    0.001    0.000 _internals.py:385(sample_min_p)
     1024    0.001    0.000    0.001    0.000 _internals.py:145(token_nl)
     1025    0.000    0.000    0.522    0.001 llama_tokenizer.py:48(detokenize)
     1025    0.000    0.000    0.000    0.000 _internals.py:137(token_bos)
     1025    0.000    0.000    0.523    0.001 llama.py:507(detokenize)
     1024    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        1    0.000    0.000    0.000    0.000 _internals.py:167(tokenize)
     1024    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
     1024    0.000    0.000    0.001    0.000 _internals.py:812(accept)
     1023    0.000    0.000    0.000    0.000 {method 'clear' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method _io.open}
        6    0.000    0.000    0.000    0.000 {built-in method builtins.print}
        1    0.000    0.000    0.000    0.000 _internals.py:476(print_timings)
        1    0.000    0.000   20.693   20.693 {built-in method builtins.next}
        1    0.000    0.000    0.000    0.000 uuid.py:139(__init__)
        1    0.000    0.000   20.693   20.693 llama.py:1570(__call__)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        6    0.000    0.000    0.000    0.000 _logger.py:23(llama_log_callback)
        1    0.000    0.000    0.000    0.000 uuid.py:723(uuid4)
        1    0.000    0.000    0.000    0.000 cProfile.py:46(dump_stats)
        1    0.000    0.000    0.000    0.000 uuid.py:280(__str__)
        1    0.000    0.000    0.000    0.000 llama_tokenizer.py:43(tokenize)
        1    0.000    0.000   20.693   20.693 llama.py:1473(create_completion)
        1    0.000    0.000    0.000    0.000 llama.py:491(tokenize)
        7    0.000    0.000    0.000    0.000 {method 'decode' of 'bytes' objects}
        1    0.000    0.000    0.000    0.000 cProfile.py:52(create_stats)
        1    0.000    0.000    0.000    0.000 _internals.py:472(reset_timings)
        1    0.000    0.000    0.000    0.000 _internals.py:149(token_prefix)
        1    0.000    0.000    0.000    0.000 lcpp-py-profile.py:6(formatMessages)
        1    0.000    0.000    0.000    0.000 {built-in method posix.urandom}
        1    0.000    0.000    0.000    0.000 _internals.py:72(n_ctx_train)
        1    0.000    0.000    0.000    0.000 _internals.py:153(token_middle)
        1    0.000    0.000    0.000    0.000 _internals.py:157(token_suffix)
        3    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {built-in method time.time}
        3    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
        1    0.000    0.000    0.000    0.000 {method 'count' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'encode' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {built-in method from_bytes}
        1    0.000    0.000    0.000    0.000 llama.py:538(reset)