|
10 | 10 |
|
11 | 11 | import android.app.Activity;
|
12 | 12 | import android.content.Intent;
|
13 |
| -import android.os.AsyncTask; |
14 | 13 | import android.os.Bundle;
|
15 |
| -import android.os.Debug; |
| 14 | +import android.os.Handler; |
| 15 | +import android.os.HandlerThread; |
| 16 | +import android.os.Looper; |
16 | 17 | import android.system.ErrnoException;
|
17 | 18 | import android.system.Os;
|
| 19 | + |
18 | 20 | import com.google.gson.Gson;
|
| 21 | + |
19 | 22 | import java.io.File;
|
20 | 23 | import java.io.FileWriter;
|
21 | 24 | import java.io.IOException;
|
22 | 25 | import java.util.ArrayList;
|
23 | 26 | import java.util.Arrays;
|
24 |
| -import java.util.Collections; |
25 | 27 | import java.util.List;
|
26 |
| -import java.util.stream.Collectors; |
27 |
| -import org.pytorch.executorch.Module; |
28 | 28 |
|
29 | 29 | public class BenchmarkActivity extends Activity {
|
30 |
| - @Override |
31 |
| - protected void onCreate(Bundle savedInstanceState) { |
32 |
| - super.onCreate(savedInstanceState); |
33 |
| - |
34 |
| - try { |
35 |
| - Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true); |
36 |
| - } catch (ErrnoException e) { |
37 |
| - finish(); |
38 |
| - } |
39 |
| - |
40 |
| - Intent intent = getIntent(); |
41 |
| - File modelDir = new File(intent.getStringExtra("model_dir")); |
42 |
| - File model = |
43 |
| - Arrays.stream(modelDir.listFiles()) |
44 |
| - .filter(file -> file.getName().endsWith(".pte")) |
45 |
| - .findFirst() |
46 |
| - .get(); |
47 | 30 |
|
48 |
| - int numIter = intent.getIntExtra("num_iter", 50); |
49 |
| - int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10); |
| 31 | + File mModel; |
| 32 | + int mNumIter; |
| 33 | + int mNumWarmupIter; |
| 34 | + String mTokenizerPath; |
| 35 | + float mTemperature; |
| 36 | + String mPrompt; |
50 | 37 |
|
51 |
| - long pssIdle = Debug.getPss(); |
| 38 | + HandlerThread mHandlerThread; |
| 39 | + BenchmarkHandler mHandler; |
52 | 40 |
|
53 |
| - // TODO: Format the string with a parsable format |
54 |
| - Stats stats = new Stats(); |
| 41 | + List<BenchmarkMetric> mResult; |
55 | 42 |
|
56 |
| - new AsyncTask<Void, Void, Void>() { |
57 |
| - @Override |
58 |
| - protected Void doInBackground(Void... voids) { |
| 43 | + @Override |
| 44 | + protected void onCreate(Bundle savedInstanceState) { |
| 45 | + super.onCreate(savedInstanceState); |
59 | 46 |
|
60 |
| - // Record the time it takes to load the model and the forward method |
61 |
| - stats.loadStart = System.nanoTime(); |
62 |
| - Module module = Module.load(model.getPath()); |
63 |
| - stats.errorCode = module.loadMethod("forward"); |
64 |
| - stats.loadEnd = System.nanoTime(); |
65 |
| - |
66 |
| - for (int i = 0; i < numWarmupIter; i++) { |
67 |
| - module.forward(); |
| 47 | + try { |
| 48 | + Os.setenv("ADSP_LIBRARY_PATH", getApplicationInfo().nativeLibraryDir, true); |
| 49 | + } catch (ErrnoException e) { |
| 50 | + finish(); |
68 | 51 | }
|
69 | 52 |
|
70 |
| - for (int i = 0; i < numIter; i++) { |
71 |
| - long start = System.nanoTime(); |
72 |
| - module.forward(); |
73 |
| - double forwardMs = (System.nanoTime() - start) * 1e-6; |
74 |
| - stats.latency.add(forwardMs); |
| 53 | + Intent intent = getIntent(); |
| 54 | + File modelDir = new File(intent.getStringExtra("model_dir")); |
| 55 | + File model = |
| 56 | + Arrays.stream(modelDir.listFiles()) |
| 57 | + .filter(file -> file.getName().endsWith(".pte")) |
| 58 | + .findFirst() |
| 59 | + .get(); |
| 60 | + |
| 61 | + int numIter = intent.getIntExtra("num_iter", 50); |
| 62 | + int numWarmupIter = intent.getIntExtra("num_warm_up_iter", 10); |
| 63 | + String tokenizerPath = intent.getStringExtra("tokenizer_path"); |
| 64 | + float temperature = intent.getFloatExtra("temperature", 0.8f); |
| 65 | + String prompt = intent.getStringExtra("prompt"); |
| 66 | + |
| 67 | + mModel = model; |
| 68 | + mNumIter = numIter; |
| 69 | + mNumWarmupIter = numWarmupIter; |
| 70 | + mTokenizerPath = tokenizerPath; |
| 71 | + mTemperature = temperature; |
| 72 | + mPrompt = prompt; |
| 73 | + if (mPrompt == null) { |
| 74 | + mPrompt = "The ultimate answer"; |
75 | 75 | }
|
76 |
| - return null; |
77 |
| - } |
78 |
| - |
79 |
| - @Override |
80 |
| - protected void onPostExecute(Void aVoid) { |
81 |
| - |
82 |
| - final BenchmarkMetric.BenchmarkModel benchmarkModel = |
83 |
| - BenchmarkMetric.extractBackendAndQuantization(model.getName().replace(".pte", "")); |
84 |
| - final List<BenchmarkMetric> results = new ArrayList<>(); |
85 |
| - // The list of metrics we have atm includes: |
86 |
| - // Avg inference latency after N iterations |
87 |
| - // Currently the result has large variance from outliers, so only use |
88 |
| - // 80% samples in the middle (trimmean 0.2) |
89 |
| - Collections.sort(stats.latency); |
90 |
| - int resultSize = stats.latency.size(); |
91 |
| - List<Double> usedLatencyResults = |
92 |
| - stats.latency.subList(resultSize / 10, resultSize * 9 / 10); |
93 |
| - |
94 |
| - results.add( |
95 |
| - new BenchmarkMetric( |
96 |
| - benchmarkModel, |
97 |
| - "avg_inference_latency(ms)", |
98 |
| - stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), |
99 |
| - 0.0f)); |
100 |
| - results.add( |
101 |
| - new BenchmarkMetric( |
102 |
| - benchmarkModel, |
103 |
| - "trimmean_inference_latency(ms)", |
104 |
| - usedLatencyResults.stream().mapToDouble(l -> l).average().orElse(0.0f), |
105 |
| - 0.0f)); |
106 |
| - // Model load time |
107 |
| - results.add( |
108 |
| - new BenchmarkMetric( |
109 |
| - benchmarkModel, |
110 |
| - "model_load_time(ms)", |
111 |
| - (stats.loadEnd - stats.loadStart) * 1e-6, |
112 |
| - 0.0f)); |
113 |
| - // Load status |
114 |
| - results.add(new BenchmarkMetric(benchmarkModel, "load_status", stats.errorCode, 0)); |
115 |
| - // RAM PSS usage |
116 |
| - results.add( |
117 |
| - new BenchmarkMetric( |
118 |
| - benchmarkModel, "ram_pss_usage(mb)", (Debug.getPss() - pssIdle) / 1024, 0)); |
| 76 | + mResult = new ArrayList<>(); |
| 77 | + |
| 78 | + mHandlerThread = new HandlerThread("ModelRunner"); |
| 79 | + mHandlerThread.start(); |
| 80 | + mHandler = new BenchmarkHandler(mHandlerThread.getLooper(), this); |
| 81 | + |
| 82 | + mHandler.sendEmptyMessage(BenchmarkHandler.MESSAGE_RUN_BENCHMARK); |
| 83 | + } |
119 | 84 |
|
| 85 | + void writeResult() { |
120 | 86 | try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.json")) {
|
121 |
| - Gson gson = new Gson(); |
122 |
| - writer.write(gson.toJson(results)); |
| 87 | + Gson gson = new Gson(); |
| 88 | + writer.write(gson.toJson(mResult)); |
123 | 89 | } catch (IOException e) {
|
124 |
| - e.printStackTrace(); |
| 90 | + e.printStackTrace(); |
| 91 | + } finally { |
| 92 | + finish(); |
125 | 93 | }
|
126 |
| - } |
127 |
| - }.execute(); |
128 |
| - } |
| 94 | + } |
129 | 95 | }
|
130 | 96 |
|
131 |
| -class Stats { |
132 |
| - long loadStart; |
133 |
| - long loadEnd; |
134 |
| - List<Double> latency = new ArrayList<>(); |
135 |
| - int errorCode = 0; |
| 97 | +class BenchmarkHandler extends Handler { |
| 98 | + public static int MESSAGE_RUN_BENCHMARK = 1; |
| 99 | + public static int MESSAGE_LLM_RUN_BENCHMARK = 2; |
| 100 | + |
| 101 | + ModelRunner mModelRunner; |
| 102 | + BenchmarkActivity mBenchmarkActivity; |
136 | 103 |
|
137 |
| - @Override |
138 |
| - public String toString() { |
139 |
| - return "latency: " + latency.stream().map(Object::toString).collect(Collectors.joining("")); |
140 |
| - } |
| 104 | + LlmModelRunner mLlmModelRunner; |
| 105 | + LlmBenchmark mLlmBenchmark; |
| 106 | + |
| 107 | + public BenchmarkHandler(Looper looper, BenchmarkActivity benchmarkActivity) { |
| 108 | + super(looper); |
| 109 | + mModelRunner = new ModelRunner(); |
| 110 | + mBenchmarkActivity = benchmarkActivity; |
| 111 | + } |
| 112 | + |
| 113 | + @Override |
| 114 | + public void handleMessage(android.os.Message msg) { |
| 115 | + if (msg.what == MESSAGE_RUN_BENCHMARK) { |
| 116 | + mModelRunner.runBenchmark(mBenchmarkActivity.mModel, mBenchmarkActivity.mNumWarmupIter, mBenchmarkActivity.mNumIter, mBenchmarkActivity.mResult); |
| 117 | + |
| 118 | + if (mBenchmarkActivity.mTokenizerPath == null) { |
| 119 | + mBenchmarkActivity.writeResult(); |
| 120 | + } else { |
| 121 | + this.sendEmptyMessage(MESSAGE_LLM_RUN_BENCHMARK); |
| 122 | + } |
| 123 | + } else if (msg.what == MESSAGE_LLM_RUN_BENCHMARK) { |
| 124 | + mLlmBenchmark = new LlmBenchmark(mBenchmarkActivity, mBenchmarkActivity.mModel.getPath(), mBenchmarkActivity.mTokenizerPath, mBenchmarkActivity.mPrompt, mBenchmarkActivity.mTemperature, mBenchmarkActivity.mResult); |
| 125 | + } |
| 126 | + } |
141 | 127 | }
|
0 commit comments