
Commit 1aae257

Enable advanced monitoring for SQLServer & Postgres. (#300)
This PR introduces advanced monitoring (per-query and system stats) for SQL Server and Postgres. Anyone currently using monitoring will not be impacted, as advanced monitoring must be enabled explicitly with a new command-line option (-mt advanced). In essence, this change lets users run queries against system tables (SQL Server) or plugins (Postgres), which allows us to extract system and query usage statistics while benchmarking (query plans, query text, pattern execution counts, cache hits, etc.).

---------

Co-authored-by: Brian Kroth <[email protected]>
Co-authored-by: Brian Kroth <[email protected]>
1 parent 72ba14f commit 1aae257

File tree: 18 files changed, +1065 -104 lines changed


.github/workflows/maven.yml

Lines changed: 18 additions & 6 deletions

@@ -468,14 +468,14 @@ jobs:
           # In this case, we load the tpcc data.
           if [[ ${{matrix.benchmark}} == templated ]]; then
             java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
-            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml -im 1000 -mt advanced --create=false --load=false --execute=true --json-histograms results/histograms.json
           elif [[ ${{matrix.benchmark}} == tpcc-with-reconnects ]]; then
             # See Also: WITH_SERVICE_INTERRUPTIONS=true docker/build-run-benchmark-with-docker.sh
             java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml --create=true --load=true
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh postgres) &
-            java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml --execute=true --json-histograms results/histograms.json
+            java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml -im 1000 -mt advanced --execute=true --json-histograms results/histograms.json
           else
-            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml -im 1000 -mt advanced --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
           # FIXME: Reduce the error rate so we don't need these overrides.
@@ -491,6 +491,12 @@ jobs:
           ./scripts/check_latest_benchmark_results.sh $results_benchmark
           ./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
 
+          # Running the monitor should create at least three files in the 'monitor' directory.
+          if ! [ $(find "./results/monitor" -maxdepth 1 -mindepth 1 | wc -l) -gt 2 ]; then
+            echo "ERROR: Advanced monitoring unsuccessful, file directory and/or appropriate files not created." >&2
+            exit 1
+          fi
+
       - name: Stop custom postgres service
         run: |
           ./docker/postgres-latest/down.sh
@@ -638,14 +644,14 @@ jobs:
           # In this case, we load the tpcc data.
           if [[ ${{matrix.benchmark}} == templated ]]; then
             java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
-            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=false --load=false --execute=true --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml -im 1000 -mt advanced --create=false --load=false --execute=true --json-histograms results/histograms.json
           elif [[ ${{matrix.benchmark}} == tpcc-with-reconnects ]]; then
             # See Also: WITH_SERVICE_INTERRUPTIONS=true docker/build-run-benchmark-with-docker.sh
             java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml --create=true --load=true
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh sqlserver) &
-            java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml --execute=true --json-histograms results/histograms.json
+            java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml -im 1000 -mt advanced --execute=true --json-histograms results/histograms.json
           else
-            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
+            java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml -im 1000 -mt advanced --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
           # FIXME: Reduce the error rate so we don't need these overrides.
@@ -659,6 +665,12 @@ jobs:
          ./scripts/check_latest_benchmark_results.sh $results_benchmark
          ./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
 
+         # Running the monitor should create at least three files in the 'monitor' directory.
+         if ! [ $(find "./results/monitor" -maxdepth 1 -mindepth 1 | wc -l) -gt 2 ]; then
+           echo "ERROR: Advanced monitoring unsuccessful, file directory and/or appropriate files not created." >&2
+           exit 1
+         fi
+
     ## ----------------------------------------------------------------------------------
     ## Docker Build Test Publish
     ## ----------------------------------------------------------------------------------

pom.xml

Lines changed: 7 additions & 0 deletions

@@ -349,6 +349,13 @@
       <artifactId>janino</artifactId>
       <version>3.1.12</version>
     </dependency>
+    <dependency>
+      <groupId>org.immutables</groupId>
+      <artifactId>value</artifactId>
+      <version>2.9.0</version>
+      <scope>provided</scope>
+    </dependency>
+
   </dependencies>
 
   <build>
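
Note on the dependency above: org.immutables is an annotation processor (hence the provided scope) that generates the ImmutableMonitorInfo builder used in DBWorkload.java below. The MonitorInfo spec itself is not included in the hunks shown on this page; what follows is a minimal, hypothetical sketch of what such a value type could look like, with attribute names and defaults inferred only from the calls visible in this diff.

package com.oltpbenchmark.util;

import org.immutables.value.Value;

// Hypothetical sketch only: the real MonitorInfo is not shown in this commit view.
// An org.immutables spec along these lines generates ImmutableMonitorInfo with a
// builder exposing monitoringInterval(...) and monitoringType(...), matching the
// calls made in DBWorkload.java.
@Value.Immutable
@Value.Style(get = {"get*", "is*"}) // strip the "get" prefix for attribute/builder names
public abstract class MonitorInfo {

  public enum MonitoringType {
    THROUGHPUT,
    ADVANCED
  }

  // Polling interval in milliseconds; 0 keeps monitoring disabled (assumed default).
  @Value.Default
  public int getMonitoringInterval() {
    return 0;
  }

  // Throughput-only monitoring by default, preserving the pre-existing behavior (assumed).
  @Value.Default
  public MonitoringType getMonitoringType() {
    return MonitoringType.THROUGHPUT;
  }
}

With a spec like this, ImmutableMonitorInfo.builder().monitoringInterval(...).monitoringType(...).build() compiles exactly as used in the next file.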

src/main/java/com/oltpbenchmark/DBWorkload.java

Lines changed: 32 additions & 8 deletions

@@ -79,11 +79,27 @@ public static void main(String[] args) throws Exception {
       return;
     }
 
-    // Seconds
-    int intervalMonitor = 0;
+    // Monitoring setup.
+    ImmutableMonitorInfo.Builder builder = ImmutableMonitorInfo.builder();
     if (argsLine.hasOption("im")) {
-      intervalMonitor = Integer.parseInt(argsLine.getOptionValue("im"));
+      builder.monitoringInterval(Integer.parseInt(argsLine.getOptionValue("im")));
     }
+    if (argsLine.hasOption("mt")) {
+      switch (argsLine.getOptionValue("mt")) {
+        case "advanced":
+          builder.monitoringType(MonitorInfo.MonitoringType.ADVANCED);
+          break;
+        case "throughput":
+          builder.monitoringType(MonitorInfo.MonitoringType.THROUGHPUT);
+          break;
+        default:
+          throw new ParseException(
+              "Monitoring type '"
+                  + argsLine.getOptionValue("mt")
+                  + "' is undefined, allowed values are: advanced/throughput");
+      }
+    }
+    MonitorInfo monitorInfo = builder.build();
 
     // -------------------------------------------------------------------
     // GET PLUGIN LIST
@@ -151,6 +167,14 @@ public static void main(String[] args) throws Exception {
       // Nothing to do here !
     }
 
+    // Set monitoring enabled, if all requirements are met.
+    if (monitorInfo.getMonitoringInterval() > 0
+        && monitorInfo.getMonitoringType() == MonitorInfo.MonitoringType.ADVANCED
+        && DatabaseType.get(xmlConfig.getString("type")).shouldCreateMonitoringPrefix()) {
+      LOG.info("Advanced monitoring enabled, prefix will be added to queries.");
+      wrkld.setAdvancedMonitoringEnabled(true);
+    }
+
     // ----------------------------------------------------------------
     // CREATE BENCHMARK MODULE
     // ----------------------------------------------------------------
@@ -518,7 +542,7 @@ public static void main(String[] args) throws Exception {
     if (isBooleanOptionSet(argsLine, "execute")) {
       // Bombs away!
       try {
-        Results r = runWorkload(benchList, intervalMonitor);
+        Results r = runWorkload(benchList, monitorInfo);
         writeOutputs(r, activeTXTypes, argsLine, xmlConfig);
         writeHistograms(r);
 
@@ -558,8 +582,8 @@ private static Options buildOptions(XMLConfiguration pluginConfig) {
     options.addOption(null, "execute", true, "Execute the benchmark workload");
     options.addOption("h", "help", false, "Print this help");
     options.addOption("s", "sample", true, "Sampling window");
-    options.addOption(
-        "im", "interval-monitor", true, "Throughput Monitoring Interval in milliseconds");
+    options.addOption("im", "interval-monitor", true, "Monitoring Interval in milliseconds");
+    options.addOption("mt", "monitor-type", true, "Type of Monitoring (throughput/advanced)");
     options.addOption(
         "d",
         "directory",
@@ -733,7 +757,7 @@ private static void runLoader(BenchmarkModule bench)
     bench.loadDatabase();
   }
 
-  private static Results runWorkload(List<BenchmarkModule> benchList, int intervalMonitor)
+  private static Results runWorkload(List<BenchmarkModule> benchList, MonitorInfo monitorInfo)
       throws IOException {
     List<Worker<?>> workers = new ArrayList<>();
     List<WorkloadConfiguration> workConfs = new ArrayList<>();
@@ -748,7 +772,7 @@ private static Results runWorkload(List<BenchmarkModule> benchList, int interval
           bench.getBenchmarkName().toUpperCase(), num_phases, (num_phases > 1 ? "s" : "")));
       workConfs.add(bench.getWorkloadConfiguration());
     }
-    Results r = ThreadBench.runRateLimitedBenchmark(workers, workConfs, intervalMonitor);
+    Results r = ThreadBench.runRateLimitedBenchmark(workers, workConfs, monitorInfo);
     LOG.info(SINGLE_LINE);
     LOG.info("Rate limited reqs/s: {}", r);
     return r;
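
For context, the net effect of the option handling above: passing -im 1000 -mt advanced (as the CI workflow above now does) is roughly equivalent to building the following value. This is a minimal sketch based only on the builder and factory calls visible in these hunks, not additional API from the rest of the commit.

// Minimal sketch of what DBWorkload assembles for "-im 1000 -mt advanced".
MonitorInfo monitorInfo =
    ImmutableMonitorInfo.builder()
        .monitoringInterval(1000) // -im: polling interval in milliseconds
        .monitoringType(MonitorInfo.MonitoringType.ADVANCED) // -mt: advanced|throughput
        .build();

// The rest of the run is unchanged: the value object is simply threaded through
// runWorkload(...) into ThreadBench.runRateLimitedBenchmark(...).
Results r = ThreadBench.runRateLimitedBenchmark(workers, workConfs, monitorInfo);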

src/main/java/com/oltpbenchmark/ThreadBench.java

Lines changed: 47 additions & 65 deletions

@@ -1,17 +1,15 @@
 /*
  * Copyright 2020 by OLTPBenchmark Project
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
  *
  */
 
@@ -21,7 +19,10 @@
 import com.oltpbenchmark.api.BenchmarkModule;
 import com.oltpbenchmark.api.TransactionType;
 import com.oltpbenchmark.api.Worker;
+import com.oltpbenchmark.api.collectors.monitoring.Monitor;
+import com.oltpbenchmark.api.collectors.monitoring.MonitorGen;
 import com.oltpbenchmark.types.State;
+import com.oltpbenchmark.util.MonitorInfo;
 import com.oltpbenchmark.util.StringUtil;
 import java.util.*;
 import org.apache.commons.collections4.map.ListOrderedMap;
@@ -30,30 +31,35 @@
 
 public class ThreadBench implements Thread.UncaughtExceptionHandler {
   private static final Logger LOG = LoggerFactory.getLogger(ThreadBench.class);
+  // Determines how long (in ms) to wait until monitoring thread rejoins the
+  // main thread.
+  private static final int MONITOR_REJOIN_TIME = 60000;
 
   private final BenchmarkState testState;
   private final List<? extends Worker<? extends BenchmarkModule>> workers;
   private final ArrayList<Thread> workerThreads;
   private final List<WorkloadConfiguration> workConfs;
   private final ArrayList<LatencyRecord.Sample> samples = new ArrayList<>();
-  private final int intervalMonitor;
+  private final MonitorInfo monitorInfo;
+
+  private Monitor monitor = null;
 
   private ThreadBench(
       List<? extends Worker<? extends BenchmarkModule>> workers,
       List<WorkloadConfiguration> workConfs,
-      int intervalMonitoring) {
+      MonitorInfo monitorInfo) {
     this.workers = workers;
     this.workConfs = workConfs;
     this.workerThreads = new ArrayList<>(workers.size());
-    this.intervalMonitor = intervalMonitoring;
+    this.monitorInfo = monitorInfo;
    this.testState = new BenchmarkState(workers.size() + 1);
   }
 
   public static Results runRateLimitedBenchmark(
       List<Worker<? extends BenchmarkModule>> workers,
       List<WorkloadConfiguration> workConfs,
-      int intervalMonitoring) {
-    ThreadBench bench = new ThreadBench(workers, workConfs, intervalMonitoring);
+      MonitorInfo monitorInfo) {
+    ThreadBench bench = new ThreadBench(workers, workConfs, monitorInfo);
     return bench.runRateLimitedMultiPhase();
   }
 
@@ -88,10 +94,9 @@ private int finalizeWorkers(ArrayList<Thread> workerThreads) throws InterruptedE
       // to terminate... hands otherwise
 
       /*
-       * // CARLO: Maybe we might want to do this to kill threads that are
-       * hanging... if (workerThreads.get(i).isAlive()) {
-       * workerThreads.get(i).kill(); try { workerThreads.get(i).join(); }
-       * catch (InterruptedException e) { } }
+       * // CARLO: Maybe we might want to do this to kill threads that are hanging... if
+       * (workerThreads.get(i).isAlive()) { workerThreads.get(i).kill(); try {
+       * workerThreads.get(i).join(); } catch (InterruptedException e) { } }
       */
 
      requests += workers.get(i).getRequests();
@@ -116,17 +121,11 @@ private Results runRateLimitedMultiPhase() {
     this.createWorkerThreads();
 
     // long measureStart = start;
+    Phase phase = null;
 
-    long startTs = System.currentTimeMillis();
-    long start = System.nanoTime();
-    long warmupStart = System.nanoTime();
-    long warmup = warmupStart;
-    long measureEnd = -1;
     // used to determine the longest sleep interval
     double lowestRate = Double.MAX_VALUE;
 
-    Phase phase = null;
-
     for (WorkloadState workState : workStates) {
       workState.switchToNextPhase();
       phase = workState.getCurrentPhase();
@@ -145,6 +144,12 @@ private Results runRateLimitedMultiPhase() {
       }
     }
 
+    long startTs = System.currentTimeMillis();
+    long start = System.nanoTime();
+    long warmupStart = System.nanoTime();
+    long warmup = warmupStart;
+    long measureEnd = -1;
+
     long intervalNs = getInterval(lowestRate, phase.getArrival());
 
     long nextInterval = start + intervalNs;
@@ -157,8 +162,11 @@ private Results runRateLimitedMultiPhase() {
     boolean lastEntry = false;
 
     // Initialize the Monitor
-    if (this.intervalMonitor > 0) {
-      new MonitorThread(this.intervalMonitor).start();
+    if (this.monitorInfo.getMonitoringInterval() > 0) {
+      this.monitor =
+          MonitorGen.getMonitor(
+              this.monitorInfo, this.testState, this.workers, this.workConfs.get(0));
+      this.monitor.start();
     }
 
     // Allow workers to start work.
@@ -301,6 +309,18 @@ private Results runRateLimitedMultiPhase() {
       }
     }
 
+    // Stop the monitoring thread separately from cleanup all the workers so we can ignore errors
+    // from these threads (including possible SQLExceptions), but not the others.
+    try {
+      if (this.monitor != null) {
+        this.monitor.interrupt();
+        this.monitor.join(MONITOR_REJOIN_TIME);
+        this.monitor.tearDown();
+      }
+    } catch (Exception e) {
+      LOG.error(e.getMessage(), e);
+    }
+
     try {
       int requests = finalizeWorkers(this.workerThreads);
 
@@ -528,42 +548,4 @@ public void run() {
       }
     }
   }
-
-  private class MonitorThread extends Thread {
-    private final int intervalMonitor;
-
-    {
-      this.setDaemon(true);
-    }
-
-    /**
-     * @param interval How long to wait between polling in milliseconds
-     */
-    MonitorThread(int interval) {
-      this.intervalMonitor = interval;
-    }
-
-    @Override
-    public void run() {
-      LOG.info("Starting MonitorThread Interval [{}ms]", this.intervalMonitor);
-      while (true) {
-        try {
-          Thread.sleep(this.intervalMonitor);
-        } catch (InterruptedException ex) {
-          return;
-        }
-
-        // Compute the last throughput
-        long measuredRequests = 0;
-        synchronized (testState) {
-          for (Worker<?> w : workers) {
-            measuredRequests += w.getAndResetIntervalRequests();
-          }
-        }
-        double seconds = this.intervalMonitor / 1000d;
-        double tps = (double) measuredRequests / seconds;
-        LOG.info("Throughput: {} txn/sec", tps);
-      }
-    }
-  }
 }
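
The Monitor and MonitorGen classes pulled in by the new imports are added elsewhere in this commit and are not shown on this page. Below is a hedged sketch of the dispatch the call site above implies; the method signature and package paths are copied from the ThreadBench.java hunk, but the concrete monitor class names (ThroughputMonitor, SQLServerMonitor, PostgreSQLMonitor) and the getDatabaseType() switch are assumptions, not taken from this diff.

package com.oltpbenchmark.api.collectors.monitoring;

import com.oltpbenchmark.BenchmarkState;
import com.oltpbenchmark.WorkloadConfiguration;
import com.oltpbenchmark.api.BenchmarkModule;
import com.oltpbenchmark.api.Worker;
import com.oltpbenchmark.util.MonitorInfo;
import java.util.List;

// Hypothetical sketch of the factory used above; only the call-site signature is
// taken from ThreadBench.java, everything else is assumed.
public final class MonitorGen {

  private MonitorGen() {}

  public static Monitor getMonitor(
      MonitorInfo monitorInfo,
      BenchmarkState testState,
      List<? extends Worker<? extends BenchmarkModule>> workers,
      WorkloadConfiguration workConf) {
    if (monitorInfo.getMonitoringType() == MonitorInfo.MonitoringType.ADVANCED) {
      switch (workConf.getDatabaseType()) {
        case SQLSERVER:
          // Per-query and system stats via SQL Server system tables (assumed class name).
          return new SQLServerMonitor(monitorInfo, testState, workers, workConf);
        case POSTGRES:
          // Per-query and system stats via Postgres plugins, e.g. pg_stat_statements (assumed class name).
          return new PostgreSQLMonitor(monitorInfo, testState, workers, workConf);
        default:
          // No advanced support for this DBMS; fall back to throughput-only monitoring.
          break;
      }
    }
    // Replaces the old inner MonitorThread removed above (assumed class name).
    return new ThroughputMonitor(monitorInfo, testState, workers, workConf);
  }
}

Whatever the real dispatch looks like, the behavioral contract visible in the diff is that the returned Monitor is a Thread with a tearDown() hook, so ThreadBench can interrupt it, wait up to MONITOR_REJOIN_TIME, and clean it up independently of the workers.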
