Skip to content

Commit b536895

Browse files
Kevin Wilfongfacebook-github-bot
Kevin Wilfong
authored andcommitted
Make CPU Operator Stats Optional (facebookincubator#2638)
Summary: Pull Request resolved: facebookincubator#2638 Similar to what facebookincubator#1534 did for Expressions, this adds a configuration property "driver.track_operator_cpu_usage" to control whether cpu usage tracking for operators is on of off. The default is on to maintain the behavior for existing use cases. We have seen in use cases with very small batches (128 or less) measuring the CPU time can itself account for ~10% of the CPU spent in processing batches. For example, running the SimpleArithmetic benchmark with batches of 100 rows With CPU usage tracking: ``` multiplySmall 4.58ns 218.29M multiplySameColumnSmall 4.37ns 228.67M multiplyHalfNullSmall 7.86ns 127.17M multiplyConstantSmall 5.08ns 196.67M multiplyNestedSmall 6.80ns 147.16M multiplyNestedDeepSmall 12.06ns 82.89M ``` Without CPU usage tracking: ``` multiplySmall 4.22ns 236.99M multiplySameColumnSmall 3.99ns 250.70M multiplyHalfNullSmall 7.12ns 140.39M multiplyConstantSmall 4.67ns 214.27M multiplyNestedSmall 5.94ns 168.39M multiplyNestedDeepSmall 11.03ns 90.65M ``` Reviewed By: mbasmanova Differential Revision: D39753640 fbshipit-source-id: f0d10dd5bfc938804713d069bec6d6cd5ef69c0e
1 parent e674e29 commit b536895

File tree

4 files changed

+28
-4
lines changed

4 files changed

+28
-4
lines changed

velox/core/QueryConfig.h

+9
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ class QueryConfig {
5858
static constexpr const char* kExprTrackCpuUsage =
5959
"expression.track_cpu_usage";
6060

61+
// Whether to track CPU usage for stages of individual operators. True by
62+
// default. Can be expensive when processing small batches, e.g. < 10K rows.
63+
static constexpr const char* kOperatorTrackCpuUsage =
64+
"driver.track_operator_cpu_usage";
65+
6166
// Flags used to configure the CAST operator:
6267

6368
// This flag makes the Row conversion to by applied
@@ -271,6 +276,10 @@ class QueryConfig {
271276
return get<bool>(kExprTrackCpuUsage, false);
272277
}
273278

279+
bool operatorTrackCpuUsage() const {
280+
return get<bool>(kOperatorTrackCpuUsage, true);
281+
}
282+
274283
template <typename T>
275284
T get(const std::string& key, const T& defaultValue) const {
276285
return config_->get<T>(key, defaultValue);

velox/exec/Driver.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ Driver::Driver(
160160
curOpIndex_ = operators_.size() - 1;
161161
// Operators need access to their Driver for adaptation.
162162
ctx_->driver = this;
163+
trackOperatorCpuUsage_ = ctx_->queryConfig().operatorTrackCpuUsage();
163164
}
164165

165166
namespace {
@@ -347,7 +348,7 @@ StopReason Driver::runInternal(
347348
uint64_t resultBytes = 0;
348349
RowVectorPtr result;
349350
{
350-
CpuWallTimer timer(op->stats().getOutputTiming);
351+
auto timer = cpuWallTimer(op->stats().getOutputTiming);
351352
result = op->getOutput();
352353
if (result) {
353354
VELOX_CHECK(
@@ -362,7 +363,7 @@ StopReason Driver::runInternal(
362363
}
363364
pushdownFilters(i);
364365
if (result) {
365-
CpuWallTimer timer(nextOp->stats().addInputTiming);
366+
auto timer = cpuWallTimer(op->stats().addInputTiming);
366367
nextOp->stats().inputVectors += 1;
367368
nextOp->stats().inputPositions += result->size();
368369
nextOp->stats().inputBytes += resultBytes;
@@ -391,7 +392,7 @@ StopReason Driver::runInternal(
391392
return StopReason::kBlock;
392393
}
393394
if (op->isFinished()) {
394-
CpuWallTimer timer(nextOp->stats().finishTiming);
395+
auto timer = cpuWallTimer(op->stats().finishTiming);
395396
nextOp->noMoreInput();
396397
break;
397398
}
@@ -403,7 +404,7 @@ StopReason Driver::runInternal(
403404
// this will be detected when trying to add input, and we
404405
// will come back here after this is again on thread.
405406
{
406-
CpuWallTimer timer(op->stats().getOutputTiming);
407+
auto timer = cpuWallTimer(op->stats().getOutputTiming);
407408
result = op->getOutput();
408409
if (result) {
409410
VELOX_CHECK(

velox/exec/Driver.h

+9
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#include <folly/executors/CPUThreadPoolExecutor.h>
1818
#include <folly/futures/Future.h>
1919
#include <folly/portability/SysSyscall.h>
20+
#include <memory>
2021

2122
#include "velox/common/future/VeloxPromise.h"
23+
#include "velox/common/time/CpuWallTimer.h"
2224
#include "velox/connectors/Connector.h"
2325
#include "velox/core/PlanNode.h"
2426
#include "velox/core/QueryCtx.h"
@@ -295,6 +297,11 @@ class Driver : public std::enable_shared_from_this<Driver> {
295297
// position in the pipeline.
296298
void pushdownFilters(int operatorIndex);
297299

300+
std::unique_ptr<CpuWallTimer> cpuWallTimer(CpuWallTiming& timing) {
301+
return trackOperatorCpuUsage_ ? std::make_unique<CpuWallTimer>(timing)
302+
: nullptr;
303+
}
304+
298305
std::unique_ptr<DriverCtx> ctx_;
299306
std::atomic_bool closed_{false};
300307

@@ -310,6 +317,8 @@ class Driver : public std::enable_shared_from_this<Driver> {
310317
std::vector<std::unique_ptr<Operator>> operators_;
311318

312319
BlockingReason blockingReason_{BlockingReason::kNotBlocked};
320+
321+
bool trackOperatorCpuUsage_;
313322
};
314323

315324
using OperatorSupplier = std::function<std::unique_ptr<Operator>(

velox/exec/tests/DriverTest.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,11 @@ TEST_F(DriverTest, pause) {
448448
[](int64_t num) { return num % 10 > 0; },
449449
&hits);
450450
params.maxDrivers = 10;
451+
params.queryCtx =
452+
core::QueryCtx::createForTest(std::make_shared<core::MemConfig>(
453+
std::unordered_map<std::string, std::string>{
454+
// Make sure CPU usage tracking is enabled.
455+
{core::QueryConfig::kOperatorTrackCpuUsage, "true"}}));
451456
int32_t numRead = 0;
452457
readResults(params, ResultOperation::kPause, 370'000'000, &numRead);
453458
// Each thread will fully read the 1M rows in values.

0 commit comments

Comments
 (0)