Skip to content

Commit 915f1d8

Browse files
duanmeng authored and facebook-github-bot committed
feat: Add a trace file operation tool (#12021)
Summary: Add a trace file operation tool that provides a trace file copy operation. The target query or task can be specified via gflags. Pull Request resolved: #12021 Reviewed By: yuandagits Differential Revision: D68254596 Pulled By: xiaoxmeng fbshipit-source-id: bb1e580898ca17f491ff4ec984efeac24cfbba5a
1 parent 3efa454 commit 915f1d8

6 files changed

+497
-0
lines changed

velox/tool/trace/CMakeLists.txt

+18
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,22 @@ target_link_libraries(
5252
velox_exec_test_lib
5353
velox_tpch_connector)
5454

55+
# Library holding the trace file tool logic (TraceFileToolRunner). It is kept
# separate from the executable so other targets can link against it.
add_library(velox_trace_file_tool_base TraceFileToolRunner.cpp)

# Each dependency is listed exactly once.
target_link_libraries(
  velox_trace_file_tool_base
  velox_exec
  velox_type
  velox_vector
  velox_hive_connector
  Folly::folly
  glog::glog
  gflags::gflags)

# Command line binary of the trace file tool; all logic comes from the base
# library above.
add_executable(velox_trace_file_tool TraceFileToolMain.cpp)

target_link_libraries(
  velox_trace_file_tool velox_trace_file_tool_base)
72+
5573
add_subdirectory(tests)
+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include <date/date.h>
17+
#include <gflags/gflags.h>
18+
#include "velox/tool/trace/TraceFileToolRunner.h"
19+
20+
int main(int argc, char** argv) {
21+
gflags::ParseCommandLineFlags(&argc, &argv, true);
22+
facebook::velox::tool::trace::TraceFileToolRunner runner;
23+
runner.init();
24+
runner.run();
25+
return 0;
26+
}
+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "velox/tool/trace/TraceFileToolRunner.h"
18+
#include "velox/common/file/FileSystems.h"
19+
20+
// Command line flags configuring a run of the trace file tool.

// Operation to perform. TraceFileToolRunner rejects any value other than
// "copy" as unsupported.
DEFINE_string(trace_file_op, "copy", "Operation type of this run");

// Root directory the trace files are read from. The runner's constructor
// fails a user check if it is empty.
DEFINE_string(
    source_root_dir,
    "",
    "Source root directory of the tracing data, it must be set");

// Root directory the trace files are written to by the copy operation.
DEFINE_string(
    dest_root_dir,
    "",
    "Dest root directory, it must be set if the operation type is copy");

// Optional. Restricts the copy to "<source_root_dir>/<trace_query_id>".
DEFINE_string(trace_query_id, "", "Specify the trace query id");

// Optional. Further restricts the copy to
// "<source_root_dir>/<trace_query_id>/<trace_task_id>"; it may only be set
// together with trace_query_id.
DEFINE_string(trace_task_id, "", "Specify the trace task id");
31+
32+
namespace facebook::velox::tool::trace {
33+
34+
TraceFileToolRunner::TraceFileToolRunner()
35+
: sourceRootDir_(FLAGS_source_root_dir), destRootDir_(FLAGS_dest_root_dir) {
36+
VELOX_USER_CHECK(!sourceRootDir_.empty());
37+
}
38+
39+
void TraceFileToolRunner::init() {
40+
filesystems::registerLocalFileSystem();
41+
sourceFs_ = filesystems::getFileSystem(sourceRootDir_, nullptr);
42+
VELOX_CHECK_NOT_NULL(sourceFs_);
43+
if (FLAGS_trace_file_op == "copy") {
44+
VELOX_USER_CHECK(!destRootDir_.empty());
45+
destFs_ = filesystems::getFileSystem(destRootDir_, nullptr);
46+
VELOX_CHECK_NOT_NULL(destFs_);
47+
std::string copyRootDir;
48+
if (FLAGS_trace_query_id.empty()) {
49+
VELOX_CHECK(
50+
FLAGS_trace_task_id.empty(),
51+
"Trace query ID is empty but trace task ID is not empty");
52+
copyRootDir = sourceRootDir_;
53+
} else if (FLAGS_trace_task_id.empty()) {
54+
copyRootDir = fmt::format("{}/{}", sourceRootDir_, FLAGS_trace_query_id);
55+
} else {
56+
copyRootDir = fmt::format(
57+
"{}/{}/{}",
58+
sourceRootDir_,
59+
FLAGS_trace_query_id,
60+
FLAGS_trace_task_id);
61+
}
62+
listFiles(copyRootDir);
63+
} else {
64+
VELOX_UNSUPPORTED(
65+
"Unsupported trace file operation type {}", FLAGS_trace_file_op);
66+
}
67+
}
68+
69+
void TraceFileToolRunner::run() {
70+
if (FLAGS_trace_file_op == "copy") {
71+
copyFiles();
72+
} else {
73+
VELOX_UNSUPPORTED(
74+
"Unsupported trace file operation type {}", FLAGS_trace_file_op);
75+
}
76+
}
77+
78+
void TraceFileToolRunner::copyFiles() const {
79+
const auto prefixLen = sourceFs_->extractPath(sourceRootDir_).length();
80+
for (const auto& source : sourceFiles_) {
81+
const auto targetFile =
82+
fmt::format("{}/{}", destRootDir_, source.substr(prefixLen));
83+
const auto readFile = sourceFs_->openFileForRead(source);
84+
const auto writeFile = destFs_->openFileForWrite(
85+
targetFile,
86+
filesystems::FileOptions{
87+
.values = {},
88+
.fileSize = std::nullopt,
89+
.shouldCreateParentDirectories = true});
90+
const auto fileSize = readFile->size();
91+
constexpr auto batchSize = 4 << 10;
92+
const auto ioBuf = folly::IOBuf::create(batchSize);
93+
uint64_t offset = 0;
94+
while (offset < fileSize) {
95+
const auto curLen = std::min<uint64_t>(fileSize - offset, batchSize);
96+
const auto dataView =
97+
readFile->pread(offset, curLen, ioBuf->writableData());
98+
writeFile->append(dataView);
99+
ioBuf->append(curLen);
100+
offset += curLen;
101+
ioBuf->clear();
102+
}
103+
writeFile->flush();
104+
writeFile->close();
105+
}
106+
}
107+
108+
void TraceFileToolRunner::listFiles(const std::string& path) {
109+
VELOX_USER_CHECK(sourceFs_->exists(path), "{} dose not exist", path);
110+
if (!sourceFs_->isDirectory(path)) {
111+
sourceFiles_.push_back(path);
112+
return;
113+
}
114+
115+
for (const auto& p : sourceFs_->list(sourceFs_->extractPath(path))) {
116+
listFiles(p);
117+
}
118+
}
119+
120+
} // namespace facebook::velox::tool::trace
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <folly/io/IOBuf.h>
20+
#include <gflags/gflags.h>
21+
#include <string>
22+
#include "velox/common/file/FileSystems.h"
23+
#include "velox/common/file/Utils.h"
24+
25+
// gflags consumed by TraceFileToolRunner, declared here so that code including
// this header can read or set them.
DECLARE_string(trace_file_op);
DECLARE_string(source_root_dir);
DECLARE_string(dest_root_dir);
DECLARE_string(trace_query_id);
DECLARE_string(trace_task_id);
30+
31+
namespace facebook::velox::tool::trace {
32+
33+
/// The trace replay runner. It is configured through a set of gflags passed
34+
/// from replayer tool command line.
35+
class TraceFileToolRunner {
36+
public:
37+
TraceFileToolRunner();
38+
virtual ~TraceFileToolRunner() = default;
39+
40+
/// Initializes the trace file tool runner by setting the velox runtime
41+
/// environment for the trace file operations. It is invoked before run().
42+
virtual void init();
43+
44+
/// Runs the trace file operations.
45+
void run();
46+
47+
private:
48+
// List all the files in the source root dir recursively.
49+
void listFiles(const std::string& path);
50+
51+
void copyFiles() const;
52+
53+
const std::string sourceRootDir_;
54+
const std::string destRootDir_;
55+
std::shared_ptr<filesystems::FileSystem> sourceFs_;
56+
std::shared_ptr<filesystems::FileSystem> destFs_;
57+
std::vector<std::string> sourceFiles_;
58+
};
59+
60+
} // namespace facebook::velox::tool::trace

velox/tool/trace/tests/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ add_executable(
1818
FilterProjectReplayerTest.cpp
1919
HashJoinReplayerTest.cpp
2020
PartitionedOutputReplayerTest.cpp
21+
TraceFileToolTest.cpp
2122
TableScanReplayerTest.cpp
2223
TableWriterReplayerTest.cpp)
2324

@@ -34,6 +35,7 @@ target_link_libraries(
3435
velox_exec_test_lib
3536
velox_memory
3637
velox_query_trace_replayer_base
38+
velox_trace_file_tool_base
3739
velox_vector_fuzzer
3840
GTest::gtest_main
3941
GTest::gmock

0 commit comments

Comments
 (0)