Skip to content

Commit e985ac0

Browse files
Add --fast-state-serializer flag and responding to AdnlMessageCreateChannel with Nop (ton-blockchain#1096)
* Add --fast-state-serializer and tools for jemalloc * Disable fast state serializer by default unless RAM is >= 90GB * Print jemalloc stats once a minute * Dump jemalloc profile on request * Respond to AdnlMessageCreateChannel with Nop --------- Co-authored-by: SpyCheese <mikle98@yandex.ru>
1 parent 8714477 commit e985ac0

10 files changed

+166
-2
lines changed

adnl/adnl-peer.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,12 @@ void AdnlPeerPairImpl::create_channel(pubkeys::Ed25519 pub, td::uint32 date) {
504504

505505
// Handles an incoming AdnlMessageCreateChannel: (re)creates the channel from the
// key and date carried by the message and, unless a previous reply was sent too
// recently, answers the peer with a single Nop message.
void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageCreateChannel &message) {
  create_channel(message.key(), message.date());

  // Still inside the cooldown window opened by the previous reply: stay silent.
  if (!respond_to_channel_create_after_.is_in_past()) {
    return;
  }

  // Open the next cooldown window before replying; its length is drawn from
  // td::Random::fast with bounds 1.0 and 2.0.
  const auto cooldown = td::Random::fast(1.0, 2.0);
  respond_to_channel_create_after_ = td::Timestamp::in(cooldown);

  std::vector<OutboundAdnlMessage> reply;
  reply.emplace_back(adnlmessage::AdnlMessageNop{}, 0);
  send_messages(std::move(reply));
}
508514

509515
void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageConfirmChannel &message) {

adnl/adnl-peer.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ class AdnlPeerPairImpl : public AdnlPeerPair {
214214
pubkeys::Ed25519 channel_pub_;
215215
td::int32 channel_pk_date_;
216216
td::actor::ActorOwn<AdnlChannel> channel_;
217+
td::Timestamp respond_to_channel_create_after_;
217218

218219
td::uint64 in_seqno_ = 0;
219220
td::uint64 out_seqno_ = 0;

tdutils/td/utils/port/Stat.cpp

+48
Original file line numberDiff line numberDiff line change
@@ -413,4 +413,52 @@ Result<CpuStat> cpu_stat() {
413413
#endif
414414
}
415415

416+
// Returns the total amount of RAM in bytes, taken from the "MemTotal" entry of
// /proc/meminfo (the value found there is multiplied by 1024).
// Only implemented when TD_LINUX is set; otherwise a "Not supported" error is returned.
// Errors are also returned when the file cannot be opened or read, when it does not
// fit into the local buffer, or when MemTotal is missing or not a valid number.
Result<uint64> get_total_ram() {
#if TD_LINUX
  TRY_RESULT(fd, FileFd::open("/proc/meminfo", FileFd::Read));
  SCOPE_EXIT {
    fd.close();
  };
  constexpr size_t BUF_SIZE = 10000;
  char buf[BUF_SIZE];
  // Read at most BUF_SIZE - 1 bytes so there is always room for the terminating zero.
  TRY_RESULT(size, fd.read(MutableSlice(buf, BUF_SIZE - 1)));
  if (size >= BUF_SIZE - 1) {
    // A completely filled buffer means the contents may have been truncated.
    return Status::Error("Failed to read /proc/meminfo");
  }
  buf[size] = 0;

  const char *s = buf;
  while (*s) {
    // After the inner loop, [line_begin, s) is the current line without its '\n'.
    const char *line_begin = s;
    while (*s != 0 && *s != '\n') {
      s++;
    }
    // The field name is the leading run of letters of the line.
    const char *name_end = line_begin;
    while (is_alpha(*name_end)) {
      name_end++;
    }
    if (Slice(line_begin, name_end) == "MemTotal") {
      Slice value(name_end, s);
      if (!value.empty() && value[0] == ':') {
        value.remove_prefix(1);
      }
      // Keep only the first space-separated token, i.e. the number without its unit suffix.
      value = split(trim(value)).first;
      TRY_RESULT_PREFIX(total_kb, to_integer_safe<uint64>(value), "Invalid value of MemTotal");
      // Reject values for which the multiplication by 1024 below would overflow uint64.
      if (total_kb >= 1ULL << (64 - 10)) {
        return Status::Error("Invalid value of MemTotal");
      }
      return total_kb * 1024;
    }
    if (*s == 0) {
      break;
    }
    s++;  // step over the '\n' to the beginning of the next line
  }
  return Status::Error("No MemTotal in /proc/meminfo");
#else
  return Status::Error("Not supported");
#endif
}
463+
416464
} // namespace td

tdutils/td/utils/port/Stat.h

+2
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,6 @@ Status update_atime(CSlice path) TD_WARN_UNUSED_RESULT;
6464

6565
#endif
6666

67+
// Returns the total RAM size in bytes (parsed from MemTotal in /proc/meminfo on Linux).
// Returns an error status on other platforms or when the value cannot be read or parsed.
Result<uint64> get_total_ram() TD_WARN_UNUSED_RESULT;
68+
6769
} // namespace td

validator-engine/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,9 @@ add_executable(validator-engine ${VALIDATOR_ENGINE_SOURCE})
1414
target_link_libraries(validator-engine overlay tdutils tdactor adnl tl_api dht
1515
rldp rldp2 catchain validatorsession full-node validator ton_validator validator
1616
fift-lib memprof git ${JEMALLOC_LIBRARIES})
17+
# When jemalloc was found, make its headers visible to validator-engine and define
# TON_USE_JEMALLOC=1 so that the jemalloc-specific code guarded by that macro is compiled in.
if (JEMALLOC_FOUND)
18+
target_include_directories(validator-engine PRIVATE ${JEMALLOC_INCLUDE_DIR})
19+
target_compile_definitions(validator-engine PRIVATE -DTON_USE_JEMALLOC=1)
20+
endif()
1721

1822
install(TARGETS validator-engine RUNTIME DESTINATION bin)

validator-engine/validator-engine.cpp

+89-2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@
7575
#include "block/precompiled-smc/PrecompiledSmartContract.h"
7676
#include "interfaces/validator-manager.h"
7777

78+
#if TON_USE_JEMALLOC
79+
#include <jemalloc/jemalloc.h>
80+
#endif
81+
7882
Config::Config() {
7983
out_port = 3278;
8084
full_node = ton::PublicKeyHash::zero();
@@ -1179,6 +1183,55 @@ class CheckDhtServerStatusQuery : public td::actor::Actor {
11791183
td::Promise<td::BufferSlice> promise_;
11801184
};
11811185

1186+
#if TON_USE_JEMALLOC
1187+
class JemallocStatsWriter : public td::actor::Actor {
1188+
public:
1189+
void start_up() override {
1190+
alarm();
1191+
}
1192+
1193+
void alarm() override {
1194+
alarm_timestamp() = td::Timestamp::in(60.0);
1195+
auto r_stats = get_stats();
1196+
if (r_stats.is_error()) {
1197+
LOG(WARNING) << "Jemalloc stats error : " << r_stats.move_as_error();
1198+
} else {
1199+
auto s = r_stats.move_as_ok();
1200+
LOG(WARNING) << "JEMALLOC_STATS : [ timestamp=" << (ton::UnixTime)td::Clocks::system()
1201+
<< " allocated=" << s.allocated << " active=" << s.active << " metadata=" << s.metadata
1202+
<< " resident=" << s.resident << " ]";
1203+
}
1204+
}
1205+
1206+
private:
1207+
struct JemallocStats {
1208+
size_t allocated, active, metadata, resident;
1209+
};
1210+
1211+
static td::Result<JemallocStats> get_stats() {
1212+
size_t sz = sizeof(size_t);
1213+
static size_t epoch = 1;
1214+
if (mallctl("epoch", &epoch, &sz, &epoch, sz)) {
1215+
return td::Status::Error("Failed to refrash stats");
1216+
}
1217+
JemallocStats stats;
1218+
if (mallctl("stats.allocated", &stats.allocated, &sz, nullptr, 0)) {
1219+
return td::Status::Error("Cannot get stats.allocated");
1220+
}
1221+
if (mallctl("stats.active", &stats.active, &sz, nullptr, 0)) {
1222+
return td::Status::Error("Cannot get stats.active");
1223+
}
1224+
if (mallctl("stats.metadata", &stats.metadata, &sz, nullptr, 0)) {
1225+
return td::Status::Error("Cannot get stats.metadata");
1226+
}
1227+
if (mallctl("stats.resident", &stats.resident, &sz, nullptr, 0)) {
1228+
return td::Status::Error("Cannot get stats.resident");
1229+
}
1230+
return stats;
1231+
}
1232+
};
1233+
#endif
1234+
11821235
void ValidatorEngine::set_local_config(std::string str) {
11831236
local_config_ = str;
11841237
}
@@ -1202,6 +1255,9 @@ void ValidatorEngine::schedule_shutdown(double at) {
12021255
}
12031256
// Actor start-up hook: schedules the first alarm and, in jemalloc builds,
// starts the periodic jemalloc statistics logger.
void ValidatorEngine::start_up() {
12041257
// First alarm fires after 1.0 second plus a random fraction (td::Random::fast(0, 100) hundredths).
alarm_timestamp() = td::Timestamp::in(1.0 + td::Random::fast(0, 100) * 0.01);
1258+
#if TON_USE_JEMALLOC
1259+
// NOTE(review): release() presumably detaches the actor so it is not destroyed with the returned handle - confirm.
td::actor::create_actor<JemallocStatsWriter>("mem-stat").release();
1260+
#endif
12051261
}
12061262

12071263
void ValidatorEngine::alarm() {
@@ -1412,6 +1468,18 @@ td::Status ValidatorEngine::load_global_config() {
14121468
}
14131469
validator_options_.write().set_hardforks(std::move(h));
14141470

1471+
auto r_total_ram = td::get_total_ram();
1472+
if (r_total_ram.is_error()) {
1473+
LOG(ERROR) << "Failed to get total RAM size: " << r_total_ram.move_as_error();
1474+
} else {
1475+
td::uint64 total_ram = r_total_ram.move_as_ok();
1476+
LOG(WARNING) << "Total RAM = " << td::format::as_size(total_ram);
1477+
if (total_ram >= (90ULL << 30)) {
1478+
fast_state_serializer_enabled_ = true;
1479+
}
1480+
}
1481+
validator_options_.write().set_fast_state_serializer_enabled(fast_state_serializer_enabled_);
1482+
14151483
return td::Status::OK();
14161484
}
14171485

@@ -3894,7 +3962,7 @@ void need_scheduler_status(int sig) {
38943962
need_scheduler_status_flag.store(true);
38953963
}
38963964

3897-
void dump_memory_stats() {
3965+
void dump_memprof_stats() {
38983966
if (!is_memprof_on()) {
38993967
return;
39003968
}
@@ -3919,8 +3987,20 @@ void dump_memory_stats() {
39193987
LOG(WARNING) << td::tag("fast_backtrace_success_rate", get_fast_backtrace_success_rate());
39203988
}
39213989

3990+
// Asks jemalloc (via the "prof.dump" control) to write a heap profile to a fixed
// file under /tmp and logs the outcome. Does nothing when TON_USE_JEMALLOC is not set.
void dump_jemalloc_prof() {
#if TON_USE_JEMALLOC
  const char *filename = "/tmp/validator-jemalloc.dump";
  // "prof.dump" is write-only and takes the target path as a `const char *` value,
  // hence the pointer-to-pointer and sizeof(const char *).
  const int err = mallctl("prof.dump", nullptr, nullptr, &filename, sizeof(filename));
  if (err == 0) {
    // Success is informational, so use the same severity as the other dump_stats() output.
    LOG(WARNING) << "Written jemalloc dump to " << filename;
  } else {
    LOG(ERROR) << "Failed to write jemalloc dump to " << filename << " (mallctl error " << err << ")";
  }
#endif
}
4000+
39224001
void dump_stats() {
3923-
dump_memory_stats();
4002+
dump_memprof_stats();
4003+
dump_jemalloc_prof();
39244004
LOG(WARNING) << td::NamedThreadSafeCounter::get_default();
39254005
}
39264006

@@ -4158,6 +4238,13 @@ int main(int argc, char *argv[]) {
41584238
acts.push_back([&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_catchain_max_block_delay, v); });
41594239
return td::Status::OK();
41604240
});
4241+
p.add_option(
4242+
'\0', "fast-state-serializer",
4243+
"faster persistent state serializer, but requires more RAM (enabled automatically on machines with >= 90GB RAM)",
4244+
[&]() {
4245+
acts.push_back(
4246+
[&x]() { td::actor::send_closure(x, &ValidatorEngine::set_fast_state_serializer_enabled, true); });
4247+
});
41614248
auto S = p.run(argc, argv);
41624249
if (S.is_error()) {
41634250
LOG(ERROR) << "failed to parse options: " << S.move_as_error();

validator-engine/validator-engine.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ class ValidatorEngine : public td::actor::Actor {
220220
bool started_ = false;
221221
ton::BlockSeqno truncate_seqno_{0};
222222
std::string session_logs_file_;
223+
bool fast_state_serializer_enabled_ = false;
223224

224225
std::set<ton::CatchainSeqno> unsafe_catchains_;
225226
std::map<ton::BlockSeqno, std::pair<ton::CatchainSeqno, td::uint32>> unsafe_catchain_rotations_;
@@ -299,6 +300,9 @@ class ValidatorEngine : public td::actor::Actor {
299300
// Stores the catchain max block delay value in catchain_max_block_delay_.
void set_catchain_max_block_delay(double value) {
300301
catchain_max_block_delay_ = value;
301302
}
303+
// Sets the flag requested by the --fast-state-serializer command-line option.
// The stored value is later copied into the validator options.
void set_fast_state_serializer_enabled(bool value) {
304+
fast_state_serializer_enabled_ = value;
305+
}
302306
void start_up() override;
303307
ValidatorEngine() {
304308
}

validator/state-serializer.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ class CachedCellDbReader : public vm::CellDbReader {
253253
};
254254

255255
void AsyncStateSerializer::prepare_previous_state_cache(ShardIdFull shard) {
256+
if (!opts_->get_fast_state_serializer_enabled()) {
257+
return;
258+
}
256259
std::vector<ShardIdFull> prev_shards;
257260
for (const auto& [_, prev_shard] : previous_state_files_) {
258261
if (shard_intersects(shard, prev_shard)) {

validator/validator-options.hpp

+7
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
147147
// Returns the currently stored collator options.
td::Ref<CollatorOptions> get_collator_options() const override {
148148
return collator_options_;
149149
}
150+
// Returns whether the fast (more RAM-hungry) state serializer mode is enabled.
bool get_fast_state_serializer_enabled() const override {
151+
return fast_state_serializer_enabled_;
152+
}
150153

151154
void set_zero_block_id(BlockIdExt block_id) override {
152155
zero_block_id_ = block_id;
@@ -233,6 +236,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
233236
// Replaces the stored collator options with `value`.
void set_collator_options(td::Ref<CollatorOptions> value) override {
234237
collator_options_ = std::move(value);
235238
}
239+
// Enables or disables the fast (more RAM-hungry) state serializer mode.
void set_fast_state_serializer_enabled(bool value) override {
240+
fast_state_serializer_enabled_ = value;
241+
}
236242

237243
ValidatorManagerOptionsImpl *make_copy() const override {
238244
return new ValidatorManagerOptionsImpl(*this);
@@ -286,6 +292,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
286292
td::optional<double> catchain_max_block_delay_;
287293
bool state_serializer_enabled_ = true;
288294
td::Ref<CollatorOptions> collator_options_{true};
295+
bool fast_state_serializer_enabled_ = false;
289296
};
290297

291298
} // namespace validator

validator/validator.h

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ struct ValidatorManagerOptions : public td::CntObject {
107107
virtual td::optional<double> get_catchain_max_block_delay() const = 0;
108108
virtual bool get_state_serializer_enabled() const = 0;
109109
virtual td::Ref<CollatorOptions> get_collator_options() const = 0;
110+
// Whether the fast state serializer mode is enabled.
virtual bool get_fast_state_serializer_enabled() const = 0;
110111

111112
virtual void set_zero_block_id(BlockIdExt block_id) = 0;
112113
virtual void set_init_block_id(BlockIdExt block_id) = 0;
@@ -137,6 +138,7 @@ struct ValidatorManagerOptions : public td::CntObject {
137138
virtual void set_catchain_max_block_delay(double value) = 0;
138139
virtual void set_state_serializer_enabled(bool value) = 0;
139140
virtual void set_collator_options(td::Ref<CollatorOptions> value) = 0;
141+
// Enables or disables the fast state serializer mode.
virtual void set_fast_state_serializer_enabled(bool value) = 0;
140142

141143
static td::Ref<ValidatorManagerOptions> create(
142144
BlockIdExt zero_block_id, BlockIdExt init_block_id,

0 commit comments

Comments
 (0)