Skip to content

Commit 066d7df

Browse files
authored
java: Upgrade to AP 3.0 (#855)
1 parent 89216fb commit 066d7df

File tree

5 files changed

+83
-22
lines changed

5 files changed

+83
-22
lines changed

gprofiler/gprofiler_types.py

+8
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,11 @@ def integer_range_check(value_str: str) -> int:
9696
return value
9797

9898
return integer_range_check
99+
100+
101+
def comma_separated_enum_list(options: List[str], value: str) -> List[str]:
102+
values = value.split(",")
103+
for v in values:
104+
if v not in options:
105+
raise configargparse.ArgumentTypeError(f"invalid value {v!r} (allowed values: {options!r})")
106+
return values

gprofiler/profilers/java.py

+62-16
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
ProcessToProfileData,
6060
ProfileData,
6161
StackToSampleCount,
62+
comma_separated_enum_list,
6263
integer_range,
6364
positive_integer,
6465
)
@@ -112,9 +113,6 @@ def needs_musl_ap_cached(process: Process) -> bool:
112113
return is_musl(process, maps) and not any("glibc-compat" in m.path for m in maps)
113114

114115

115-
JAVA_SAFEMODE_ALL = "all" # magic value for *all* options from JavaSafemodeOptions
116-
117-
118116
class JavaSafemodeOptions(str, Enum):
119117
# a profiled process was OOM-killed and we saw it in the kernel log
120118
PROFILED_OOM = "profiled-oom"
@@ -138,18 +136,45 @@ class JavaSafemodeOptions(str, Enum):
138136
AP_LOADED_CHECK = "ap-loaded-check"
139137

140138

139+
JAVA_SAFEMODE_ALL = "all" # magic value for *all* options from JavaSafemodeOptions
141140
JAVA_SAFEMODE_ALL_OPTIONS = [o.value for o in JavaSafemodeOptions]
142141
JAVA_SAFEMODE_DEFAULT_OPTIONS = [
143142
JavaSafemodeOptions.PROFILED_OOM.value,
144143
JavaSafemodeOptions.PROFILED_SIGNALED.value,
145144
JavaSafemodeOptions.HSERR.value,
146145
]
147146

148-
# https://github.com/jvm-profiling-tools/async-profiler/blob/63799a6055363cbd7ca8ef951e2393db0d0ba7dd/src/profiler.cpp#L77
149-
JAVA_ASYNC_PROFILER_DEFAULT_SAFEMODE = 256 # StackRecovery.PROBE_SP
150147

151148
SUPPORTED_AP_MODES = ["cpu", "itimer", "alloc"]
152149

150+
151+
# see StackWalkFeatures
152+
# https://github.com/async-profiler/async-profiler/blob/a17529378b47e6700d84f89d74ca5e6284ffd1a6/src/arguments.h#L95-L112
153+
class AsyncProfilerFeatures(str, Enum):
154+
# these will be controllable via "features" in a future AP release:
155+
#
156+
# unknown_java
157+
# unwind_stub
158+
# unwind_comp
159+
# unwind_native
160+
# java_anchor
161+
# gc_traces
162+
163+
# these are controllable via "features" in AP 3.0
164+
probe_sp = "probesp"
165+
vtable_target = "vtable"
166+
comp_task = "comptask"
167+
# as of AP 3.0
168+
169+
170+
SUPPORTED_AP_FEATURES = [o.value for o in AsyncProfilerFeatures]
171+
DEFAULT_AP_FEATURES = [AsyncProfilerFeatures.probe_sp.value, AsyncProfilerFeatures.vtable_target.value]
172+
173+
# see options still here and not in "features":
174+
# https://github.com/async-profiler/async-profiler/blob/a17529378b47e6700d84f89d74ca5e6284ffd1a6/src/arguments.cpp#L262
175+
# we don't want any of them disabled by default.
176+
JAVA_ASYNC_PROFILER_DEFAULT_SAFEMODE = 0
177+
153178
PROBLEMATIC_FRAME_REGEX = re.compile(r"^# Problematic frame:\n# (.*?)\n#\n", re.MULTILINE | re.DOTALL)
154179
"""
155180
See VMError::report.
@@ -239,7 +264,7 @@ def __init__(self, stop_event: Event, jattach_timeout: int):
239264
def run(self, process: Process, cmd: str) -> str:
240265
try:
241266
return run_process(
242-
[asprof_path(), "jcmd", "--jattach-cmd", cmd, str(process.pid)],
267+
[asprof_path(), "jcmd", str(process.pid), cmd],
243268
stop_event=self.stop_event,
244269
timeout=self.jattach_timeout,
245270
).stdout.decode()
@@ -467,6 +492,7 @@ def __init__(
467492
profiler_state: ProfilerState,
468493
mode: str,
469494
ap_safemode: int,
495+
ap_features: List[str],
470496
ap_args: str,
471497
jattach_timeout: int = _DEFAULT_JATTACH_TIMEOUT,
472498
mcache: int = 0,
@@ -517,6 +543,7 @@ def __init__(
517543
self._mode = mode
518544
self._fdtransfer_path = f"@async-profiler-{process.pid}-{secrets.token_hex(10)}" if mode == "cpu" else None
519545
self._ap_safemode = ap_safemode
546+
self._ap_features = ap_features
520547
self._ap_args = ap_args
521548
self._jattach_timeout = jattach_timeout
522549
self._mcache = mcache
@@ -651,10 +678,10 @@ def _check_disk_requirements(self) -> None:
651678
def _get_base_cmd(self) -> List[str]:
652679
return [
653680
asprof_path(),
654-
"jattach",
655-
"-L",
681+
str(self.process.pid),
682+
"load",
656683
self._libap_path_process,
657-
"--jattach-cmd",
684+
"true", # 'true' means the given path ^^ is absolute.
658685
]
659686

660687
def _get_extra_ap_args(self) -> str:
@@ -677,7 +704,9 @@ def _get_start_cmd(self, interval: int, ap_timeout: int) -> List[str]:
677704
f"{self._get_ap_output_args()}{self._get_interval_arg(interval)},"
678705
f"log={self._log_path_process}"
679706
f"{f',fdtransfer={self._fdtransfer_path}' if self._mode == 'cpu' else ''}"
680-
f",safemode={self._ap_safemode},timeout={ap_timeout}"
707+
f",safemode={self._ap_safemode},"
708+
f",features={'+'.join(self._ap_features)}," # asprof uses '+' as a separator: https://github.com/async-profiler/async-profiler/blob/a17529378b47e6700d84f89d74ca5e6284ffd1a6/src/launcher/main.cpp#L441 # noqa
709+
f"timeout={ap_timeout}"
681710
f"{',lib' if self._profiler_state.insert_dso_name else ''}{self._get_extra_ap_args()}"
682711
]
683712

@@ -705,7 +734,7 @@ def _run_async_profiler(self, cmd: List[str]) -> str:
705734
try:
706735
# kill jattach with SIGTERM if it hangs. it will go down
707736
run_process(
708-
cmd + [str(self.process.pid)],
737+
cmd,
709738
stop_event=self._profiler_state.stop_event,
710739
timeout=self._jattach_timeout,
711740
kill_signal=signal.SIGTERM,
@@ -773,7 +802,10 @@ def start_async_profiler(self, interval: int, second_try: bool = False, ap_timeo
773802
if e.is_ap_loaded:
774803
if (
775804
e.returncode == 200 # 200 == AP's COMMAND_ERROR
776-
and e.get_ap_log() == "[ERROR] Profiler already started\n"
805+
# this is the error we get when we try to start AP on a process that already has it loaded.
806+
# check with "in" and not "==" in case other warnings/infos are printed alongside it,
807+
# but generally, we expect it to be the only output in this case.
808+
and "[ERROR] Profiler already started\n" in e.get_ap_log()
777809
):
778810
# profiler was already running
779811
return False
@@ -817,11 +849,22 @@ def read_output(self) -> Optional[str]:
817849
"--java-async-profiler-safemode",
818850
dest="java_async_profiler_safemode",
819851
default=JAVA_ASYNC_PROFILER_DEFAULT_SAFEMODE,
820-
type=integer_range(0, 0x200),
821-
metavar="[0-511]",
852+
type=integer_range(0, 0x40),
853+
metavar="[0-63]",
822854
help="Controls the 'safemode' parameter passed to async-profiler. This is parameter denotes multiple"
823-
" bits that describe different stack recovery techniques which async-profiler uses (see StackRecovery"
824-
" enum in async-profiler's code, in profiler.cpp)."
855+
" bits that describe different stack recovery techniques which async-profiler uses. In a future release,"
856+
" these optinos will be migrated to the 'features' parameter."
857+
" Defaults to '%(default)s'.",
858+
),
859+
ProfilerArgument(
860+
"--java-async-profiler-features",
861+
dest="java_async_profiler_features",
862+
default=DEFAULT_AP_FEATURES,
863+
metavar=",".join(SUPPORTED_AP_FEATURES),
864+
type=functools.partial(comma_separated_enum_list, SUPPORTED_AP_FEATURES),
865+
help="Controls the 'features' parameter passed to async-profiler. This is parameter is a comma-separated"
866+
" list of options which describe async-profiler's available features (see StackWalkFeatures"
867+
" enum in async-profiler's code, in arguments.h)."
825868
" Defaults to '%(default)s').",
826869
),
827870
ProfilerArgument(
@@ -933,6 +976,7 @@ def __init__(
933976
java_version_check: bool,
934977
java_async_profiler_mode: str,
935978
java_async_profiler_safemode: int,
979+
java_async_profiler_features: List[str],
936980
java_async_profiler_args: str,
937981
java_safemode: str,
938982
java_jattach_timeout: int,
@@ -957,6 +1001,7 @@ def __init__(
9571001
logger.warning("Java version checks are disabled")
9581002
self._init_ap_mode(self._profiler_state.profiling_mode, java_async_profiler_mode)
9591003
self._ap_safemode = java_async_profiler_safemode
1004+
self._ap_features = java_async_profiler_features
9601005
self._ap_args = java_async_profiler_args
9611006
self._jattach_timeout = java_jattach_timeout
9621007
self._ap_mcache = java_async_profiler_mcache
@@ -1214,6 +1259,7 @@ def _profile_process(self, process: Process, duration: int, spawned: bool) -> Pr
12141259
self._profiler_state,
12151260
self._mode,
12161261
self._ap_safemode,
1262+
self._ap_features,
12171263
self._ap_args,
12181264
self._jattach_timeout,
12191265
self._ap_mcache,

scripts/async_profiler_build_shared.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#
66
set -euo pipefail
77

8-
VERSION=v2.10g2
9-
GIT_REV="40b850a4101756bc398051661d1adbbe5d7e2211"
8+
VERSION=v3.0g1
9+
GIT_REV="952b30f9d28aeb9cb9e418ed6f60772dfdf83e46"
1010

1111
git clone --depth 1 -b "$VERSION" https://github.com/Granulate/async-profiler.git && cd async-profiler && git reset --hard "$GIT_REV"
1212
make all

tests/test_java.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def test_async_profiler_already_running(
126126
profiler_state=profiler._profiler_state,
127127
mode=profiler._mode,
128128
ap_safemode=0,
129+
ap_features=[],
129130
ap_args="",
130131
) as ap_proc:
131132
assert ap_proc.start_async_profiler(frequency_to_ap_interval(11))
@@ -136,6 +137,7 @@ def test_async_profiler_already_running(
136137
profiler_state=profiler._profiler_state,
137138
mode="itimer",
138139
ap_safemode=0,
140+
ap_features=[],
139141
ap_args="",
140142
) as ap_proc:
141143
ap_proc.status_async_profiler()
@@ -370,6 +372,7 @@ def test_async_profiler_stops_after_given_timeout(
370372
profiler_state=profiler_state,
371373
mode="itimer",
372374
ap_safemode=0,
375+
ap_features=[],
373376
ap_args="",
374377
) as ap_proc:
375378
assert ap_proc.start_async_profiler(frequency_to_ap_interval(11), ap_timeout=timeout_s)
@@ -466,12 +469,12 @@ def test_java_deleted_libjvm(
466469
def filter_jattach_load_records(records: List[LogRecord]) -> List[LogRecord]:
467470
def _filter_record(r: LogRecord) -> bool:
468471
# find the log record of
469-
# Running command (command=['/app/gprofiler/resources/java/apsprof', 'jattach',
470-
# '-L', '/path/to/libasyncProfiler.so', "--jattach-cmd", "start,..."])
472+
# Running command (command=['/app/gprofiler/resources/java/apsprof', '<PID>', 'load',
473+
# '/path/to/libasyncProfiler.so', 'true', 'start,...'])
471474
return (
472475
r.message == "Running command"
473-
and len(log_record_extra(r)["command"]) >= 6
474-
and log_record_extra(r)["command"][1] == "jattach"
476+
and len(log_record_extra(r)["command"]) == 6
477+
and log_record_extra(r)["command"][2] == "load"
475478
and any(map(lambda k: k in log_record_extra(r)["command"][5], ["start,", "stop,"]))
476479
)
477480

@@ -916,6 +919,7 @@ def flush_output_and_stop_async_profiler(self: AsyncProfiledProcess, *args: Any,
916919
profiler_state=profiler._profiler_state,
917920
mode="itimer",
918921
ap_safemode=0,
922+
ap_features=[],
919923
ap_args="",
920924
) as ap_proc:
921925
ap_version = ap_proc.read_ap_version()

tests/utils.py

+3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from gprofiler.metadata import ProfileMetadata
2626
from gprofiler.profiler_state import ProfilerState
2727
from gprofiler.profilers.java import (
28+
DEFAULT_AP_FEATURES,
2829
JAVA_ASYNC_PROFILER_DEFAULT_SAFEMODE,
2930
JAVA_SAFEMODE_ALL,
3031
AsyncProfiledProcess,
@@ -207,6 +208,7 @@ def make_java_profiler(
207208
java_version_check: bool = True,
208209
java_async_profiler_mode: str = "cpu",
209210
java_async_profiler_safemode: int = JAVA_ASYNC_PROFILER_DEFAULT_SAFEMODE,
211+
java_async_profiler_features: List[str] = DEFAULT_AP_FEATURES,
210212
java_async_profiler_args: str = "",
211213
java_safemode: str = JAVA_SAFEMODE_ALL,
212214
java_jattach_timeout: int = AsyncProfiledProcess._DEFAULT_JATTACH_TIMEOUT,
@@ -226,6 +228,7 @@ def make_java_profiler(
226228
java_version_check=java_version_check,
227229
java_async_profiler_mode=java_async_profiler_mode,
228230
java_async_profiler_safemode=java_async_profiler_safemode,
231+
java_async_profiler_features=java_async_profiler_features,
229232
java_async_profiler_args=java_async_profiler_args,
230233
java_safemode=java_safemode,
231234
java_jattach_timeout=java_jattach_timeout,

0 commit comments

Comments
 (0)