Skip to content

Commit fd9cb77

Browse files
committed
first version
1 parent afdd485 commit fd9cb77

File tree

173 files changed

+140374
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+140374
-0
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
CMakeCache.txt
2+
CMakeFiles/
3+
Makefile
4+
cmake_install.cmake
5+
out/

CMakeLists.txt

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
cmake_minimum_required(VERSION 2.6)
project(perf-map-agent)

# For a debug build (source positions and symbols), uncomment the next line:
# set(CMAKE_BUILD_TYPE DEBUG)

# Both build products (the agent library libperfmap.so and attach-main.jar)
# are collected in the "out" directory below the build tree.
set(OUTDIR "${PROJECT_BINARY_DIR}/out")
set(LIBRARY_OUTPUT_PATH "${OUTDIR}")

# Look up JNI and, when it is found, report the discovered locations so a
# misdetected JDK is easy to spot in the configure log.
find_package(JNI)
if(JNI_FOUND)
  foreach(jni_var JNI_INCLUDE_DIRS JNI_LIBRARIES JAVA_INCLUDE_PATH JAVA_INCLUDE_PATH2)
    message(STATUS "${jni_var}=${${jni_var}}")
  endforeach()
endif()

# The include paths are added whether or not JNI_FOUND is set.
# Left unquoted on purpose: an empty value must expand to "no argument".
include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2})

# The agent itself: a shared library built from the two C sources.
add_library(perfmap SHARED
  src/c/perf-map-agent.c
  src/c/perf-map-file.c
)

find_package(Java REQUIRED)
include(UseJava)

# Put the JDK's tools.jar (resolved relative to the JNI include directory) on
# the Java include path used by add_jar, and emit the jar next to the library.
set(CMAKE_JAVA_INCLUDE_PATH "${JAVA_INCLUDE_PATH}/../lib/tools.jar")
set(CMAKE_JAVA_TARGET_OUTPUT_DIR "${OUTDIR}")

# Debugging aid, disabled by default:
#message(STATUS "LIBS: ${Java_JAR_EXECUTABLE} ${Java_INCLUDE_DIRS} incl: ${CMAKE_JAVA_INCLUDE_PATH} output_dir: ${CMAKE_JAVA_TARGET_OUTPUT_DIR}")

# attach-main.jar, with AttachOnce as its entry point.
add_jar(attach-main
  src/java/AttachOnce.java
  ENTRY_POINT net/virtualvoid/perf/AttachOnce
  OUTPUT_DIR "${OUTDIR}"
)

FlameGraph/.travis.yml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Using the container-based infrastructure
2+
sudo: false
3+
4+
language: perl
5+
perl:
6+
- "5.24"
7+
- "5.22"
8+
- "5.20"
9+
- "5.18"
10+
- "5.16"
11+
- "5.14"
12+
- "5.12"
13+
- "5.10"
14+
15+
install:
16+
/bin/true
17+
18+
script:
19+
./test.sh

FlameGraph/README.md

+219
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Flame Graphs visualize profiled code
2+
3+
Main Website: http://www.brendangregg.com/flamegraphs.html
4+
5+
Example (click to zoom):
6+
[![Example](http://www.brendangregg.com/FlameGraphs/cpu-bash-flamegraph.svg)](http://www.brendangregg.com/FlameGraphs/cpu-bash-flamegraph.svg)
7+
8+
Other sites:
9+
- The Flame Graph article in ACMQ and CACM: http://queue.acm.org/detail.cfm?id=2927301 http://cacm.acm.org/magazines/2016/6/202665-the-flame-graph/abstract
10+
- CPU profiling using Linux perf\_events, DTrace, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
11+
- CPU profiling using Xcode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/
12+
- CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/
13+
- Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html
14+
- Other examples, updates, and news: http://www.brendangregg.com/flamegraphs.html#Updates
15+
16+
Flame graphs can be created in three steps:
17+
18+
1. Capture stacks
19+
2. Fold stacks
20+
3. flamegraph.pl
21+
22+
1\. Capture stacks
23+
=================
24+
Stack samples can be captured using Linux perf\_events, FreeBSD pmcstat (hwpmc), DTrace, SystemTap, and many other profilers. See the stackcollapse-\* converters.
25+
26+
### Linux perf\_events
27+
28+
Using Linux perf\_events (aka "perf") to capture 60 seconds of 99 Hertz stack samples, both user- and kernel-level stacks, all processes:
29+
30+
```
31+
# perf record -F 99 -a -g -- sleep 60
32+
# perf script > out.perf
33+
```
34+
35+
Now only capturing PID 181:
36+
37+
```
38+
# perf record -F 99 -p 181 -g -- sleep 60
39+
# perf script > out.perf
40+
```
41+
42+
### DTrace
43+
44+
Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
45+
46+
```
47+
# dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
48+
```
49+
50+
Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
51+
52+
```
53+
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
54+
```
55+
56+
60 seconds of user-level stacks, including time spent in-kernel, for PID 12345 at 97 Hertz:
57+
58+
```
59+
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
60+
```
61+
62+
Switch `ustack()` for `jstack()` if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using `jstack()` as it performs additional work to translate frames.
63+
64+
2\. Fold stacks
65+
==============
66+
Use the stackcollapse programs to fold stack samples into single lines. The programs provided are:
67+
68+
- `stackcollapse.pl`: for DTrace stacks
69+
- `stackcollapse-perf.pl`: for Linux perf_events "perf script" output
70+
- `stackcollapse-pmc.pl`: for FreeBSD pmcstat -G stacks
71+
- `stackcollapse-stap.pl`: for SystemTap stacks
72+
- `stackcollapse-instruments.pl`: for Xcode Instruments
73+
- `stackcollapse-vtune.pl`: for Intel VTune profiles
74+
- `stackcollapse-ljp.awk`: for Lightweight Java Profiler
75+
- `stackcollapse-jstack.pl`: for Java jstack(1) output
76+
- `stackcollapse-gdb.pl`: for gdb(1) stacks
77+
- `stackcollapse-go.pl`: for Golang pprof stacks
78+
- `stackcollapse-vsprof.pl`: for Microsoft Visual Studio profiles
79+
80+
Usage example:
81+
82+
```
83+
For perf_events:
84+
$ ./stackcollapse-perf.pl out.perf > out.folded
85+
86+
For DTrace:
87+
$ ./stackcollapse.pl out.kern_stacks > out.kern_folded
88+
```
89+
90+
The output looks like this:
91+
92+
```
93+
unix`_sys_sysenter_post_swapgs 1401
94+
unix`_sys_sysenter_post_swapgs;genunix`close 5
95+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
96+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
97+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
98+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
99+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
100+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
101+
[...]
102+
```
103+
104+
3\. flamegraph.pl
105+
================
106+
Use flamegraph.pl to render an SVG.
107+
108+
```
109+
$ ./flamegraph.pl out.kern_folded > kernel.svg
110+
```
111+
112+
An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg:
113+
114+
```
115+
$ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
116+
```
117+
118+
Provided Examples
119+
=================
120+
121+
### Linux perf\_events
122+
123+
An example output from Linux "perf script" is included, gzip'd, as example-perf-stacks.txt.gz. The resulting flame graph is example-perf.svg:
124+
125+
[![Example](http://www.brendangregg.com/FlameGraphs/example-perf.svg)](http://www.brendangregg.com/FlameGraphs/example-perf.svg)
126+
127+
You can create this using:
128+
129+
```
130+
$ gunzip -c example-perf-stacks.txt.gz | ./stackcollapse-perf.pl --all | ./flamegraph.pl --color=java --hash > example-perf.svg
131+
```
132+
133+
This shows my typical workflow: I'll gzip profiles on the target, then copy them to my laptop for analysis. Since I have hundreds of profiles, I leave them gzip'd!
134+
135+
Since this profile included Java, I used the flamegraph.pl --color=java palette. I've also used stackcollapse-perf.pl --all, which includes all annotations that help flamegraph.pl use separate colors for kernel and user level code. The resulting flame graph uses: green == Java, yellow == C++, red == user-mode native, orange == kernel.
136+
137+
This profile was from an analysis of vert.x performance. The benchmark client, wrk, is also visible in the flame graph.
138+
139+
### DTrace
140+
141+
An example output from DTrace is also included, example-dtrace-stacks.txt, and the resulting flame graph, example-dtrace.svg:
142+
143+
[![Example](http://www.brendangregg.com/FlameGraphs/example-dtrace.svg)](http://www.brendangregg.com/FlameGraphs/example-dtrace.svg)
144+
145+
You can generate this using:
146+
147+
```
148+
$ ./stackcollapse.pl example-dtrace-stacks.txt | ./flamegraph.pl > example-dtrace.svg
149+
```
150+
151+
This was from a particular performance investigation: the Flame Graph identified that CPU time was spent in the lofs module, and quantified that time.
152+
153+
154+
Options
155+
=======
156+
See the USAGE message (--help) for options:
157+
158+
USAGE: ./flamegraph.pl [options] infile > outfile.svg
159+
160+
--title TEXT # change title text
161+
--subtitle TEXT # second level title (optional)
162+
--width NUM # width of image (default 1200)
163+
--height NUM # height of each frame (default 16)
164+
--minwidth NUM # omit smaller functions (default 0.1 pixels)
165+
--fonttype FONT # font type (default "Verdana")
166+
--fontsize NUM # font size (default 12)
167+
--countname TEXT # count type label (default "samples")
168+
--nametype TEXT # name type label (default "Function:")
169+
--colors PALETTE # set color palette. choices are: hot (default), mem,
170+
# io, wakeup, chain, java, js, perl, red, green, blue,
171+
# aqua, yellow, purple, orange
172+
--bgcolors COLOR # set background colors. gradient choices are yellow
173+
# (default), blue, green, grey; flat colors use "#rrggbb"
174+
--hash # colors are keyed by function name hash
175+
--cp # use consistent palette (palette.map)
176+
--reverse # generate stack-reversed flame graph
177+
--inverted # icicle graph
178+
--flamechart # produce a flame chart (sort by time, do not merge stacks)
179+
--negate # switch differential hues (blue<->red)
180+
--notes TEXT # add notes comment in SVG (for debugging)
181+
--help # this message
182+
183+
eg,
184+
./flamegraph.pl --title="Flame Graph: malloc()" trace.txt > graph.svg
185+
186+
As suggested in the example, flame graphs can process traces of any event,
187+
such as malloc()s, provided stack traces are gathered.
188+
189+
190+
Consistent Palette
191+
==================
192+
If you use the `--cp` option, it will use the $colors selection and randomly
193+
generate the palette like normal. Any future flamegraphs created using the `--cp`
194+
option will use the same palette map. Any new symbols from future flamegraphs
195+
will have their colors randomly generated using the $colors selection.
196+
197+
If you don't like the palette, just delete the palette.map file.
198+
199+
This allows you to change your colorscheme between flamegraphs to make the
200+
differences REALLY stand out.
201+
202+
Example:
203+
204+
Say we have 2 captures, one with a problem, and one when it was working
205+
(whatever "it" is):
206+
207+
```
208+
cat working.folded | ./flamegraph.pl --cp > working.svg
209+
# this generates a palette.map, as per the normal randomly generated look.
210+
211+
cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
212+
# this svg will use the same palette.map for the same events, but a very
213+
# different colorscheme for any new events.
214+
```
215+
216+
Take a look at the demo directory for an example:
217+
218+
palette-example-working.svg
219+
palette-example-broken.svg

FlameGraph/aix-perf.pl

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/perl
#
# aix-perf.pl - repeatedly dump process stacks with procstack.
#
# Takes -r sample_count snapshots and sleeps -t sleep_time seconds after each
# one. For every snapshot the output of /usr/bin/procstack is printed to
# stdout for each selected PID:
#   * with -u user: the PIDs listed by "/usr/sysv/bin/ps -u user";
#   * otherwise:    every all-numeric entry found in /proc.
#
# Usage: aix-perf.pl [ -u user] -r sample_count -t sleep_time

use strict;
use warnings;
use Getopt::Std;

# getopt() stores the argument of each switch in the package variable
# $opt_<letter>; under "use strict" those must be declared with our.
our ($opt_u, $opt_r, $opt_t);
getopt('urt');

# Both the sample count and the sleep time are mandatory (and must be non-zero).
unless ($opt_r && $opt_t) {
    print "Usage: $0 [ -u user] -r sample_count -t sleep_time\n";
    exit(0);
}

for (my $i = 0; $i < $opt_r; $i++) {
    # The PID list is rebuilt for every sample so that processes started or
    # ended between samples are picked up.
    my @pids;

    if ($opt_u) {
        my $ps_out = `/usr/sysv/bin/ps -u $opt_u `;
        $ps_out = '' unless defined $ps_out;   # ps could not be run
        $ps_out =~ s/^.*\n//;                  # drop the first line (the ps header)
        $ps_out =~ s/\s*(\d+).*\n/$1 /g;       # reduce each line to its leading number
        @pids = split(/\s+/, $ps_out);
    } else {
        opendir(my $dh, '/proc') || die "Can't open /proc: $!";
        @pids = grep { /^[\d]+$/ } readdir($dh);
        closedir($dh);
    }

    foreach my $pid (@pids) {
        my $command = "/usr/bin/procstack $pid";
        # stderr is discarded, e.g. for processes that vanished meanwhile.
        print `$command 2>/dev/null`;
    }

    # Four-argument select is used as a sleep that accepts fractional seconds.
    select(undef, undef, undef, $opt_t);
}

FlameGraph/demos/README

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Flame Graph demos gathered and created for the talk "Blazing Performance with
2+
Flame Graphs" at USENIX/LISA 2013.
3+
4+
These SVGs cannot be viewed on GitHub directly; save them locally first (git
5+
clone or download), then open them in a browser (file://...).

0 commit comments

Comments
 (0)