sciware

Sciware

Finding Where Your Code Spends Time

Performance Troubleshooting and Profiling

https://github.com/flatironinstitute/sciware/tree/main/19_Profiling

Rules of Engagement

Goal:

Activities where participants all actively work to foster an environment which encourages participation across experience levels, coding language fluency, technology choices*, and scientific disciplines.

*though sometimes we try to expand your options

Rules of Engagement

(These will always be a work in progress and will be updated, clarified, or expanded as needed.)

Zoom Specific

Future Sessions

Today’s agenda

Profiling Intro

Dylan Simon

Goal

“Wall time”

Time units

Profilers

Strategies

Bite-sized pieces

Simple timing

Jeff Soules

Smallest and largest scales

time

Calling time

So on my laptop:

$ /usr/bin/time -p src/sciware/19_Profiling/python_example/simulation.py

What time shows

What time doesn’t show

Selected examples

Selected examples

Understanding time

timeit

How to timeit

timeit output

A/B Testing

A

Vector manipuations with built-in arrays and explicit loops:

python -m timeit "
a = range(100)
b = range(99, -1, -1)
c = []
for x, index in enumerate(a):
  c.append(x + b[index])
d = []
for x in a:
  d.append(6 * x)
print(c, file=sys.stderr)
print(d, file=sys.stderr)
"

B

The same vector manipulations with numpy arrays:

python -m timeit "
import numpy as np
a = np.arange(100)
b = np.arange(99, -1, -1)
c = a + b
d = 6 * a
print(c, file=sys.stderr)
print(d, file=sys.stderr)
"

I/O Makes A Difference

...
print(d, file=sys.stderr)
" 2>/dev/null

Use Representative Data

Small Changes, Big Effects

Complexity Never Goes Away

Complexity Never Goes Away

Python profiling

Robert Blackwell (SCC)

Examples

Python CPU profilers

Way too many to choose from, with different costs and benefits

cProfile - a python staple

Let’s try it out!

Julia profiling

James Smith (CCQ)

Examples

Using @time Pt. 1


function copy_cols(x::Vector{Float64})
    n = size(x, 1)
    out = zeros(Float64, n, n)
    for i = 1:n
        out[:, i] = x
    end
    out
end
    
    

function copy_rows(x::Vector{Float64})
    n = size(x, 1)
    out = zeros(Float64, n, n)
    for i = 1:n
        out[i, :] = x
    end
    out
end
      
    

Using @time Pt. 2

Using @time Pt. 3

➜  julia 01_timer.jl 
Copying vector to columns
  0.354818 seconds (2 allocations: 762.939 MiB, 1.12% gc time)
Copying vector to rows
  1.033734 seconds (2 allocations: 762.939 MiB, 3.08% gc time)

Using @profile Pt. 1


function add_no_prealloc(x::Vector{Float64})
    x_new = x .+ 3.0
    return x_new
end
    
    

function add_prealloc!(x::Vector{Float64})
    x .+= 3.0
    nothing
end
      
    

Using @profile Pt. 2

function main()
    x = zeros(10)

    println("\nShowing the profiling info")
    @profile (
        for i = 1:1e7
            add_no_prealloc(x)
            add_prealloc!(x)
        end
    )
    Profile.print(format = :tree, maxdepth = 12)

end

Using @profile Pt. 3


Showing the profiling info
Overhead ╎ [+additional indent] Count File:Line; Function
=========================================================
   ╎398 @Base/client.jl:495; _start()
   ╎ 398 @Base/client.jl:292; exec_options(opts::Base.JLOptions)
   ╎  398 @Base/Base.jl:418; include(mod::Module, _path::String)
   ╎   398 @Base/loading.jl:1253; _include(mapexpr::Function, mod::Module, _path::String)
   ╎    398 @Base/loading.jl:1196; include_string(mapexpr::typeof(identity), mod::Module, code::String, filename::String)
   ╎     398 @Base/boot.jl:373; eval
 12╎    ╎ 12  ...ojects/sciware/19_Profiling/julia_example/02_profiling.jl:5; add_no_prealloc(x::Vector{Float64})
  4╎    ╎ 4   ...ojects/sciware/19_Profiling/julia_example/02_profiling.jl:7; add_no_prealloc(x::Vector{Float64})
  1╎    ╎ 1   ...ojects/sciware/19_Profiling/julia_example/02_profiling.jl:10; add_prealloc!(x::Vector{Float64})
   ╎    ╎ 381 ...ojects/sciware/19_Profiling/julia_example/02_profiling.jl:38; main()
   ╎    ╎  381 ...k-src/usr/share/julia/stdlib/v1.7/Profile/src/Profile.jl:28; macro expansion
  7╎    ╎   332 ...jects/sciware/19_Profiling/julia_example/02_profiling.jl:40; macro expansion
  3╎    ╎    3   @Base/simdloop.jl:0; add_no_prealloc(x::Vector{Float64})
 10╎    ╎    10  ...jects/sciware/19_Profiling/julia_example/02_profiling.jl:5; add_no_prealloc(x::Vector{Float64})
   ╎    ╎    311 ...jects/sciware/19_Profiling/julia_example/02_profiling.jl:6; add_no_prealloc(x::Vector{Float64})
  1╎    ╎    1   ...jects/sciware/19_Profiling/julia_example/02_profiling.jl:7; add_no_prealloc(x::Vector{Float64})
  2╎    ╎   49  ...jects/sciware/19_Profiling/julia_example/02_profiling.jl:41; macro expansion
   ╎    ╎    41  ...ects/sciware/19_Profiling/julia_example/02_profiling.jl:11; add_prealloc!(x::Vector{Float64})
  6╎    ╎    6   ...ects/sciware/19_Profiling/julia_example/02_profiling.jl:12; add_prealloc!(x::Vector{Float64})
Total snapshots: 800

Using PProf Pt. 1


function complicated_func()
    # Pick parameters for our function
    p = [0.1, -0.5, 0.42, -3, 0.01, -0.2]
    n = 2000000
    
    # Setting up our data
    x = LinRange(0, 10, n)
    y = zeros(length(x))
    
    for i = 1:length(p)
        y .+= p[i] * x .^ i
    end
    
    # Add some noise
    y .+= rand(n) * 0.01
    
    # Setup X for solving
    X = zeros(Float64, (length(x), length(p)))
    for i = 1:length(p)
        X[:, i] = x .^ i
    end
    
    # Solve  Xβ=y
    β = X \ y
    error = (β - p) / norm(p)
    println("Relative error in coefficients ", error)
end      

Using PProf Pt. 2

function main()

    @profile complicated_func()
    Profile.print(format = :tree, maxdepth = 9)

    # Save the data for later
    save("_03_profile_data.jlprof", Profile.retrieve()...)

end

Using PProf Pt. 3

julia> using PProf, FlameGraphs, FileIO
julia> data = load("_03_profile_data.jlprof")
julia> g = flamegraph(data[1]; lidict=data[2])
julia> pprof(g)

Using PProf Pt. 3

Using PProf Pt. 4

Julia Wrap-Up

C/C++ profiling

with gprof

Dylan Simon

Building

Running

Example

19_Profiling/gprof_example

Takeaways

Alternatives

Questions

Survey