Skip to content

Commit 6132e9d

Browse files
committed
Fixes #141 and #142.
1 parent 3f4e31a commit 6132e9d

6 files changed

Lines changed: 74 additions & 81 deletions

File tree

Project.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "KomaMRI"
22
uuid = "6a340f8b-2cdf-4c04-99be-4953d9b66d0a"
33
authors = ["Carlos Castillo Passi <cncastillo@uc.cl>"]
4-
version = "0.7.1"
4+
version = "0.7.2"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -12,7 +12,6 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
1212
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
1313
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
1414
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
15-
Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
1615
Interact = "c601a237-2ae4-5e1e-952c-7a85b0c7eef1"
1716
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
1817
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
@@ -37,7 +36,6 @@ CUDA = "3"
3736
FileIO = "1"
3837
Functors = "0.4"
3938
HDF5 = "0.16"
40-
Hwloc = "2"
4139
Interact = "0.10"
4240
Interpolations = "0.13, 0.14"
4341
JLD2 = "0.4"
Lines changed: 60 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,63 @@
11
### KOMA Multi-Shot Spiral CPU ###
2-
4.468904 seconds (348.87 k allocations: 7.061 GiB, 3.55% gc time, 0.74% compilation time)
3-
4.445230 seconds (304.62 k allocations: 7.059 GiB, 3.22% gc time)
4-
4.444298 seconds (304.62 k allocations: 7.059 GiB, 3.22% gc time)
5-
4.438783 seconds (304.62 k allocations: 7.059 GiB, 3.23% gc time)
6-
4.438602 seconds (304.62 k allocations: 7.059 GiB, 3.16% gc time)
7-
4.421823 seconds (304.62 k allocations: 7.059 GiB, 3.17% gc time)
8-
4.422771 seconds (304.49 k allocations: 7.059 GiB, 3.13% gc time)
9-
4.418151 seconds (304.62 k allocations: 7.059 GiB, 3.05% gc time)
10-
4.419588 seconds (304.62 k allocations: 7.059 GiB, 3.12% gc time)
11-
4.423932 seconds (304.62 k allocations: 7.059 GiB, 3.05% gc time)
12-
4.438833 seconds (304.62 k allocations: 7.059 GiB, 3.07% gc time)
13-
4.438832 seconds (304.62 k allocations: 7.059 GiB, 3.04% gc time)
14-
4.486883 seconds (304.62 k allocations: 7.059 GiB, 3.07% gc time)
15-
4.700309 seconds (304.49 k allocations: 7.059 GiB, 2.98% gc time)
16-
4.691741 seconds (304.62 k allocations: 7.059 GiB, 2.94% gc time)
17-
4.752007 seconds (304.36 k allocations: 7.059 GiB, 2.83% gc time)
18-
4.466893 seconds (304.49 k allocations: 7.059 GiB, 3.01% gc time)
19-
4.464153 seconds (304.49 k allocations: 7.059 GiB, 3.03% gc time)
20-
4.493246 seconds (304.62 k allocations: 7.059 GiB, 3.19% gc time)
21-
4.457662 seconds (304.62 k allocations: 7.059 GiB, 3.00% gc time)
2+
1.791288 seconds (348.88 k allocations: 7.061 GiB, 6.10% gc time, 2.04% compilation time)
3+
1.788641 seconds (305.13 k allocations: 7.059 GiB, 3.22% gc time)
4+
1.797306 seconds (305.01 k allocations: 7.059 GiB, 3.97% gc time)
5+
1.760078 seconds (305.16 k allocations: 7.059 GiB, 3.21% gc time)
6+
1.780291 seconds (304.88 k allocations: 7.059 GiB, 4.14% gc time)
7+
1.803833 seconds (304.74 k allocations: 7.059 GiB, 4.24% gc time)
8+
1.783240 seconds (305.01 k allocations: 7.059 GiB, 3.56% gc time)
9+
1.779908 seconds (305.00 k allocations: 7.059 GiB, 3.28% gc time)
10+
1.765165 seconds (304.60 k allocations: 7.059 GiB, 3.57% gc time)
11+
1.797075 seconds (305.39 k allocations: 7.059 GiB, 3.16% gc time)
12+
1.761110 seconds (305.29 k allocations: 7.059 GiB, 2.69% gc time)
13+
1.771107 seconds (305.01 k allocations: 7.059 GiB, 2.82% gc time)
14+
1.790191 seconds (304.60 k allocations: 7.059 GiB, 3.04% gc time)
15+
1.743203 seconds (305.00 k allocations: 7.059 GiB, 2.63% gc time)
16+
1.811720 seconds (305.14 k allocations: 7.059 GiB, 2.85% gc time)
17+
1.775254 seconds (305.00 k allocations: 7.059 GiB, 3.58% gc time)
18+
1.768405 seconds (304.86 k allocations: 7.059 GiB, 2.96% gc time)
19+
2.550956 seconds (304.59 k allocations: 7.059 GiB, 35.09% gc time)
20+
1.737120 seconds (305.14 k allocations: 7.059 GiB, 2.59% gc time)
21+
1.774394 seconds (305.01 k allocations: 7.059 GiB, 3.46% gc time)
2222
### KOMA Multi-Shot Spiral GPU0 ###
23-
0.191547 seconds (377.14 k allocations: 23.680 MiB, 17.10% compilation time)
24-
0.150679 seconds (332.70 k allocations: 21.300 MiB)
25-
0.151408 seconds (332.65 k allocations: 21.297 MiB)
26-
0.150065 seconds (332.85 k allocations: 21.309 MiB)
27-
0.151362 seconds (332.89 k allocations: 21.311 MiB)
28-
0.147650 seconds (332.80 k allocations: 21.305 MiB)
29-
0.151219 seconds (332.80 k allocations: 21.306 MiB)
30-
0.150325 seconds (332.70 k allocations: 21.300 MiB)
31-
0.148065 seconds (332.84 k allocations: 21.308 MiB)
32-
0.152075 seconds (332.94 k allocations: 21.314 MiB)
33-
0.150261 seconds (332.70 k allocations: 21.300 MiB)
34-
0.150011 seconds (332.94 k allocations: 21.314 MiB)
35-
0.149172 seconds (332.80 k allocations: 21.306 MiB)
36-
0.148890 seconds (332.75 k allocations: 21.302 MiB)
37-
0.148253 seconds (332.94 k allocations: 21.315 MiB)
38-
0.152094 seconds (332.75 k allocations: 21.303 MiB)
39-
0.147646 seconds (332.75 k allocations: 21.302 MiB)
40-
0.151022 seconds (332.94 k allocations: 21.315 MiB)
41-
0.149571 seconds (332.75 k allocations: 21.303 MiB)
42-
0.147972 seconds (332.80 k allocations: 21.306 MiB)
23+
0.222617 seconds (410.00 k allocations: 24.362 MiB, 10.73% gc time, 26.09% compilation time)
24+
0.149812 seconds (355.18 k allocations: 21.673 MiB)
25+
0.150912 seconds (355.30 k allocations: 21.673 MiB)
26+
0.154048 seconds (355.06 k allocations: 21.669 MiB)
27+
0.153823 seconds (355.26 k allocations: 21.673 MiB)
28+
0.150777 seconds (355.44 k allocations: 21.681 MiB)
29+
0.151338 seconds (355.24 k allocations: 21.677 MiB)
30+
0.155108 seconds (355.14 k allocations: 21.671 MiB)
31+
0.152609 seconds (355.07 k allocations: 21.667 MiB)
32+
0.152995 seconds (355.29 k allocations: 21.678 MiB)
33+
0.149100 seconds (355.31 k allocations: 21.676 MiB)
34+
0.148364 seconds (355.13 k allocations: 21.671 MiB)
35+
0.147747 seconds (355.24 k allocations: 21.675 MiB)
36+
0.152601 seconds (355.35 k allocations: 21.675 MiB)
37+
0.148840 seconds (355.51 k allocations: 21.683 MiB)
38+
0.150223 seconds (355.15 k allocations: 21.673 MiB)
39+
0.151326 seconds (355.41 k allocations: 21.681 MiB)
40+
0.149905 seconds (355.60 k allocations: 21.687 MiB)
41+
0.150076 seconds (355.47 k allocations: 21.685 MiB)
42+
0.152087 seconds (354.98 k allocations: 21.662 MiB)
4343
### KOMA Multi-Shot Spiral GPU1 ###
44-
0.383762 seconds (387.52 k allocations: 24.026 MiB, 3.61% gc time, 8.37% compilation time)
45-
0.326810 seconds (343.31 k allocations: 21.657 MiB, 2.48% gc time)
46-
0.317470 seconds (343.31 k allocations: 21.658 MiB, 2.54% gc time)
47-
0.315657 seconds (343.32 k allocations: 21.659 MiB, 2.59% gc time)
48-
0.312953 seconds (343.41 k allocations: 21.663 MiB, 2.58% gc time)
49-
0.317549 seconds (343.50 k allocations: 21.669 MiB, 2.56% gc time)
50-
0.316872 seconds (343.41 k allocations: 21.663 MiB, 2.57% gc time)
51-
0.318535 seconds (343.50 k allocations: 21.669 MiB, 2.54% gc time)
52-
0.317094 seconds (343.45 k allocations: 21.666 MiB, 2.54% gc time)
53-
0.317818 seconds (343.45 k allocations: 21.666 MiB, 2.57% gc time)
54-
0.319701 seconds (343.45 k allocations: 21.666 MiB, 2.51% gc time)
55-
0.320600 seconds (343.36 k allocations: 21.660 MiB, 2.50% gc time)
56-
0.316938 seconds (343.36 k allocations: 21.660 MiB, 2.56% gc time)
57-
0.318238 seconds (343.31 k allocations: 21.657 MiB, 2.51% gc time)
58-
0.316747 seconds (343.45 k allocations: 21.666 MiB, 2.58% gc time)
59-
0.323734 seconds (343.40 k allocations: 21.663 MiB, 2.49% gc time)
60-
0.320136 seconds (343.40 k allocations: 21.663 MiB, 2.52% gc time)
61-
0.316931 seconds (343.50 k allocations: 21.669 MiB, 2.55% gc time)
62-
0.316066 seconds (343.40 k allocations: 21.663 MiB, 2.53% gc time)
63-
0.315290 seconds (343.37 k allocations: 21.663 MiB, 2.57% gc time)
44+
0.375906 seconds (410.02 k allocations: 24.403 MiB, 6.18% gc time, 8.52% compilation time)
45+
0.328961 seconds (365.91 k allocations: 22.043 MiB, 2.27% gc time)
46+
0.324912 seconds (365.93 k allocations: 22.044 MiB, 2.21% gc time)
47+
0.317966 seconds (365.93 k allocations: 22.043 MiB, 2.31% gc time)
48+
0.317574 seconds (365.99 k allocations: 22.047 MiB, 2.41% gc time)
49+
0.316693 seconds (365.93 k allocations: 22.043 MiB, 2.25% gc time)
50+
0.318666 seconds (365.97 k allocations: 22.047 MiB, 2.37% gc time)
51+
0.316730 seconds (365.83 k allocations: 22.038 MiB, 2.29% gc time)
52+
0.317443 seconds (365.69 k allocations: 22.031 MiB, 2.33% gc time)
53+
0.318769 seconds (365.94 k allocations: 22.044 MiB, 2.36% gc time)
54+
0.320683 seconds (365.92 k allocations: 22.043 MiB, 2.22% gc time)
55+
0.319358 seconds (365.88 k allocations: 22.041 MiB, 2.38% gc time)
56+
0.318528 seconds (365.78 k allocations: 22.035 MiB, 2.33% gc time)
57+
0.322693 seconds (365.73 k allocations: 22.034 MiB, 2.31% gc time)
58+
0.323850 seconds (365.83 k allocations: 22.038 MiB, 2.66% gc time)
59+
0.320972 seconds (365.85 k allocations: 22.040 MiB, 2.41% gc time)
60+
0.324835 seconds (365.77 k allocations: 22.035 MiB, 2.22% gc time)
61+
0.322921 seconds (365.93 k allocations: 22.044 MiB, 2.19% gc time)
62+
0.320311 seconds (365.80 k allocations: 22.037 MiB, 2.24% gc time)
63+
0.324080 seconds (365.67 k allocations: 22.031 MiB, 2.23% gc time)

examples/3.koma_paper/comparison_speed/MRiLab_speed.jl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,22 +39,20 @@ if (ARGS == String[]) #No arguments, use defaults
3939
simParams = Dict{String,Any}(
4040
"Nblocks" => 20,
4141
"gpu" => true,
42-
"gpu_device" => 0,
43-
"Nthreads" => 1
42+
"gpu_device" => 0
4443
)
4544
else
4645
simParams = Dict{String,Any}(
4746
"Nblocks" => 20,
4847
"gpu" => ARGS[1] == "gpu" ? true : false,
49-
"gpu_device" => parse(Int64, ARGS[2]),
50-
"Nthreads" => parse(Int64, ARGS[3])
48+
"gpu_device" => parse(Int64, ARGS[2])
5149
)
5250
end
5351

5452
Nexp = 20
5553
raw = @suppress simulate(phantom, seq, sys; simParams) #warmup
5654
for i = 1:Nexp
57-
raw = simulate(phantom, seq, sys; simParams)
55+
local raw = simulate(phantom, seq, sys; simParams)
5856
end
5957
# plot_signal(raw; range=[50.5, 54]) #; show_sim_blocks=true)
6058

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
echo "### KOMA Multi-Shot Spiral CPU ###" | tee .out
3-
julia $1 ./MRiLab_speed.jl cpu 0 8 | tee -a .out #CPU
3+
julia -t 8 $1 ./MRiLab_speed.jl cpu 0 | tee -a .out #CPU
44
echo "### KOMA Multi-Shot Spiral GPU0 ###" | tee -a .out
5-
julia $1 ./MRiLab_speed.jl gpu 0 1 | tee -a .out #GPU0
5+
julia -t 8 $1 ./MRiLab_speed.jl gpu 0 | tee -a .out #GPU0
66
echo "### KOMA Multi-Shot Spiral GPU1 ###" | tee -a .out
7-
julia $1 ./MRiLab_speed.jl gpu 1 1 | tee -a .out #GPU1
7+
julia -t 8 $1 ./MRiLab_speed.jl gpu 1 | tee -a .out #GPU1

src/KomaMRI.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ using Scanf, ProgressMeter
1111
#Datatypes
1212
using Parameters
1313
#Simulation
14-
using CUDA, Interpolations, Hwloc
14+
using CUDA, Interpolations
1515
#Reconstruction
1616
using MRIReco, MRIFiles
1717
@reexport using MRIReco: RawAcquisitionData, AcquisitionData, reconstruction

src/simulation/SimulatorCore.jl

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
const Nphyscores = Hwloc.num_physical_cores()
2-
31
abstract type SimulationMethod end #get all available types by using subtypes(KomaMRI.SimulationMethod)
42
abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation)
53

@@ -28,7 +26,7 @@ separating the spins of the phantom `obj` in `Nthreads`.
2826
"""
2927
function run_spin_precession_parallel!(obj::Phantom{T}, seq::DiscreteSequence{T}, sig::AbstractArray{Complex{T}},
3028
Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod;
31-
Nthreads=Nphyscores) where {T<:Real}
29+
Nthreads=Threads.nthreads()) where {T<:Real}
3230

3331
parts = kfoldperm(length(obj), Nthreads, type="ordered")
3432

@@ -60,7 +58,7 @@ different number threads to execute the process.
6058
"""
6159
function run_spin_excitation_parallel!(obj::Phantom{T}, seq::DiscreteSequence{T},
6260
Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod;
63-
Nthreads=Nphyscores) where {T<:Real}
61+
Nthreads=Threads.nthreads()) where {T<:Real}
6462

6563
parts = kfoldperm(length(obj), Nthreads; type="ordered")
6664

@@ -98,7 +96,7 @@ take advantage of CPU parallel processing.
9896
"""
9997
function run_sim_time_iter!(obj::Phantom, seq::DiscreteSequence, sig::AbstractArray{Complex{T}},
10098
Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod;
101-
Nblocks=1, Nthreads=Nphyscores, parts=[1:length(seq)], w=nothing) where {T<:Real}
99+
Nblocks=1, Nthreads=Threads.nthreads(), parts=[1:length(seq)], w=nothing) where {T<:Real}
102100
# Simulation
103101
rfs = 0
104102
samples = 1
@@ -168,16 +166,15 @@ julia> plot_signal(ismrmrd)
168166
"""
169167
function simulate(obj::Phantom, seq::Sequence, sys::Scanner; simParams=Dict{String,Any}(), w=nothing)
170168
#Simulation parameter parsing, and setting defaults
171-
enable_gpu = get(simParams, "gpu", true)
169+
enable_gpu = get(simParams, "gpu", true); if enable_gpu check_use_cuda(); enable_gpu &= use_cuda[] end
172170
gpu_device = get(simParams, "gpu_device", 0)
173-
Nthreads = get(simParams, "Nthreads", enable_gpu ? 1 : Nphyscores)
171+
Nthreads = get(simParams, "Nthreads", enable_gpu ? 1 : Threads.nthreads())
174172
Nblocks = get(simParams, "Nblocks", 20)
175173
Δt = get(simParams, "Δt", 1e-3)
176174
Δt_rf = get(simParams, "Δt_rf", 5e-5)
177175
sim_method = get(simParams, "sim_method", Bloch())
178176
precision = get(simParams, "precision", "f32")
179-
return_type = get(simParams, "return_type", "raw")
180-
if enable_gpu check_use_cuda(); enable_gpu &= use_cuda[] end
177+
return_type = get(simParams, "return_type", "raw")
181178
# Simulation init
182179
t, Δt = get_uniform_times(seq, Δt; Δt_rf)
183180
t = [t; t[end]+Δt[end]]

0 commit comments

Comments
 (0)