-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathfastpath.cpp
More file actions
198 lines (181 loc) · 7.99 KB
/
fastpath.cpp
File metadata and controls
198 lines (181 loc) · 7.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*/
#include "buffer.h"
#include "context.h"
#include "fastpath.h"
#include "file.h"
#include "hip.h"
#include "hipfile.h"
#include "io.h"
#include "stats.h"
#include <cstdint>
#include <fcntl.h>
#include <hip/hip_runtime_api.h>
#include <linux/stat.h>
#include <stdexcept>
using namespace hipFile;
using namespace std;
/* The fastpath backend is used when:
* - The file has been opened with the O_DIRECT flag
* - if statx contains direct io information
* - statx indicates the file descriptor supports direct IO (see statx (2))
* - file_offset is a multiple of stx_dio_offset_align (see statx (2))
* - buffer_offset is a multiple of stx_dio_mem_align (see statx (2))
* - size is a multiple of stx_dio_offset_align (see statx (2))
* - if statx does not contain direct io information
* - file_offset is a multiple of 4096
* - buffer_offset is a multiple of 4096
* - size is a multiple of 4096
* - The buffer type is hipMemoryTypeDevice
*
* When using the fastpath the IO flows through the following
* - hipFile HIP wrapper (userspace)
* - repository: hipFile
* - file: hipfile/src/amd_detail/hip.cpp
* - methods: Hip::hipAmdFileRead(...), Hip::hipAmdFileWrite(...)
* - throws
* - std::system_error if status is non-zero (set by kfd)
* - Hip::runtime_error if hip runtime does not return hipSuccess or
* if hipFile was unable to find hipAmdFileRead/hipAmdFileWrite
* - HIP Runtime (userspace)
* - repository: rocm-systems
* - file: projects/clr/hipamd/src/hip_storage.cpp
* - functions: hipAmdFileRead(...), hipAmdFileWrite(...)
* - hipAmdFileRead & hipAmdFileWrite are not exposed through a public
* header so hipGetProcAddress() is used to lookup each function's function
* pointer
* - returns
* - hipSuccess if size is zero
* - This check should be removed
* - hipErrorInvalidDevice if unable to lookup current device
* - hipErrorUnknown if rocr runtime does not return success
* - ROCR Runtime (userspace)
* - repository: rocm-systems
* - file: projects/clr/rocclr/device/rocm/rocdevice.cpp
* - methods: Device::amdFileRead(...), Device::amdFileWrite(...)
* - Does not perform any input validation
* - Logs on every IO failure
* - size_copied and status are never modified
* - returns error if HSA does not return HSA_STATUS_SUCCESS
* - HSA (userspace)
* - repository: rocm-systems
* - file: projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
* - hsa_amd_ais_file_read(...), hsa_amd_ais_file_write(...)
* - status and size_copied untouched
* - returns
* - HSA_STATUS_ERROR_INVALID_ARGUMENT if size is zero
* - This check should be removed
* - HSA_STATUS_ERROR_INVALID_ARGUMENT if devicePtr is NULL
* - This check should be removed
* - HSA_STATUS_ERROR if hsaKmtAisReadWriteFile(...) does not return success
* - Thunk (userspace)
* - repository: rocm-systems
* - file: projects/rocr-runtime/libhsakmt/src/ais.c
* - functions: hsaKmtAisReadWriteFile(...)
* - If the buffer pointer, and io_size combination are invalid, return HSAKMT_STATUS_INVALID_PARAMETER
* - converts buffer pointer + io_size to a buffer handle (no alignment check)
* - size_copied and status are untouched
* - returns
* - HSAKMT_STATUS_INVALID_PARAMETER if
* - the buffer pointer + size combination are invalid
* - if AisFlags (read/write) are invalid
* - HSAKMT_STATUS_ERROR if the kfd ioctl does not return success
* - KFD (chardev) (kernel)
* - repository: ROCm
* - file: amdgpu/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
* - function: kfd_ioctl_ais(...)
* - returns
* - -EINVAL if op != READ or WRITE
* - -EINVAL if fd < 0
* - -EINVAL if unable to lookup device
* - -ENODEV if AIS is not initialized on the device
* - -ESRCH if unable to bind the process to the device
* - -EINVAL if the buffer's domain is not AMDGPU_GEM_DOMAIN_VRAM
* - an error if it is unable to pin the buffer object
* - error is from amdgpu_amdkfd_gpuvm_pin_bo(...)
* - the result of the call to kfd_ais_rw_file(...)
* - the value returned is also written to the status variable
* - KFD (kernel)
* - repository: ROCm
* - file: amdgpu/drivers/gpu/drm/amd/amdkfd/kfd_ais.c
* - function: kfd_ais_rw_file(...)
* - Calls dev_dbg on every successful IO
* - May want to remove this
* - returns
* - -EINVAL if file_offset or size are not page aligned (4K)
* - This check should be removed /if/ the IO stack checks for alignment.
* - -EBADF if unable to lookup the file object
* - -ENODEV if unable to lookup the storage device
* - -ENODEV if pcie_p2pdma_distance < 0
* - This should be changed to return -EREMOTE
* - -EINVAL if buffer object domain is not AMDGPU_GEM_DOMAIN_VRAM
* - an error if unable to create a sg_table for the buffer object
* - error is from amdgpu_amdkfd_gpuvm_get_sg_table(...)
* - -ENOMEM if unable to init a bvec
* - -EIO on short reads
* - Need to confirm & fix
*/
/* Rate whether the fastpath backend can service the requested IO.
 *
 * The request is accepted only when all of the following hold:
 *  - the file exposes an unbuffered file descriptor
 *  - the buffer type is hipMemoryTypeDevice
 *  - file_offset and buffer_offset are non-negative
 *  - size and file_offset are multiples of the offset alignment
 *  - (buffer address + buffer_offset) is a multiple of the memory alignment
 *
 * Both alignments start out as 4096. When STATX_DIOALIGN is available at
 * compile time, the file's statx data must carry STATX_DIOALIGN with non-zero
 * stx_dio_offset_align / stx_dio_mem_align, and those values replace 4096.
 *
 * Returns 100 when the IO is accepted and -1 otherwise.
 */
int
Fastpath::score(shared_ptr<IFile> file, shared_ptr<IBuffer> buffer, size_t size, hoff_t file_offset,
                hoff_t buffer_offset) const
{
    // Each accessor is queried unconditionally and in a fixed order; the
    // individual verdicts are only combined at the very end.
    const bool has_unbuffered_fd{file->getUnbufferedFd().has_value()};
    const bool is_device_memory{buffer->getType() == hipMemoryTypeDevice};
    const bool offsets_non_negative{file_offset >= 0 && buffer_offset >= 0};

    // Masks assume the alignment is a power of two (alignment - 1).
    uint64_t mem_mask{4096 - 1};
    uint64_t offset_mask{4096 - 1};
    bool     dio_info_ok{true};

#if defined(STATX_DIOALIGN)
    const struct statx &file_stx{file->getStatx()};
    const bool          reports_dioalign{(file_stx.stx_mask & STATX_DIOALIGN) != 0};
    const bool          alignments_set{file_stx.stx_dio_offset_align && file_stx.stx_dio_mem_align};

    dio_info_ok = reports_dioalign && alignments_set;
    mem_mask    = file_stx.stx_dio_mem_align - 1;
    offset_mask = file_stx.stx_dio_offset_align - 1;
#endif

    const bool size_aligned{(size & offset_mask) == 0};
    const bool file_offset_aligned{(file_offset & static_cast<int64_t>(offset_mask)) == 0};

    auto       base_address{reinterpret_cast<intptr_t>(buffer->getBuffer())};
    const bool memory_aligned{((base_address + buffer_offset) & static_cast<int64_t>(mem_mask)) == 0};

    const bool accepted{has_unbuffered_fd && is_device_memory && offsets_non_negative && dio_info_ok &&
                        size_aligned && file_offset_aligned && memory_aligned};
    if (accepted) {
        return 100;
    }
    return -1;
}
/* Perform a read or write between a file and a buffer through the fastpath.
 *
 * The target pointer is the buffer's base address advanced by buffer_offset.
 * The request is checked with paramsValid(); its size is then capped at
 * MAX_RW_COUNT before being handed to hipAmdFileRead / hipAmdFileWrite.
 * The value returned by that call is recorded in the fastpath read/write
 * statistics and returned to the caller.
 *
 * Throws
 *  - std::invalid_argument if paramsValid() rejects the request
 *  - std::runtime_error if type is neither IoType::Read nor IoType::Write
 *  - NOTE(review): getUnbufferedFd().value() presumably throws when the file
 *    has no unbuffered descriptor -- confirm against IFile.
 */
ssize_t
Fastpath::io(IoType type, shared_ptr<IFile> file, shared_ptr<IBuffer> buffer, size_t size, hoff_t file_offset,
             hoff_t buffer_offset)
{
    const auto base_address{reinterpret_cast<intptr_t>(buffer->getBuffer())};
    void      *target{reinterpret_cast<void *>(base_address + buffer_offset)};

    hipAmdFileHandle_t file_handle{};
    file_handle.fd = file->getUnbufferedFd().value();

    if (!paramsValid(buffer, size, file_offset, buffer_offset)) {
        throw std::invalid_argument("The selected file or buffer region is invalid");
    }

    // Currently, when IO sizes > MAX_RW_COUNT are submitted to amdgpu/kfd an
    // Illegal Seek error is returned. hipFile caps the IO size at
    // MAX_RW_COUNT to avoid that. Once amdgpu/kfd properly handles IO sizes
    // > MAX_RW_COUNT the cap can be removed.
    size = std::min(size, MAX_RW_COUNT);

    size_t transferred{};
    switch (type) {
        case IoType::Read:
            transferred = Context<Hip>::get()->hipAmdFileRead(file_handle, target, size, file_offset);
            statsAddFastPathRead(transferred);
            break;
        case IoType::Write:
            transferred = Context<Hip>::get()->hipAmdFileWrite(file_handle, target, size, file_offset);
            statsAddFastPathWrite(transferred);
            break;
        default:
            throw std::runtime_error("Invalid IoType");
    }

    return static_cast<ssize_t>(transferred);
}