Skip to content

[GSD-11964] clEnqueueReadBuffer fails on iGPUs for mapped host-buffer destinations #866

@FreddieWitherden

Description

@FreddieWitherden

Consider the following snippet which I believe to be a valid use of the OpenCL API:

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * CHECK(err, msg): terminate the program if an OpenCL status is not CL_SUCCESS.
 *
 *   err - expression yielding a cl_int status; evaluated exactly once, so it
 *         is safe to pass a function call directly.
 *   msg - C string naming the operation; printed together with the status.
 *
 * On failure prints "<msg> failed (<status>)" to stderr and calls exit(1).
 * The do { } while (0) wrapper makes the expansion a single statement, so the
 * macro can be used in an unbraced if/else without capturing the else.
 */
#define CHECK(err, msg) \
    do { \
        const cl_int check_status_ = (err); \
        if (check_status_ != CL_SUCCESS) { \
            fprintf(stderr, "%s failed (%d)\n", (msg), (int)check_status_); \
            exit(1); \
        } \
    } while (0)

/*
 * Reproducer: a non-blocking clEnqueueReadBuffer whose destination is the
 * host pointer obtained by mapping a CL_MEM_ALLOC_HOST_PTR buffer.
 *
 * Every OpenCL status is stored in `err` before being handed to CHECK, so the
 * code reported on failure is the one the call actually returned.
 *
 * Returns 0 when every checked call succeeds; CHECK exits with status 1 on
 * the first failure.
 */
int main(void) {
    cl_int err;

    /* Use the first platform reported and its default device. */
    cl_platform_id platform;
    err = clGetPlatformIDs(1, &platform, NULL);
    CHECK(err, "clGetPlatformIDs");

    cl_device_id device;
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL);
    CHECK(err, "clGetDeviceIDs");

    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    CHECK(err, "clCreateContext");

    /* NULL property list: no queue properties are requested. */
    cl_command_queue queue =
        clCreateCommandQueueWithProperties(context, device, NULL, &err);
    CHECK(err, "clCreateCommandQueueWithProperties");

    const size_t N = 16;
    const size_t bytes = N * sizeof(float);

    /* Source buffer: filled with a known pattern via the queue. */
    cl_mem dev_buf = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
    CHECK(err, "clCreateBuffer dev_buf");

    float pattern = 42.0f; /* fill value */
    err = clEnqueueFillBuffer(queue, dev_buf, &pattern, sizeof(float),
                              0, bytes, 0, NULL, NULL);
    CHECK(err, "clEnqueueFillBuffer");

    /* Destination: a second buffer created with CL_MEM_ALLOC_HOST_PTR. */
    cl_mem host_buf = clCreateBuffer(context,
                                     CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                                     bytes, NULL, &err);
    CHECK(err, "clCreateBuffer host_buf");

    /* Blocking map (CL_TRUE) for writing; host_ptr is usable on return. */
    void *host_ptr = clEnqueueMapBuffer(queue, host_buf, CL_TRUE,
                                        CL_MAP_WRITE, 0, bytes,
                                        0, NULL, NULL, &err);
    CHECK(err, "clEnqueueMapBuffer");

    /* The call under test: non-blocking read into the mapped pointer. */
    err = clEnqueueReadBuffer(queue, dev_buf, CL_FALSE, 0, bytes, host_ptr,
                              0, NULL, NULL);
    CHECK(err, "clEnqueueReadBuffer (non-blocking)");

    err = clFinish(queue);
    CHECK(err, "clFinish");

    err = clEnqueueUnmapMemObject(queue, host_buf, host_ptr, 0, NULL, NULL);
    CHECK(err, "clEnqueueUnmapMemObject");

    /* Wait for the unmap to complete and report any failure it surfaces. */
    err = clFinish(queue);
    CHECK(err, "clFinish (after unmap)");

    /* Teardown; the release statuses are intentionally not checked. */
    clReleaseMemObject(dev_buf);
    clReleaseMemObject(host_buf);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    return 0;
}

Running this on an NVIDIA GPU or an Intel A770m works as expected. However, on my IGPU (TigerLake-H GT1) it fails with:

clEnqueueReadBuffer (non-blocking) failed (-5)

Changing the destination to an ordinary buffer (from malloc) appears to work, as does switching to a blocking read. My runtime version is 25.40.35563.4 and I am on a 6.17.5 kernel with the i915 module.

Metadata

Metadata

Assignees

No one assigned

    Labels

    OS: Linux — Issue specific to Linux distributions (Ubuntu, Fedora, RHEL, etc.)
    Status: Backlog — Confirmed issue; pending scheduling or queued awaiting resources

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions