diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.c b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.c index 6ff8d37fb1e308..78ff5de9798051 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.c +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.c @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright (C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: * @@ -25,6 +25,10 @@ #include "hevc_d_video.h" #include "hevc_d_hw.h" +int hevc_d_v4l2_debug; +module_param_named(debug, hevc_d_v4l2_debug, int, 0644); +MODULE_PARM_DESC(debug, "Debug level 0-2"); + /* * Default /dev/videoN node number. * Deliberately avoid the very low numbers as these are often taken by webcams @@ -34,75 +38,41 @@ static int video_nr = 19; module_param(video_nr, int, 0644); MODULE_PARM_DESC(video_nr, "decoder video device number"); -static const struct hevc_d_control hevc_d_ctrls[] = { +static const struct v4l2_ctrl_config hevc_d_ctrls[] = { { - .cfg = { - .id = V4L2_CID_STATELESS_HEVC_SPS, - .ops = &hevc_d_hevc_sps_ctrl_ops, - }, - .required = false, + .id = V4L2_CID_STATELESS_HEVC_SPS, + .ops = &hevc_d_hevc_sps_ctrl_ops, }, { - .cfg = { - .id = V4L2_CID_STATELESS_HEVC_PPS, - .ops = &hevc_d_hevc_pps_ctrl_ops, - }, - .required = false, + .id = V4L2_CID_STATELESS_HEVC_PPS, + .ops = &hevc_d_hevc_pps_ctrl_ops, }, { - .cfg = { - .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, - }, - .required = false, + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, }, { - .cfg = { - .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, - }, - .required = true, + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, }, { - .cfg = { - .name = "Slice param array", - .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, - .type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS, - .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY, - .dims = { 0x1000 }, - }, - .required = true, + .name = "Slice param array", + .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, + .type = 
V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS, + .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY, + .dims = { 600 }, }, { - .cfg = { - .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, - .min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, - .max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, - .def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, - }, - .required = false, + .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, + .min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + .max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + .def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, }, { - .cfg = { - .id = V4L2_CID_STATELESS_HEVC_START_CODE, - .min = V4L2_STATELESS_HEVC_START_CODE_NONE, - .max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, - .def = V4L2_STATELESS_HEVC_START_CODE_NONE, - }, - .required = false, + .id = V4L2_CID_STATELESS_HEVC_START_CODE, + .min = V4L2_STATELESS_HEVC_START_CODE_NONE, + .max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + .def = V4L2_STATELESS_HEVC_START_CODE_NONE, }, }; -#define HEVC_D_CTRLS_COUNT ARRAY_SIZE(hevc_d_ctrls) - -struct v4l2_ctrl *hevc_d_find_ctrl(struct hevc_d_ctx *ctx, u32 id) -{ - unsigned int i; - - for (i = 0; i < HEVC_D_CTRLS_COUNT; i++) - if (ctx->ctrls[i]->id == id) - return ctx->ctrls[i]; - - return NULL; -} - void *hevc_d_find_control_data(struct hevc_d_ctx *ctx, u32 id) { - struct v4l2_ctrl *const ctrl = hevc_d_find_ctrl(ctx, id); + struct v4l2_ctrl *const ctrl = v4l2_ctrl_find(ctx->fh.ctrl_handler, id); - return !ctrl ? NULL : ctrl->p_cur.p; + return ctrl ? 
ctrl->p_cur.p : NULL; } static int hevc_d_init_ctrls(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx) @@ -111,31 +81,23 @@ static int hevc_d_init_ctrls(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx) struct v4l2_ctrl *ctrl; unsigned int i; - v4l2_ctrl_handler_init(hdl, HEVC_D_CTRLS_COUNT); + v4l2_ctrl_handler_init(hdl, ARRAY_SIZE(hevc_d_ctrls)); if (hdl->error) { v4l2_err(&dev->v4l2_dev, "Failed to initialize control handler\n"); return hdl->error; } - ctx->ctrls = kzalloc(HEVC_D_CTRLS_COUNT * sizeof(ctrl), GFP_KERNEL); - if (!ctx->ctrls) - return -ENOMEM; - - for (i = 0; i < HEVC_D_CTRLS_COUNT; i++) { - ctrl = v4l2_ctrl_new_custom(hdl, &hevc_d_ctrls[i].cfg, - ctx); + for (i = 0; i < ARRAY_SIZE(hevc_d_ctrls); i++) { + ctrl = v4l2_ctrl_new_custom(hdl, &hevc_d_ctrls[i], ctx); if (hdl->error) { v4l2_err(&dev->v4l2_dev, "Failed to create new custom control id=%#x\n", - hevc_d_ctrls[i].cfg.id); + hevc_d_ctrls[i].id); v4l2_ctrl_handler_free(hdl); - kfree(ctx->ctrls); return hdl->error; } - - ctx->ctrls[i] = ctrl; } ctx->fh.ctrl_handler = hdl; @@ -144,84 +106,15 @@ static int hevc_d_init_ctrls(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx) return 0; } -static int hevc_d_request_validate(struct media_request *req) -{ - struct media_request_object *obj; - struct v4l2_ctrl_handler *parent_hdl, *hdl; - struct hevc_d_ctx *ctx = NULL; - struct v4l2_ctrl *ctrl_test; - unsigned int count; - unsigned int i; - - list_for_each_entry(obj, &req->objects, list) { - struct vb2_buffer *vb; - - if (vb2_request_object_is_buffer(obj)) { - vb = container_of(obj, struct vb2_buffer, req_obj); - ctx = vb2_get_drv_priv(vb->vb2_queue); - - break; - } - } - - if (!ctx) - return -ENOENT; - - count = vb2_request_buffer_cnt(req); - if (!count) { - v4l2_info(&ctx->dev->v4l2_dev, - "No buffer was provided with the request\n"); - return -ENOENT; - } else if (count > 1) { - v4l2_info(&ctx->dev->v4l2_dev, - "More than one buffer was provided with the request\n"); - return -EINVAL; - } - - parent_hdl = 
&ctx->hdl; - - hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl); - if (!hdl) { - v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n"); - return -ENOENT; - } - - for (i = 0; i < HEVC_D_CTRLS_COUNT; i++) { - if (!hevc_d_ctrls[i].required) - continue; - - ctrl_test = - v4l2_ctrl_request_hdl_ctrl_find(hdl, - hevc_d_ctrls[i].cfg.id); - if (!ctrl_test) { - v4l2_info(&ctx->dev->v4l2_dev, - "Missing required codec control %d: id=%#x\n", - i, hevc_d_ctrls[i].cfg.id); - v4l2_ctrl_request_hdl_put(hdl); - return -ENOENT; - } - } - - v4l2_ctrl_request_hdl_put(hdl); - - return vb2_request_validate(req); -} - static int hevc_d_open(struct file *file) { struct hevc_d_dev *dev = video_drvdata(file); struct hevc_d_ctx *ctx = NULL; int ret; - if (mutex_lock_interruptible(&dev->dev_mutex)) - return -ERESTARTSYS; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - mutex_unlock(&dev->dev_mutex); - ret = -ENOMEM; - goto err_unlock; - } + if (!ctx) + return -ENOMEM; mutex_init(&ctx->ctx_mutex); @@ -229,15 +122,11 @@ static int hevc_d_open(struct file *file) file->private_data = &ctx->fh; ctx->dev = dev; - ret = hevc_d_init_ctrls(dev, ctx); - if (ret) - goto err_free; - ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &hevc_d_queue_init); if (IS_ERR(ctx->fh.m2m_ctx)) { ret = PTR_ERR(ctx->fh.m2m_ctx); - goto err_ctrls; + goto err_free; } /* The only bit of format info that we can guess now is H265 src @@ -245,45 +134,36 @@ static int hevc_d_open(struct file *file) */ hevc_d_prepare_src_format(&ctx->src_fmt); - v4l2_fh_add(&ctx->fh, file); - - mutex_unlock(&dev->dev_mutex); + ret = hevc_d_init_ctrls(dev, ctx); + if (ret) + goto err_ctx; + v4l2_fh_add(&ctx->fh, file); return 0; -err_ctrls: - v4l2_ctrl_handler_free(&ctx->hdl); - kfree(ctx->ctrls); +err_ctx: + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); err_free: mutex_destroy(&ctx->ctx_mutex); kfree(ctx); -err_unlock: - mutex_unlock(&dev->dev_mutex); - return ret; } static int hevc_d_release(struct file *file) { - struct 
hevc_d_dev *dev = video_drvdata(file); struct hevc_d_ctx *ctx = container_of(file->private_data, struct hevc_d_ctx, fh); - mutex_lock(&dev->dev_mutex); - v4l2_fh_del(&ctx->fh, file); - v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); v4l2_ctrl_handler_free(&ctx->hdl); - kfree(ctx->ctrls); + + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); v4l2_fh_exit(&ctx->fh); mutex_destroy(&ctx->ctx_mutex); kfree(ctx); - - mutex_unlock(&dev->dev_mutex); - return 0; } @@ -317,7 +197,7 @@ static const struct v4l2_m2m_ops hevc_d_m2m_ops = { }; static const struct media_device_ops hevc_d_m2m_media_ops = { - .req_validate = hevc_d_request_validate, + .req_validate = vb2_request_validate, .req_queue = hevc_d_media_req_queue, }; @@ -335,7 +215,6 @@ static int hevc_d_probe(struct platform_device *pdev) dev->dev = &pdev->dev; dev->pdev = pdev; - ret = 0; ret = hevc_d_hw_probe(dev); if (ret) { dev_err(&pdev->dev, "Failed to probe hardware - %d\n", ret); @@ -425,11 +304,9 @@ static void hevc_d_remove(struct platform_device *pdev) { struct hevc_d_dev *dev = platform_get_drvdata(pdev); - if (media_devnode_is_registered(dev->mdev.devnode)) { - media_device_unregister(&dev->mdev); - v4l2_m2m_unregister_media_controller(dev->m2m_dev); - media_device_cleanup(&dev->mdev); - } + media_device_unregister(&dev->mdev); + v4l2_m2m_unregister_media_controller(dev->m2m_dev); + media_device_cleanup(&dev->mdev); v4l2_m2m_release(dev->m2m_dev); video_unregister_device(&dev->vfd); @@ -439,7 +316,8 @@ static void hevc_d_remove(struct platform_device *pdev) } static const struct of_device_id hevc_d_dt_match[] = { - { .compatible = "raspberrypi,hevc-dec", }, + { .compatible = "brcm,bcm2711-hevc-dec", }, + { .compatible = "brcm,bcm2712-hevc-dec", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, hevc_d_dt_match); diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.h b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.h index 37c2d1612d2a4b..61446165d5e58e 100644 --- 
a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.h +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d.h @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright (C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: * @@ -22,28 +22,44 @@ #include #include -#define HEVC_D_DEC_ENV_COUNT 6 +/* Decoder limits */ +#define HEVC_D_MIN_WIDTH 16U +#define HEVC_D_MIN_HEIGHT 16U +#define HEVC_D_DEFAULT_WIDTH 1920U +#define HEVC_D_DEFAULT_HEIGHT 1088U +#define HEVC_D_MAX_WIDTH 4096U +#define HEVC_D_MAX_HEIGHT 4096U + +/* + * Q sizes of 3 give one entry being prepared, one waiting and + * one processing. Testing shows no advantage to greater Q depths + */ + +/* + * Max processing Q size Phase 0 -> Phase 1 + * This is per open context + */ #define HEVC_D_P1BUF_COUNT 3 +/* + * Max processing Q size Phase 1 -> Phase 2 + * This is per device + */ #define HEVC_D_P2BUF_COUNT 3 +/* + * Number of decode environments a context has + * There is no independent flow control on this number so it must be + * capable of holding P1 + P2 entries. + */ +#define HEVC_D_DEC_ENV_COUNT (HEVC_D_P1BUF_COUNT + HEVC_D_P2BUF_COUNT) #define HEVC_D_NAME "rpi-hevc-dec" -#define HEVC_D_CAPABILITY_UNTILED BIT(0) -#define HEVC_D_CAPABILITY_H265_DEC BIT(1) - -#define HEVC_D_QUIRK_NO_DMA_OFFSET BIT(0) - enum hevc_d_irq_status { HEVC_D_IRQ_NONE, HEVC_D_IRQ_ERROR, HEVC_D_IRQ_OK, }; -struct hevc_d_control { - struct v4l2_ctrl_config cfg; - unsigned char required:1; -}; - struct hevc_d_h265_run { u32 slice_ents; const struct v4l2_ctrl_hevc_sps *sps; @@ -67,9 +83,9 @@ struct hevc_d_buffer { struct hevc_d_dec_state; struct hevc_d_dec_env; -struct hevc_d_gptr { +struct hevc_d_hwbuf { size_t size; - __u8 *ptr; + u8 *ptr; dma_addr_t addr; unsigned long attrs; }; @@ -88,9 +104,6 @@ struct hevc_d_ctx { struct v4l2_pix_format_mplane dst_fmt; int dst_fmt_set; - int src_stream_on; - int dst_stream_on; - /* * fatal_err is set if an error has occurred s.t. 
decode cannot * continue (such as running out of CMA) @@ -101,7 +114,6 @@ struct hevc_d_ctx { struct mutex ctx_mutex; struct v4l2_ctrl_handler hdl; - struct v4l2_ctrl **ctrls; /* * state contains stuff that is only needed in phase0 @@ -116,12 +128,11 @@ struct hevc_d_ctx { struct hevc_d_dec_env *dec_pool; - unsigned int p1idx; atomic_t p1out; unsigned int p2idx; - struct hevc_d_gptr pu_bufs[HEVC_D_P2BUF_COUNT]; - struct hevc_d_gptr coeff_bufs[HEVC_D_P2BUF_COUNT]; + struct hevc_d_hwbuf pu_bufs[HEVC_D_P2BUF_COUNT]; + struct hevc_d_hwbuf coeff_bufs[HEVC_D_P2BUF_COUNT]; /* Spinlock protecting aux_free */ spinlock_t aux_lock; @@ -133,12 +144,6 @@ struct hevc_d_ctx { unsigned int colmv_picsize; }; -struct hevc_d_variant { - unsigned int capabilities; - unsigned int quirks; - unsigned int mod_rate; -}; - struct hevc_d_hw_irq_ent; #define HEVC_D_ICTL_ENABLE_UNLIMITED (-1) @@ -163,12 +168,11 @@ struct hevc_d_dev { struct v4l2_device v4l2_dev; struct video_device vfd; struct media_device mdev; - struct media_pad pad[2]; struct platform_device *pdev; struct device *dev; struct v4l2_m2m_dev *m2m_dev; - /* Device file mutex */ + /* Video device file (vfd) mutex */ struct mutex dev_mutex; void __iomem *base_irq; @@ -177,12 +181,14 @@ struct hevc_d_dev { struct clk *clock; unsigned long max_clock_rate; - int cache_align; - struct hevc_d_hw_irq_ctrl ic_active1; struct hevc_d_hw_irq_ctrl ic_active2; }; +extern int hevc_d_v4l2_debug; +#define hevc_d_dbg(level, dev, fmt, arg...)\ + v4l2_dbg((level), hevc_d_v4l2_debug, (dev), fmt, ## arg) + struct v4l2_ctrl *hevc_d_find_ctrl(struct hevc_d_ctx *ctx, u32 id); void *hevc_d_find_control_data(struct hevc_d_ctx *ctx, u32 id); diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c index 6c9a9d6db15a54..b86c935e8b3ae9 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c @@ -2,7 +2,7 @@ 
/* * Raspberry Pi HEVC driver * - * Copyright (C) 2020 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: * @@ -21,6 +21,15 @@ #include "hevc_d_hw.h" #include "hevc_d_video.h" +/* Maximum length of command buffer before we rate it an error */ +#define CMD_BUFFER_SIZE_MAX 0x100000 + +/* Initial size of command FIFO in commands. + * The FIFO will be extended if this value is exceeded but 8192 seems to + * deal with all streams found in the wild. + */ +#define CMD_BUFFER_SIZE_INIT 8192 + enum hevc_slice_type { HEVC_SLICE_B = 0, HEVC_SLICE_P = 1, @@ -29,62 +38,62 @@ enum hevc_slice_type { enum hevc_layer { L0 = 0, L1 = 1 }; -static int gptr_alloc(struct hevc_d_dev *const dev, struct hevc_d_gptr *gptr, - size_t size, unsigned long attrs) +static int hwbuf_alloc(struct hevc_d_dev *const dev, struct hevc_d_hwbuf *hwbuf, + size_t size, unsigned long attrs) { - gptr->size = size; - gptr->attrs = attrs; - gptr->addr = 0; - gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, &gptr->addr, - GFP_KERNEL, gptr->attrs); - return !gptr->ptr ? -ENOMEM : 0; + hwbuf->size = size; + hwbuf->attrs = attrs; + hwbuf->addr = 0; + hwbuf->ptr = dma_alloc_attrs(dev->dev, hwbuf->size, &hwbuf->addr, + GFP_KERNEL, hwbuf->attrs); + return !hwbuf->ptr ? -ENOMEM : 0; } -static void gptr_free(struct hevc_d_dev *const dev, - struct hevc_d_gptr *const gptr) +static void hwbuf_free(struct hevc_d_dev *const dev, + struct hevc_d_hwbuf *const hwbuf) { - if (gptr->ptr) - dma_free_attrs(dev->dev, gptr->size, gptr->ptr, gptr->addr, - gptr->attrs); - gptr->size = 0; - gptr->ptr = NULL; - gptr->addr = 0; - gptr->attrs = 0; + if (hwbuf->ptr) + dma_free_attrs(dev->dev, hwbuf->size, hwbuf->ptr, hwbuf->addr, + hwbuf->attrs); + hwbuf->size = 0; + hwbuf->ptr = NULL; + hwbuf->addr = 0; + hwbuf->attrs = 0; } /* Realloc but do not copy * * Frees then allocs. 
- * If the alloc fails then it attempts to re-allocote the old size - * On error then check gptr->ptr to determine if anything is currently + * If the alloc fails then it attempts to re-allocate the old size + * On error check hwbuf->ptr to determine if anything is currently * allocated. */ -static int gptr_realloc_new(struct hevc_d_dev * const dev, - struct hevc_d_gptr * const gptr, size_t size) +static int hwbuf_realloc_new(struct hevc_d_dev * const dev, + struct hevc_d_hwbuf * const hwbuf, size_t size) { - const size_t old_size = gptr->size; + const size_t old_size = hwbuf->size; - if (size == gptr->size) + if (size == hwbuf->size) return 0; - if (gptr->ptr) - dma_free_attrs(dev->dev, gptr->size, gptr->ptr, - gptr->addr, gptr->attrs); - - gptr->addr = 0; - gptr->size = size; - gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, - &gptr->addr, GFP_KERNEL, gptr->attrs); - - if (!gptr->ptr) { - gptr->addr = 0; - gptr->size = old_size; - gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, - &gptr->addr, GFP_KERNEL, gptr->attrs); - if (!gptr->ptr) { - gptr->size = 0; - gptr->addr = 0; - gptr->attrs = 0; + if (hwbuf->ptr) + dma_free_attrs(dev->dev, hwbuf->size, hwbuf->ptr, + hwbuf->addr, hwbuf->attrs); + + hwbuf->addr = 0; + hwbuf->size = size; + hwbuf->ptr = dma_alloc_attrs(dev->dev, hwbuf->size, + &hwbuf->addr, GFP_KERNEL, hwbuf->attrs); + + if (!hwbuf->ptr) { + hwbuf->addr = 0; + hwbuf->size = old_size; + hwbuf->ptr = dma_alloc_attrs(dev->dev, hwbuf->size, + &hwbuf->addr, GFP_KERNEL, hwbuf->attrs); + if (!hwbuf->ptr) { + hwbuf->size = 0; + hwbuf->addr = 0; + hwbuf->attrs = 0; } return -ENOMEM; } @@ -92,6 +101,24 @@ static int gptr_realloc_new(struct hevc_d_dev * const dev, return 0; } +/* Realloc with copy */ +static int hwbuf_realloc_copy(struct hevc_d_dev * const dev, + struct hevc_d_hwbuf * const hwbuf, size_t newsize) +{ + struct hevc_d_hwbuf bnew; + + if (newsize <= hwbuf->size) + return 0; + + if (hwbuf_alloc(dev, &bnew, newsize, hwbuf->attrs)) + return -ENOMEM; 
+ + memcpy(bnew.ptr, hwbuf->ptr, hwbuf->size); + hwbuf_free(dev, hwbuf); + *hwbuf = bnew; + return 0; +} + static size_t next_size(const size_t x) { return hevc_d_round_up_size(x + 1); @@ -106,16 +133,11 @@ static size_t next_size(const size_t x) #define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX -struct rpi_cmd { - u32 addr; - u32 data; -} __packed; - struct hevc_d_q_aux { unsigned int refcount; unsigned int q_index; struct hevc_d_q_aux *next; - struct hevc_d_gptr col; + struct hevc_d_hwbuf col; }; enum hevc_d_decode_state { @@ -125,16 +147,22 @@ enum hevc_d_decode_state { HEVC_D_DECODE_END, }; +/* + * Decode environment + * One dec_env is allocated per active frame decode and holds state that + * needs to persist between decode phases or callbacks + */ struct hevc_d_dec_env { struct hevc_d_ctx *ctx; struct hevc_d_dec_env *next; enum hevc_d_decode_state state; unsigned int decode_order; - int p1_status; /* P1 status - what to realloc */ + int p1_status; /* Phase 1 status - what to realloc */ - struct rpi_cmd *cmd_fifo; - unsigned int cmd_len, cmd_max; + struct hevc_d_hwbuf cmd; + unsigned int cmd_len; + unsigned int cmd_max; unsigned int num_slice_msgs; unsigned int pic_width_in_ctbs_y; unsigned int pic_height_in_ctbs_y; @@ -167,9 +195,6 @@ struct hevc_d_dec_env { struct hevc_d_q_aux *frame_aux; struct hevc_d_q_aux *col_aux; - dma_addr_t cmd_addr; - size_t cmd_size; - dma_addr_t pu_base_vc; dma_addr_t coeff_base_vc; u32 pu_stride; @@ -179,10 +204,15 @@ struct hevc_d_dec_env { u16 slice_msgs[SLICE_MSGS_MAX]; u8 scaling_factors[NUM_SCALING_FACTORS]; - struct media_request *req_pin; + struct media_request *request; struct hevc_d_hw_irq_ent irq_ent; }; +/* + * Decode state + * Decode information that persists between frame decodes but is only + * used or changed in phase 0 (setup) + */ struct hevc_d_dec_state { struct v4l2_ctrl_hevc_sps sps; struct v4l2_ctrl_hevc_pps pps; @@ -200,12 +230,13 @@ struct hevc_d_dec_state { int *ctb_addr_rs_to_ts; int *ctb_addr_ts_to_rs; 
- /* Aux starage for DPB */ + /* Aux storage for DPB */ struct hevc_d_q_aux *ref_aux[HEVC_MAX_REFS]; struct hevc_d_q_aux *frame_aux; /* Slice vars */ unsigned int slice_idx; + unsigned int idx_inuse; bool slice_temporal_mvp; /* Slice flag but constant for frame */ bool use_aux; bool mk_aux; @@ -216,8 +247,9 @@ struct hevc_d_dec_state { const struct v4l2_ctrl_hevc_decode_params *dec; unsigned int nb_refs[2]; unsigned int slice_qp; - unsigned int max_num_merge_cand; // 0 if I-slice + unsigned int max_num_merge_cand; /* 0 if I-slice */ bool dependent_slice_segment_flag; + u32 data_len; unsigned int start_ts; /* slice_segment_addr -> ts */ unsigned int start_ctb_x; /* CTB X,Y of start_ts */ @@ -226,57 +258,38 @@ struct hevc_d_dec_state { unsigned int prev_ctb_y; }; -static inline int clip_int(const int x, const int lo, const int hi) -{ - return x < lo ? lo : x > hi ? hi : x; -} - /* Phase 1 command and bit FIFOs */ static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n) { - struct rpi_cmd *a; unsigned int newmax; - if (n > 0x100000) { - v4l2_err(&de->ctx->dev->v4l2_dev, - "%s: n %u implausible\n", __func__, n); - return -ENOMEM; - } - if (de->cmd_len + n <= de->cmd_max) return 0; newmax = roundup_pow_of_two(de->cmd_len + n); + if (newmax > CMD_BUFFER_SIZE_MAX) { + v4l2_err(&de->ctx->dev->v4l2_dev, + "%s: n %u implausible\n", __func__, newmax); + return -ENOMEM; + } - a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd), - GFP_KERNEL); - if (!a) { + if (hwbuf_realloc_copy(de->ctx->dev, &de->cmd, newmax * sizeof(u64))) { v4l2_err(&de->ctx->dev->v4l2_dev, "Failed cmd buffer realloc from %u to %u\n", de->cmd_max, newmax); return -ENOMEM; } - v4l2_info(&de->ctx->dev->v4l2_dev, - "cmd buffer realloc from %u to %u\n", de->cmd_max, newmax); + hevc_d_dbg(1, &de->ctx->dev->v4l2_dev, + "cmd buffer realloc from %u to %u\n", de->cmd_max, newmax); - de->cmd_fifo = a; de->cmd_max = newmax; return 0; } -// ???? 
u16 addr - put in u32 static void p1_apb_write(struct hevc_d_dec_env *const de, const u16 addr, const u32 data) { - if (de->cmd_len >= de->cmd_max) { - v4l2_err(&de->ctx->dev->v4l2_dev, - "%s: Overflow @ %d\n", __func__, de->cmd_len); - return; - } - - de->cmd_fifo[de->cmd_len].addr = addr; - de->cmd_fifo[de->cmd_len].data = data; - + WRITE_ONCE(((u64 *)de->cmd.ptr)[de->cmd_len], addr | ((u64)data << 32)); de->cmd_len++; } @@ -307,7 +320,7 @@ static void aux_q_free(struct hevc_d_ctx *const ctx, { struct hevc_d_dev *const dev = ctx->dev; - gptr_free(dev, &aq->col); + hwbuf_free(dev, &aq->col); kfree(aq); } @@ -320,8 +333,8 @@ static struct hevc_d_q_aux *aux_q_alloc(struct hevc_d_ctx *const ctx, if (!aq) return NULL; - if (gptr_alloc(dev, &aq->col, ctx->colmv_picsize, - DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING)) + if (hwbuf_alloc(dev, &aq->col, ctx->colmv_picsize, + DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING)) goto fail; /* @@ -506,7 +519,7 @@ static void write_prob(struct hevc_d_dec_env *const de, s->sh->slice_type != HEVC_SLICE_I) ? s->sh->slice_type + 1 : 2 - s->sh->slice_type; - const int q = clip_int(s->slice_qp, 0, 51); + const int q = clamp((int)s->slice_qp, 0, 51); const u8 *p = prob_init[init_type]; u8 dst[RPI_PROB_ARRAY_SIZE]; unsigned int i; @@ -560,21 +573,15 @@ static inline __u32 dma_to_axi_addr(dma_addr_t a) static int write_bitstream(struct hevc_d_dec_env *const de, const struct hevc_d_dec_state *const s) { - // FIXME!!!! - // Note that FFmpeg V4L2 does not remove emulation prevention bytes, - // so this is matched in the configuration here. - // Whether that is the correct behaviour or not is not clear in the - // spec. 
+ /* V4L2 always has emulation prevention bytes in the stream */ const int rpi_use_emu = 1; - unsigned int offset = s->sh->data_byte_offset; - const unsigned int len = (s->sh->bit_size + 7) / 8 - offset; - dma_addr_t addr = s->src_addr + offset; - - offset = addr & 63; + const unsigned int len = s->data_len; + const dma_addr_t addr = s->src_addr + s->sh->data_byte_offset; + const unsigned int offset = addr & 63; p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr)); p1_apb_write(de, RPI_BFNUM, len); - p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop + p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); /* Stop */ p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6)); return 0; } @@ -698,7 +705,10 @@ static void program_slicecmds(struct hevc_d_dec_env *const de, p1_apb_write(de, 0x4000 + 4 * i, de->slice_msgs[i] & 0xffff); } -/* NoBackwardPredictionFlag 8.3.5 - Simply checks POCs */ +/* NoBackwardPredictionFlag 8.3.5 - Simply checks POCs of the frames referenced + * by the idx array against cur_poc. Needs to be called twice (with L0 & L1) to + * get NoBackwardPredictionFlag. 
+ */ static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb, const __u8 *const idx, const unsigned int n, const s32 cur_poc) @@ -1106,7 +1116,7 @@ static int tile_entry_fill(struct hevc_d_dec_env *const de, 2 | (last_x << 5) | (last_y << 18)); p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD); - // Inc tile + /* Inc tile */ if (++t_x >= s->tile_width) { t_x = 0; ++t_y; @@ -1411,24 +1421,6 @@ static int updated_ps(struct hevc_d_dec_state *const s) return -ENOMEM; } -static int write_cmd_buffer(struct hevc_d_dev *const dev, - struct hevc_d_dec_env *const de, - const struct hevc_d_dec_state *const s) -{ - const size_t cmd_size = ALIGN(de->cmd_len * sizeof(de->cmd_fifo[0]), - dev->cache_align); - - de->cmd_addr = dma_map_single(dev->dev, de->cmd_fifo, - cmd_size, DMA_TO_DEVICE); - if (dma_mapping_error(dev->dev, de->cmd_addr)) { - v4l2_err(&dev->v4l2_dev, - "Map cmd buffer (%zu): FAILED\n", cmd_size); - return -ENOMEM; - } - de->cmd_size = cmd_size; - return 0; -} - static void setup_colmv(struct hevc_d_ctx *const ctx, struct hevc_d_run *run, struct hevc_d_dec_state *const s) { @@ -1461,12 +1453,6 @@ static void dec_env_delete(struct hevc_d_dec_env *const de) struct hevc_d_ctx * const ctx = de->ctx; unsigned long lock_flags; - if (de->cmd_size) { - dma_unmap_single(ctx->dev->dev, de->cmd_addr, de->cmd_size, - DMA_TO_DEVICE); - de->cmd_size = 0; - } - aux_q_release(ctx, &de->frame_aux); aux_q_release(ctx, &de->col_aux); @@ -1484,11 +1470,8 @@ static void dec_env_uninit(struct hevc_d_ctx *const ctx) unsigned int i; if (ctx->dec_pool) { - for (i = 0; i != HEVC_D_DEC_ENV_COUNT; ++i) { - struct hevc_d_dec_env *const de = ctx->dec_pool + i; - - kfree(de->cmd_fifo); - } + for (i = 0; i != HEVC_D_DEC_ENV_COUNT; ++i) + hwbuf_free(ctx->dev, &ctx->dec_pool[i].cmd); kfree(ctx->dec_pool); } @@ -1517,11 +1500,9 @@ static int dec_env_init(struct hevc_d_ctx *const ctx) de->ctx = ctx; de->decode_order = i; - de->cmd_max = 8096; - de->cmd_fifo = kmalloc_array(de->cmd_max, - 
sizeof(struct rpi_cmd), - GFP_KERNEL); - if (!de->cmd_fifo) + de->cmd_max = CMD_BUFFER_SIZE_INIT; + if (hwbuf_alloc(ctx->dev, &de->cmd, + de->cmd_max * sizeof(u64), 0)) goto fail; } @@ -1577,7 +1558,7 @@ static inline bool is_ref_unit_type(const unsigned int nal_unit_type) return (nal_unit_type & ~0xe) != 0; } -void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) +static int hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) { struct hevc_d_dev *const dev = ctx->dev; const struct v4l2_ctrl_hevc_decode_params *const dec = @@ -1598,16 +1579,18 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) unsigned int ctb_size_y; bool sps_changed = false; + de = dec_env_new(ctx); + if (!de) { + v4l2_err(&dev->v4l2_dev, "Failed to find free decode env\n"); + return -1; + } + ctx->dec0 = de; + s->sh = NULL; /* Avoid use until in the slice loop */ slice_temporal_mvp = (sh0->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED); - if (de) { - v4l2_warn(&dev->v4l2_dev, "Decode env set unexpectedly"); - goto fail; - } - /* Frame start */ if (!is_sps_set(run->h265.sps)) { @@ -1632,13 +1615,6 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) goto fail; } - de = dec_env_new(ctx); - if (!de) { - v4l2_err(&dev->v4l2_dev, "Failed to find free decode env\n"); - goto fail; - } - ctx->dec0 = de; - ctb_size_y = 1U << (s->sps.log2_min_luma_coding_block_size_minus3 + 3 + s->sps.log2_diff_max_min_luma_coding_block_size); @@ -1675,14 +1651,6 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) de->frame_aux = NULL; - if (s->sps.bit_depth_luma_minus8 != - s->sps.bit_depth_chroma_minus8) { - v4l2_warn(&dev->v4l2_dev, - "Chroma depth (%d) != Luma depth (%d)\n", - s->sps.bit_depth_chroma_minus8 + 8, - s->sps.bit_depth_luma_minus8 + 8); - goto fail; - } if (s->sps.bit_depth_luma_minus8 == 0) { if (ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_COL128 && ctx->dst_fmt.pixelformat != 
V4L2_PIX_FMT_NV12_COL128) { @@ -1691,7 +1659,7 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) ctx->dst_fmt.pixelformat); goto fail; } - } else if (s->sps.bit_depth_luma_minus8 == 2) { + } else { if (ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_10_COL128 && ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12_10_COL128) { v4l2_err(&dev->v4l2_dev, @@ -1699,11 +1667,19 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) ctx->dst_fmt.pixelformat); goto fail; } - } else { - v4l2_warn(&dev->v4l2_dev, "Luma depth (%d) unsupported\n", - s->sps.bit_depth_luma_minus8 + 8); + } + + if (s->sps.pic_width_in_luma_samples > ctx->dst_fmt.width || + s->sps.pic_height_in_luma_samples > ctx->dst_fmt.height) { + v4l2_warn(&dev->v4l2_dev, + "SPS size (%dx%d) > capture size (%d,%d)\n", + s->sps.pic_width_in_luma_samples, + s->sps.pic_height_in_luma_samples, + ctx->dst_fmt.width, + ctx->dst_fmt.height); goto fail; } + switch (ctx->dst_fmt.pixelformat) { case V4L2_PIX_FMT_NV12MT_COL128: case V4L2_PIX_FMT_NV12MT_10_COL128: @@ -1796,24 +1772,30 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) /* Pre calc parameters */ s->dec = dec; + s->idx_inuse = 0; for (i = 0; i != run->h265.slice_ents; ++i) { const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i; const bool last_slice = i + 1 == run->h265.slice_ents; + const u32 byte_size = DIV_ROUND_UP(sh->bit_size, 8); + unsigned int j; s->sh = sh; - if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) { - v4l2_warn(&dev->v4l2_dev, - "Bit size %d > bytesused %d\n", - sh->bit_size, run->src->planes[0].bytesused); - goto fail; - } - if (sh->data_byte_offset >= sh->bit_size / 8) { + if (sh->data_byte_offset + byte_size > run->src->planes[0].bytesused) { v4l2_warn(&dev->v4l2_dev, - "Bit size %u < Byte offset %u * 8\n", - sh->bit_size, sh->data_byte_offset); + "data_byte_offset %d + bits %d (= %d bytes) > bytesused %d\n", + sh->data_byte_offset, sh->bit_size, byte_size, + 
run->src->planes[0].bytesused); goto fail; } + /* BFNUM (data_len) includes the byte with rbsp_stop_one_bit which is not + * part of slice_segment_data but is all but certain to be in the input + * stream so add that when calculating the value we need, but limit to the + * actual size of the buffer (which may well be what is used to set + * bit_size if the caller isn't being very pedantic). + */ + s->data_len = min(sh->bit_size / 8 + 1, + run->src->planes[0].bytesused - sh->data_byte_offset); s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta; s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? @@ -1830,6 +1812,11 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) 0 : sh->num_ref_idx_l1_active_minus1 + 1; + for (j = 0; j != s->nb_refs[0]; ++j) + s->idx_inuse |= 1 << sh->ref_idx_l0[j]; + for (j = 0; j != s->nb_refs[1]; ++j) + s->idx_inuse |= 1 << sh->ref_idx_l1[j]; + if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) populate_scaling_factors(run, de, s); @@ -1860,7 +1847,6 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) * Locate ref frames * At least in the current implementation this is constant across all * slices. If this changes we will need idx mapping code. 
- * Uses sh so here rather than trigger */ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, @@ -1871,16 +1857,18 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) goto fail; } - if (write_cmd_buffer(dev, de, s)) - goto fail; - for (i = 0; i < dec->num_active_dpb_entries; ++i) { struct vb2_buffer *buf = vb2_find_buffer(vq, dec->dpb[i].timestamp); if (!buf) { - v4l2_warn(&dev->v4l2_dev, - "Missing DPB ent %d, timestamp=%lld\n", - i, (long long)dec->dpb[i].timestamp); + if (!(s->idx_inuse & (1 << i))) + hevc_d_dbg(2, &dev->v4l2_dev, + "Missing unused DPB ent %d, timestamp=%lld\n", + i, (long long)dec->dpb[i].timestamp); + else + v4l2_warn(&dev->v4l2_dev, + "Missing inuse DPB ent %d, timestamp=%lld\n", + i, (long long)dec->dpb[i].timestamp); continue; } @@ -1955,12 +1943,12 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run) } de->state = HEVC_D_DECODE_PHASE1; - return; + return 0; fail: - if (de) - // Actual error reporting happens in Trigger - de->state = HEVC_D_DECODE_ERROR_DONE; + /* Actual error reporting happens in Trigger */ + de->state = HEVC_D_DECODE_ERROR_DONE; + return 0; } /* Handle PU and COEFF stream overflow @@ -1996,20 +1984,25 @@ static int check_status(const struct hevc_d_dev *const dev) return -1; } -static void phase2_cb(struct hevc_d_dev *const dev, void *v) +static void phase2_done(struct hevc_d_dev *const dev, + struct hevc_d_dec_env *const de, + enum vb2_buffer_state state) { - struct hevc_d_dec_env *const de = v; - - /* Done with buffers - allow new P1 */ - hevc_d_hw_irq_active1_enable_claim(dev, 1); - - v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_DONE); + v4l2_m2m_buf_done(de->frame_buf, state); de->frame_buf = NULL; - media_request_manual_complete(de->req_pin); - de->req_pin = NULL; + media_request_manual_complete(de->request); + de->request = NULL; dec_env_delete(de); + + /* Finally allow new P1. 
Avoids possibility of race with de alloc */ + hevc_d_hw_irq_active1_enable_claim(dev, 1); +} + +static void phase2_cb(struct hevc_d_dev *const dev, void *v) +{ + phase2_done(dev, v, VB2_BUF_STATE_DONE); } static void phase2_claimed(struct hevc_d_dev *const dev, void *v) @@ -2028,11 +2021,17 @@ static void phase2_claimed(struct hevc_d_dev *const dev, void *v) apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->chroma_stride); for (i = 0; i < 16; i++) { - // Strides are in fact unused but fill in anyway - apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i][0]); - apb_write_vc_len(dev, 0x9004 + 16 * i, de->luma_stride); - apb_write_vc_addr(dev, 0x9008 + 16 * i, de->ref_addrs[i][1]); - apb_write_vc_len(dev, 0x900C + 16 * i, de->chroma_stride); + /* Strides are in fact unused but fill in anyway */ + unsigned int roff = i * RPI_REFREGS_SIZE; + + apb_write_vc_addr(dev, RPI_REFYBASE0 + roff, + de->ref_addrs[i][0]); + apb_write_vc_len(dev, RPI_REFYSTRIDE0 + roff, + de->luma_stride); + apb_write_vc_addr(dev, RPI_REFCBASE0 + roff, + de->ref_addrs[i][1]); + apb_write_vc_len(dev, RPI_REFCSTRIDE0 + roff, + de->chroma_stride); } apb_write(dev, RPI_CONFIG2, de->rpi_config2); @@ -2054,35 +2053,33 @@ static void phase2_claimed(struct hevc_d_dev *const dev, void *v) apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y); } +static void phase2_err_claimed(struct hevc_d_dev *const dev, void *v) +{ + phase2_done(dev, v, VB2_BUF_STATE_ERROR); +} + static void phase1_claimed(struct hevc_d_dev *const dev, void *v); -/* release any and all objects associated with de and reenable phase 1 if - * required - */// 1 if required -static void phase1_err_fin(struct hevc_d_dev *const dev, - struct hevc_d_ctx *const ctx, - struct hevc_d_dec_env *const de) +static void phase1_done(struct hevc_d_dev *const dev, + struct hevc_d_dec_env *const de, + enum vb2_buffer_state state) { - /* Return all detached buffers */ - if (de->src_buf) - v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_ERROR); - de->src_buf = 
NULL; - if (de->frame_buf) - v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_ERROR); - de->frame_buf = NULL; + struct hevc_d_ctx *const ctx = de->ctx; + hevc_d_irq_callback p2_cb; - if (de->req_pin) - media_request_manual_complete(de->req_pin); - de->req_pin = NULL; + p2_cb = (state == VB2_BUF_STATE_DONE) ? phase2_claimed : + phase2_err_claimed; + v4l2_m2m_buf_done(de->src_buf, state); + de->src_buf = NULL; - dec_env_delete(de); + /* All phase1 error paths done - it is safe to inc p2idx */ + ctx->p2idx = (ctx->p2idx + 1 >= HEVC_D_P2BUF_COUNT) ? 0 : ctx->p2idx + 1; - /* Reenable phase 0 if we were blocking */ + /* Re-enable the next setup if we were blocking */ if (atomic_add_return(-1, &ctx->p1out) >= HEVC_D_P1BUF_COUNT - 1) v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); - /* Done with P1-P2 buffers - allow new P1 */ - hevc_d_hw_irq_active1_enable_claim(dev, 1); + hevc_d_hw_irq_active2_claim(dev, &de->irq_ent, p2_cb, de); } static void phase1_thread(struct hevc_d_dev *const dev, void *v) @@ -2090,56 +2087,55 @@ static void phase1_thread(struct hevc_d_dev *const dev, void *v) struct hevc_d_dec_env *const de = v; struct hevc_d_ctx *const ctx = de->ctx; - struct hevc_d_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx; - struct hevc_d_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx; + struct hevc_d_hwbuf *const pu_hwbuf = ctx->pu_bufs + ctx->p2idx; + struct hevc_d_hwbuf *const coeff_hwbuf = ctx->coeff_bufs + ctx->p2idx; if (de->p1_status & STATUS_PU_EXHAUSTED) { - if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) { + if (hwbuf_realloc_new(dev, pu_hwbuf, next_size(pu_hwbuf->size))) { v4l2_err(&dev->v4l2_dev, "%s: PU realloc (%zx) failed\n", - __func__, pu_gptr->size); + __func__, pu_hwbuf->size); goto fail; } - v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%zx) OK\n", - __func__, pu_gptr->size); + hevc_d_dbg(1, &dev->v4l2_dev, "%s: PU realloc (%zx) OK\n", + __func__, pu_hwbuf->size); } if (de->p1_status & STATUS_COEFF_EXHAUSTED) { - if
(gptr_realloc_new(dev, coeff_gptr, - next_size(coeff_gptr->size))) { + if (hwbuf_realloc_new(dev, coeff_hwbuf, + next_size(coeff_hwbuf->size))) { v4l2_err(&dev->v4l2_dev, "%s: Coeff realloc (%zx) failed\n", - __func__, coeff_gptr->size); + __func__, coeff_hwbuf->size); goto fail; } - v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%zx) OK\n", - __func__, coeff_gptr->size); + hevc_d_dbg(1, &dev->v4l2_dev, "%s: Coeff realloc (%zx) OK\n", + __func__, coeff_hwbuf->size); } phase1_claimed(dev, de); return; fail: - if (!pu_gptr->addr || !coeff_gptr->addr) { + if (!pu_hwbuf->addr || !coeff_hwbuf->addr) { v4l2_err(&dev->v4l2_dev, "%s: Fatal: failed to reclaim old alloc\n", __func__); ctx->fatal_err = 1; } - phase1_err_fin(dev, ctx, de); + phase1_done(dev, de, VB2_BUF_STATE_ERROR); } /* Always called in irq context (this is good) */ static void phase1_cb(struct hevc_d_dev *const dev, void *v) { struct hevc_d_dec_env *const de = v; - struct hevc_d_ctx *const ctx = de->ctx; de->p1_status = check_status(dev); if (de->p1_status != 0) { - v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n", - __func__, de->p1_status); + hevc_d_dbg(2, &dev->v4l2_dev, "%s: Post wait: %#x\n", + __func__, de->p1_status); if (de->p1_status < 0) goto fail; @@ -2150,23 +2146,11 @@ static void phase1_cb(struct hevc_d_dev *const dev, void *v) return; } - v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_DONE); - de->src_buf = NULL; - - /* All phase1 error paths done - it is safe to inc p2idx */ - ctx->p2idx = - (ctx->p2idx + 1 >= HEVC_D_P2BUF_COUNT) ? 
0 : ctx->p2idx + 1; - - /* Renable the next setup if we were blocking */ - if (atomic_add_return(-1, &ctx->p1out) >= HEVC_D_P1BUF_COUNT - 1) - v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); - - hevc_d_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de); - + phase1_done(dev, de, VB2_BUF_STATE_DONE); return; fail: - phase1_err_fin(dev, ctx, de); + phase1_done(dev, de, VB2_BUF_STATE_ERROR); } static void phase1_claimed(struct hevc_d_dev *const dev, void *v) @@ -2174,20 +2158,20 @@ static void phase1_claimed(struct hevc_d_dev *const dev, void *v) struct hevc_d_dec_env *const de = v; struct hevc_d_ctx *const ctx = de->ctx; - const struct hevc_d_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx; - const struct hevc_d_gptr * const coeff_gptr = ctx->coeff_bufs + - ctx->p2idx; + const struct hevc_d_hwbuf * const pu_hwbuf = ctx->pu_bufs + ctx->p2idx; + const struct hevc_d_hwbuf * const coeff_hwbuf = ctx->coeff_bufs + + ctx->p2idx; if (ctx->fatal_err) goto fail; - de->pu_base_vc = pu_gptr->addr; + de->pu_base_vc = pu_hwbuf->addr; de->pu_stride = - ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64); + ALIGN_DOWN(pu_hwbuf->size / de->pic_height_in_ctbs_y, 64); - de->coeff_base_vc = coeff_gptr->addr; + de->coeff_base_vc = coeff_hwbuf->addr; de->coeff_stride = - ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64); + ALIGN_DOWN(coeff_hwbuf->size / de->pic_height_in_ctbs_y, 64); /* phase1_claimed blocked until cb_phase1 completed so p2idx inc * in cb_phase1 after error detection @@ -2205,12 +2189,16 @@ static void phase1_claimed(struct hevc_d_dev *const dev, void *v) hevc_d_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de); /* Start the h/w */ - apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_addr); - + apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd.addr); return; fail: - phase1_err_fin(dev, ctx, de); + phase1_done(dev, de, VB2_BUF_STATE_ERROR); +} + +static void phase1_err_claimed(struct hevc_d_dev *const dev, void *v) +{ + phase1_done(dev, v, 
VB2_BUF_STATE_ERROR); } static void dec_state_delete(struct hevc_d_ctx *const ctx) @@ -2286,9 +2274,9 @@ static void h265_ctx_uninit(struct hevc_d_dev *const dev, struct hevc_d_ctx *ctx aux_q_uninit(ctx); for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) - gptr_free(dev, ctx->pu_bufs + i); + hwbuf_free(dev, ctx->pu_bufs + i); for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i) - gptr_free(dev, ctx->coeff_bufs + i); + hwbuf_free(dev, ctx->coeff_bufs + i); } void hevc_d_h265_stop(struct hevc_d_ctx *ctx) @@ -2304,22 +2292,9 @@ int hevc_d_h265_start(struct hevc_d_ctx *ctx) struct hevc_d_dev *const dev = ctx->dev; unsigned int i; - unsigned int w = ctx->dst_fmt.width; - unsigned int h = ctx->dst_fmt.height; - unsigned int wxh; - size_t pu_alloc; - size_t coeff_alloc; - - /* Generate a sanitised WxH for memory alloc. Assume HD if unset */ - if (w == 0) - w = 1920; - if (w > 4096) - w = 4096; - if (h == 0) - h = 1088; - if (h > 4096) - h = 4096; - wxh = w * h; + const unsigned int wxh = ctx->dst_fmt.width * ctx->dst_fmt.height; + size_t pu_size; + size_t coeff_size; ctx->fatal_err = 0; ctx->dec0 = NULL; @@ -2334,22 +2309,22 @@ int hevc_d_h265_start(struct hevc_d_ctx *ctx) goto fail; } - coeff_alloc = hevc_d_round_up_size(wxh); - pu_alloc = hevc_d_round_up_size(wxh / 4); + coeff_size = hevc_d_round_up_size(wxh); + pu_size = hevc_d_round_up_size(wxh / 4); for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) { /* Don't actually need a kernel mapping here */ - if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc, - DMA_ATTR_NO_KERNEL_MAPPING)) { + if (hwbuf_alloc(dev, ctx->pu_bufs + i, pu_size, + DMA_ATTR_NO_KERNEL_MAPPING)) { v4l2_err(&dev->v4l2_dev, "Failed to alloc %#zx PU%d buffer\n", - pu_alloc, i); + pu_size, i); goto fail; } - if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc, - DMA_ATTR_NO_KERNEL_MAPPING)) { + if (hwbuf_alloc(dev, ctx->coeff_bufs + i, coeff_size, + DMA_ATTR_NO_KERNEL_MAPPING)) { v4l2_err(&dev->v4l2_dev, "Failed to alloc %#zx Coeff%d buffer\n", - pu_alloc, i); + 
coeff_size, i); goto fail; } } @@ -2368,47 +2343,25 @@ void hevc_d_h265_trigger(struct hevc_d_ctx *ctx) struct hevc_d_dec_env *const de = ctx->dec0; struct vb2_v4l2_buffer *src_buf; struct media_request *req; + hevc_d_irq_callback p1_cb; + + p1_cb = (de->state == HEVC_D_DECODE_PHASE1) ? phase1_claimed : + phase1_err_claimed; src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); req = src_buf->vb2_buf.req_obj.req; + ctx->dec0 = NULL; - switch (!de ? HEVC_D_DECODE_ERROR_DONE : de->state) { - default: - v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__, - de->state); - fallthrough; - case HEVC_D_DECODE_ERROR_DONE: - ctx->dec0 = NULL; - dec_env_delete(de); - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx, - VB2_BUF_STATE_ERROR); - media_request_manual_complete(req); - break; - - case HEVC_D_DECODE_PHASE1: - ctx->dec0 = NULL; - - ctx->p1idx = (ctx->p1idx + 1 >= HEVC_D_P1BUF_COUNT) ? - 0 : ctx->p1idx + 1; - - /* We know we have src & dst so no need to test */ - de->src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); - de->frame_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); - de->req_pin = req; - - /* We could get rid of the src buffer here if we've already - * copied it, but we don't copy the last buffer unless it - * didn't return a contig dma addr, and that shouldn't happen - */ + /* We know we have src & dst so no need to test */ + de->src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + de->frame_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + de->request = req; - /* Enable the next setup if our Q isn't too big */ - if (atomic_add_return(1, &ctx->p1out) < HEVC_D_P1BUF_COUNT) - v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); + /* Enable the next setup if our Q isn't too big */ + if (atomic_add_return(1, &ctx->p1out) < HEVC_D_P1BUF_COUNT) + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx); - hevc_d_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed, - de); - break; - } + hevc_d_hw_irq_active1_claim(dev, &de->irq_ent, p1_cb, de); }
static int try_ctrl_sps(struct v4l2_ctrl *ctrl) @@ -2440,10 +2393,10 @@ static int try_ctrl_sps(struct v4l2_ctrl *ctrl) return -EINVAL; } - if (!sps->pic_width_in_luma_samples || - !sps->pic_height_in_luma_samples || - sps->pic_width_in_luma_samples > 4096 || - sps->pic_height_in_luma_samples > 4096) { + if (sps->pic_width_in_luma_samples < HEVC_D_MIN_WIDTH || + sps->pic_height_in_luma_samples < HEVC_D_MIN_HEIGHT || + sps->pic_width_in_luma_samples > HEVC_D_MAX_WIDTH || + sps->pic_height_in_luma_samples > HEVC_D_MAX_HEIGHT) { v4l2_warn(&dev->v4l2_dev, "Bad sps width (%u) x height (%u)\n", sps->pic_width_in_luma_samples, @@ -2451,32 +2404,6 @@ static int try_ctrl_sps(struct v4l2_ctrl *ctrl) return -EINVAL; } - if (!ctx->dst_fmt_set) - return 0; - - if ((sps->bit_depth_luma_minus8 == 0 && - ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_COL128 && - ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12_COL128) || - (sps->bit_depth_luma_minus8 == 2 && - ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12MT_10_COL128 && - ctx->dst_fmt.pixelformat != V4L2_PIX_FMT_NV12_10_COL128)) { - v4l2_warn(&dev->v4l2_dev, - "SPS luma depth %d does not match capture format\n", - sps->bit_depth_luma_minus8 + 8); - return -EINVAL; - } - - if (sps->pic_width_in_luma_samples > ctx->dst_fmt.width || - sps->pic_height_in_luma_samples > ctx->dst_fmt.height) { - v4l2_warn(&dev->v4l2_dev, - "SPS size (%dx%d) > capture size (%d,%d)\n", - sps->pic_width_in_luma_samples, - sps->pic_height_in_luma_samples, - ctx->dst_fmt.width, - ctx->dst_fmt.height); - return -EINVAL; - } - return 0; } @@ -2513,63 +2440,44 @@ void hevc_d_device_run(void *priv) struct hevc_d_dev *const dev = ctx->dev; struct hevc_d_run run = {}; struct media_request *src_req; + const struct v4l2_ctrl *ctrl; run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); - if (!run.src || !run.dst) { - v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n", - __func__, run.src, run.dst); - goto fail; 
- } - /* Apply request(s) controls */ src_req = run.src->vb2_buf.req_obj.req; - if (!src_req) { - v4l2_err(&dev->v4l2_dev, "%s: Missing request\n", __func__); + if (v4l2_ctrl_request_setup(src_req, &ctx->hdl)) goto fail; - } - v4l2_ctrl_request_setup(src_req, &ctx->hdl); - - switch (ctx->src_fmt.pixelformat) { - case V4L2_PIX_FMT_HEVC_SLICE: - { - const struct v4l2_ctrl *ctrl; - - run.h265.sps = - hevc_d_find_control_data(ctx, - V4L2_CID_STATELESS_HEVC_SPS); - run.h265.pps = - hevc_d_find_control_data(ctx, - V4L2_CID_STATELESS_HEVC_PPS); - run.h265.dec = - hevc_d_find_control_data(ctx, - V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); - - ctrl = hevc_d_find_ctrl(ctx, - V4L2_CID_STATELESS_HEVC_SLICE_PARAMS); - if (!ctrl || !ctrl->elems) { - v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n", - __func__); - goto fail; - } - run.h265.slice_ents = ctrl->elems; - run.h265.slice_params = ctrl->p_cur.p; - - run.h265.scaling_matrix = - hevc_d_find_control_data(ctx, - V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); - break; + run.h265.sps = + hevc_d_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_SPS); + run.h265.pps = + hevc_d_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_PPS); + run.h265.dec = + hevc_d_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); + + ctrl = v4l2_ctrl_find(ctx->fh.ctrl_handler, + V4L2_CID_STATELESS_HEVC_SLICE_PARAMS); + if (!ctrl || !ctrl->elems) { + v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n", + __func__); + goto fail; } + run.h265.slice_ents = ctrl->elems; + run.h265.slice_params = ctrl->p_cur.p; - default: - break; - } + run.h265.scaling_matrix = + hevc_d_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); v4l2_m2m_buf_copy_metadata(run.src, run.dst, true); - hevc_d_h265_setup(ctx, &run); + if (hevc_d_h265_setup(ctx, &run) == -1) + goto fail; /* Complete request(s) controls */ v4l2_ctrl_request_complete(src_req, &ctx->hdl); diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.h 
b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.h index 775fe7de5dd6ef..48f08d37362b19 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.h +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.h @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright (C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * */ @@ -13,7 +13,6 @@ extern const struct v4l2_ctrl_ops hevc_d_hevc_sps_ctrl_ops; extern const struct v4l2_ctrl_ops hevc_d_hevc_pps_ctrl_ops; -void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run); int hevc_d_h265_start(struct hevc_d_ctx *ctx); void hevc_d_h265_stop(struct hevc_d_ctx *ctx); void hevc_d_h265_trigger(struct hevc_d_ctx *ctx); diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.c b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.c index 5030a69093018f..1b9ffcd4df3d81 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.c +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.c @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright (C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: * @@ -307,28 +307,47 @@ void hevc_d_hw_irq_active2_irq(struct hevc_d_dev *dev, pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2); } -int hevc_d_hw_probe(struct hevc_d_dev *dev) +/* + * Stop the clock for this context + * clk_disable_unprepare does ref counting so this will not actually + * disable the clock if there are other running contexts + */ +void hevc_d_hw_stop_clock(struct hevc_d_dev *dev) { - struct rpi_firmware *firmware; - struct device_node *node; - __u32 irq_stat; - int irq_dec; - int ret = 0; + clk_disable_unprepare(dev->clock); +} - ictl_init(&dev->ic_active1, HEVC_D_P2BUF_COUNT); - ictl_init(&dev->ic_active2, HEVC_D_ICTL_ENABLE_UNLIMITED); +/* Always starts the clock if it isn't already on this ctx */ +int hevc_d_hw_start_clock(struct hevc_d_dev *dev) +{ + int rv; - dev->base_irq = 
devm_platform_ioremap_resource_byname(dev->pdev, "intc"); - if (IS_ERR(dev->base_irq)) - return PTR_ERR(dev->base_irq); + rv = clk_set_min_rate(dev->clock, dev->max_clock_rate); + if (rv) { + dev_err(dev->dev, "Failed to set clock rate\n"); + return rv; + } - dev->base_h265 = devm_platform_ioremap_resource_byname(dev->pdev, "hevc"); - if (IS_ERR(dev->base_h265)) - return PTR_ERR(dev->base_h265); + rv = clk_prepare_enable(dev->clock); + if (rv) { + dev_err(dev->dev, "Failed to enable clock\n"); + return rv; + } + return 0; +} - dev->clock = devm_clk_get(&dev->pdev->dev, NULL); - if (IS_ERR(dev->clock)) - return PTR_ERR(dev->clock); +static int hw_setup(struct hevc_d_dev *dev) +{ + struct device_node *node; + u32 ver; + u32 irq_stat; + struct rpi_firmware *firmware; + + ver = apb_read(dev, RPI_VERSION); + if (ver != 0x202) { + dev_err(dev->dev, "Unexpected version %#x only 0x202 supported\n", ver); + return -ENODEV; + } node = rpi_firmware_find_node(); if (!node) @@ -343,13 +362,47 @@ int hevc_d_hw_probe(struct hevc_d_dev *dev) RPI_FIRMWARE_HEVC_CLK_ID); rpi_firmware_put(firmware); - dev->cache_align = dma_get_cache_alignment(); - - /* Disable IRQs & reset anything pending */ - irq_write(dev, 0, + /* + * Enable IRQs & reset anything pending + * Whilst this seems the wrong way round the h/w doesn't actually + * set the IRQ status bits till the IRQs are enabled. As we haven't + * got the IRQ yet this should still be safe. 
+ */ + irq_write(dev, ARG_IC_ICTRL, ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET); - irq_stat = irq_read(dev, 0); - irq_write(dev, 0, irq_stat); + irq_stat = irq_read(dev, ARG_IC_ICTRL); + irq_write(dev, ARG_IC_ICTRL, irq_stat); + + return 0; +} + +int hevc_d_hw_probe(struct hevc_d_dev *dev) +{ + int irq_dec; + int ret; + + ictl_init(&dev->ic_active1, HEVC_D_P2BUF_COUNT); + ictl_init(&dev->ic_active2, HEVC_D_ICTL_ENABLE_UNLIMITED); + + dev->base_irq = devm_platform_ioremap_resource_byname(dev->pdev, "intc"); + if (IS_ERR(dev->base_irq)) + return PTR_ERR(dev->base_irq); + + dev->base_h265 = devm_platform_ioremap_resource_byname(dev->pdev, "hevc"); + if (IS_ERR(dev->base_h265)) + return PTR_ERR(dev->base_h265); + + dev->clock = devm_clk_get(&dev->pdev->dev, NULL); + if (IS_ERR(dev->clock)) + return PTR_ERR(dev->clock); + + ret = clk_prepare_enable(dev->clock); + if (ret) + return ret; + ret = hw_setup(dev); + clk_disable_unprepare(dev->clock); + if (ret) + return ret; irq_dec = platform_get_irq(dev->pdev, 0); if (irq_dec <= 0) diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.h b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.h index 8d91931aadf207..765b27ab161bad 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.h +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_hw.h @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright (C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: * @@ -73,6 +73,17 @@ struct hevc_d_hw_irq_ent { #define RPI_COLSTRIDE 0x803C #define RPI_CURRPOC 0x8040 +/* + * Reference frame register values + * There are 16 of these arranged sequentially + */ +#define RPI_REFYBASE0 0x9000 +#define RPI_REFYSTRIDE0 0x9004 +#define RPI_REFCBASE0 0x9008 +#define RPI_REFCSTRIDE0 0x900c +/* Offset to get from REFYBASEn to REFYBASEn+1 */ +#define RPI_REFREGS_SIZE 16 + /* * Write a general register value * Order is unimportant @@ -297,6 
+308,9 @@ void hevc_d_hw_irq_active2_irq(struct hevc_d_dev *dev, struct hevc_d_hw_irq_ent *ient, hevc_d_irq_callback irq_cb, void *ctx); +int hevc_d_hw_start_clock(struct hevc_d_dev *dev); +void hevc_d_hw_stop_clock(struct hevc_d_dev *dev); + int hevc_d_hw_probe(struct hevc_d_dev *dev); void hevc_d_hw_remove(struct hevc_d_dev *dev); diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.c b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.c index 274af191d9e65e..cff997f74d6413 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.c +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.c @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright (C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: * @@ -22,16 +22,6 @@ #include "hevc_d_hw.h" #include "hevc_d_video.h" -#define HEVC_D_DECODE_SRC BIT(0) -#define HEVC_D_DECODE_DST BIT(1) - -#define HEVC_D_MIN_WIDTH 16U -#define HEVC_D_MIN_HEIGHT 16U -#define HEVC_D_DEFAULT_WIDTH 1920U -#define HEVC_D_DEFAULT_HEIGHT 1088U -#define HEVC_D_MAX_WIDTH 4096U -#define HEVC_D_MAX_HEIGHT 4096U - static inline struct hevc_d_ctx *hevc_d_file2ctx(struct file *file) { return container_of(file->private_data, struct hevc_d_ctx, fh); @@ -117,12 +107,12 @@ static void hevc_d_prepare_dst_format(struct v4l2_pix_format_mplane *pix_fmt) if (!width) width = HEVC_D_DEFAULT_WIDTH; - if (width > HEVC_D_MAX_WIDTH) - width = HEVC_D_MAX_WIDTH; + else + width = clamp(width, HEVC_D_MIN_WIDTH, HEVC_D_MAX_WIDTH); if (!height) height = HEVC_D_DEFAULT_HEIGHT; - if (height > HEVC_D_MAX_HEIGHT) - height = HEVC_D_MAX_HEIGHT; + else + height = clamp(height, HEVC_D_MIN_HEIGHT, HEVC_D_MAX_HEIGHT); /* For column formats set bytesperline to column height (stride2) */ switch (pix_fmt->pixelformat) { @@ -284,8 +274,8 @@ static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps, const int index) { static const u32 all_formats[] = { - 
//V4L2_PIX_FMT_NV12MT_COL128, - //V4L2_PIX_FMT_NV12MT_10_COL128, + V4L2_PIX_FMT_NV12MT_COL128, + V4L2_PIX_FMT_NV12MT_10_COL128, V4L2_PIX_FMT_NV12_COL128, V4L2_PIX_FMT_NV12_10_COL128, }; @@ -298,12 +288,11 @@ static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps, pf = all_formats[index]; } else { if (index == 0) { -/* if (sps->bit_depth_luma_minus8 == 0) + if (sps->bit_depth_luma_minus8 == 0) pf = V4L2_PIX_FMT_NV12MT_COL128; else if (sps->bit_depth_luma_minus8 == 2) pf = V4L2_PIX_FMT_NV12MT_10_COL128; } else if (index == 1) { - */ if (sps->bit_depth_luma_minus8 == 0) pf = V4L2_PIX_FMT_NV12_COL128; else if (sps->bit_depth_luma_minus8 == 2) @@ -597,70 +586,28 @@ static int hevc_d_buf_prepare(struct vb2_buffer *vb) return 0; } -/* Only stops the clock if streaom off on both output & capture */ -static void stop_clock(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx) -{ - if (ctx->src_stream_on || - ctx->dst_stream_on) - return; - - clk_set_min_rate(dev->clock, 0); - clk_disable_unprepare(dev->clock); -} - -/* Always starts the clock if it isn't already on this ctx */ -static int start_clock(struct hevc_d_dev *dev, struct hevc_d_ctx *ctx) -{ - int rv; - - rv = clk_set_min_rate(dev->clock, dev->max_clock_rate); - if (rv) { - dev_err(dev->dev, "Failed to set clock rate\n"); - return rv; - } - - rv = clk_prepare_enable(dev->clock); - if (rv) { - dev_err(dev->dev, "Failed to enable clock\n"); - return rv; - } - - return 0; -} - static int hevc_d_start_streaming(struct vb2_queue *vq, unsigned int count) { struct hevc_d_ctx *ctx = vb2_get_drv_priv(vq); struct hevc_d_dev *dev = ctx->dev; int ret = 0; - if (!V4L2_TYPE_IS_OUTPUT(vq->type)) { - ctx->dst_stream_on = 1; - goto ok; - } - - if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE) { - ret = -EINVAL; - goto fail_cleanup; - } + v4l2_m2m_update_start_streaming_state(ctx->fh.m2m_ctx, vq); - if (ctx->src_stream_on) - goto ok; - - ret = start_clock(dev, ctx); - if (ret) - goto fail_cleanup; + if 
(V4L2_TYPE_IS_OUTPUT(vq->type)) { + ret = hevc_d_hw_start_clock(dev); + if (ret) + goto fail_cleanup; - ret = hevc_d_h265_start(ctx); - if (ret) - goto fail_stop_clock; + ret = hevc_d_h265_start(ctx); + if (ret) + goto fail_stop_clock; + } - ctx->src_stream_on = 1; -ok: return 0; fail_stop_clock: - stop_clock(dev, ctx); + hevc_d_hw_stop_clock(dev); fail_cleanup: v4l2_err(&dev->v4l2_dev, "%s: qtype=%d: FAIL\n", __func__, vq->type); hevc_d_queue_cleanup(vq, VB2_BUF_STATE_QUEUED); @@ -673,17 +620,15 @@ static void hevc_d_stop_streaming(struct vb2_queue *vq) struct hevc_d_dev *dev = ctx->dev; if (V4L2_TYPE_IS_OUTPUT(vq->type)) { - ctx->src_stream_on = 0; hevc_d_h265_stop(ctx); - } else { - ctx->dst_stream_on = 0; + hevc_d_hw_stop_clock(dev); } hevc_d_queue_cleanup(vq, VB2_BUF_STATE_ERROR); vb2_wait_for_all_buffers(vq); - stop_clock(dev, ctx); + v4l2_m2m_update_stop_streaming_state(ctx->fh.m2m_ctx, vq); } static void hevc_d_buf_queue(struct vb2_buffer *vb) @@ -721,6 +666,7 @@ int hevc_d_queue_init(void *priv, struct vb2_queue *src_vq, src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; src_vq->io_modes = VB2_MMAP | VB2_DMABUF; + src_vq->dma_attrs = DMA_ATTR_NO_KERNEL_MAPPING; src_vq->drv_priv = ctx; src_vq->buf_struct_size = sizeof(struct hevc_d_buffer); src_vq->ops = &hevc_d_qops; @@ -737,6 +683,7 @@ int hevc_d_queue_init(void *priv, struct vb2_queue *src_vq, dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; + dst_vq->dma_attrs = DMA_ATTR_NO_KERNEL_MAPPING; dst_vq->drv_priv = ctx; dst_vq->buf_struct_size = sizeof(struct hevc_d_buffer); dst_vq->min_queued_buffers = 1; diff --git a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.h b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.h index 3ea193423194da..fb894bc9c813c2 100644 --- a/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.h +++ b/drivers/media/platform/raspberrypi/hevc_dec/hevc_d_video.h @@ -2,7 +2,7 @@ /* * Raspberry Pi HEVC driver * - * Copyright 
(C) 2024 Raspberry Pi Ltd + * Copyright (C) 2025 Raspberry Pi Ltd * * Based on the Cedrus VPU driver, that is: *