diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h index 360b8f3cbfc2..98fba97c4af2 100644 --- a/drivers/rknpu/include/rknpu_drv.h +++ b/drivers/rknpu/include/rknpu_drv.h @@ -28,10 +28,10 @@ #define DRIVER_NAME "rknpu" #define DRIVER_DESC "RKNPU driver" -#define DRIVER_DATE "20231018" +#define DRIVER_DATE "20231121" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 9 -#define DRIVER_PATCHLEVEL 2 +#define DRIVER_PATCHLEVEL 3 #define LOG_TAG "RKNPU" @@ -74,11 +74,12 @@ struct rknpu_config { __u64 nbuf_phyaddr; __u64 nbuf_size; __u64 max_submit_number; + __u32 core_mask; }; struct rknpu_timer { - __u32 busy_time; - __u32 busy_time_record; + ktime_t busy_time; + ktime_t total_busy_time; }; struct rknpu_subcore_data { diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h index 3b0b857108ce..35b46701c789 100644 --- a/drivers/rknpu/include/rknpu_ioctl.h +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -251,7 +251,7 @@ struct rknpu_subcore_task { * @task_obj_addr: address of task object * @regcfg_obj_addr: address of register config object * @task_base_addr: task base address - * @user_data: (optional) user data + * @hw_elapse_time: hardware elapse time * @core_mask: core mask of rknpu * @fence_fd: dma fence fd * @subcore_task: subcore task @@ -267,7 +267,7 @@ struct rknpu_submit { __u64 task_obj_addr; __u64 regcfg_obj_addr; __u64 task_base_addr; - __u64 user_data; + __s64 hw_elapse_time; __u32 core_mask; __s32 fence_fd; struct rknpu_subcore_task subcore_task[5]; diff --git a/drivers/rknpu/include/rknpu_job.h b/drivers/rknpu/include/rknpu_job.h index c62b1bf8e0ae..cd0d1dfb8363 100644 --- a/drivers/rknpu/include/rknpu_job.h +++ b/drivers/rknpu/include/rknpu_job.h @@ -44,8 +44,9 @@ struct rknpu_job { uint32_t use_core_num; atomic_t run_count; atomic_t interrupt_count; + ktime_t hw_commit_time; ktime_t hw_recoder_time; - ktime_t commit_pc_time; + ktime_t hw_elapse_time; atomic_t submit_count[RKNPU_MAX_CORES]; }; diff --git a/drivers/rknpu/rknpu_debugger.c b/drivers/rknpu/rknpu_debugger.c index 97c87521575c..0cfec7fe7c19 100644 --- a/drivers/rknpu/rknpu_debugger.c +++ b/drivers/rknpu/rknpu_debugger.c @@ -46,7 +46,7 @@ static int rknpu_load_show(struct seq_file *m, void *data) unsigned long flags; int i; int load; - uint64_t busy_time_total, div_value; + uint64_t total_busy_time, div_value; seq_puts(m, "NPU load: "); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { @@ -57,13 +57,13 @@ static int rknpu_load_show(struct seq_file *m, void *data) spin_lock_irqsave(&rknpu_dev->irq_lock, flags); - busy_time_total = subcore_data->timer.busy_time_record; + total_busy_time = subcore_data->timer.total_busy_time; spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); - div_value = (RKNPU_LOAD_INTERVAL / 100000); - do_div(busy_time_total, div_value); - load = busy_time_total > 100 ? 100 : busy_time_total; + div_value = (RKNPU_LOAD_INTERVAL / 100); + do_div(total_busy_time, div_value); + load = total_busy_time > 100 ? 100 : total_busy_time; if (rknpu_dev->config->num_irqs > 1) seq_printf(m, "%2.d%%,", load); diff --git a/drivers/rknpu/rknpu_devfreq.c b/drivers/rknpu/rknpu_devfreq.c index 9f8dd672ad26..ef46785fbebc 100644 --- a/drivers/rknpu/rknpu_devfreq.c +++ b/drivers/rknpu/rknpu_devfreq.c @@ -162,6 +162,7 @@ static int rk3588_npu_set_read_margin(struct device *dev, struct rockchip_opp_info *opp_info, u32 rm) { + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); u32 offset = 0, val = 0; int i, ret = 0; @@ -173,7 +174,7 @@ static int rk3588_npu_set_read_margin(struct device *dev, LOG_DEV_DEBUG(dev, "set rm to %d\n", rm); - for (i = 0; i < 3; i++) { + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { ret = regmap_read(opp_info->grf, offset, &val); if (ret < 0) { LOG_DEV_ERROR(dev, "failed to get rm from 0x%x\n", diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c index f0a1c37d5be9..1df364f28316 100644 --- a/drivers/rknpu/rknpu_drv.c +++ b/drivers/rknpu/rknpu_drv.c @@ -112,7 +112,8 @@ static const struct rknpu_config rk356x_rknpu_config = { .num_resets = ARRAY_SIZE(rknpu_resets), .nbuf_phyaddr = 0, .nbuf_size = 0, - .max_submit_number = (1 << 12) - 1 + .max_submit_number = (1 << 12) - 1, + .core_mask = 0x1, }; static const struct rknpu_config rk3588_rknpu_config = { @@ -131,7 +132,28 @@ static const struct rknpu_config rk3588_rknpu_config = { .num_resets = ARRAY_SIZE(rk3588_npu_resets), .nbuf_phyaddr = 0, .nbuf_size = 0, - .max_submit_number = (1 << 12) - 1 + .max_submit_number = (1 << 12) - 1, + .core_mask = 0x7, +}; + +static const struct rknpu_config rk3583_rknpu_config = { + .bw_priority_addr = 0x0, + .bw_priority_length = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .pc_data_amount_scale = 2, + .pc_task_number_bits = 12, + .pc_task_number_mask = 0xfff, + .pc_task_status_offset = 0x3c, + .pc_dma_ctrl = 0, + .bw_enable = 0, + .irqs = rk3588_npu_irqs, + .resets = rk3588_npu_resets, + .num_irqs = 2, + .num_resets = 2, + .nbuf_phyaddr = 0, + .nbuf_size = 0, + .max_submit_number = (1 << 12) - 1, + .core_mask = 0x3, }; static const struct rknpu_config rv1106_rknpu_config = { @@ -150,7 +172,8 @@ static const struct rknpu_config rv1106_rknpu_config = { .num_resets = ARRAY_SIZE(rknpu_resets), .nbuf_phyaddr = 0, .nbuf_size = 0, - .max_submit_number = (1 << 16) - 1 + .max_submit_number = (1 << 16) - 1, + .core_mask = 0x1, }; static const struct rknpu_config rk3562_rknpu_config = { @@ -169,7 +192,8 @@ static const struct rknpu_config rk3562_rknpu_config = { .num_resets = ARRAY_SIZE(rknpu_resets), .nbuf_phyaddr = 0xfe400000, .nbuf_size = 256 * 1024, - .max_submit_number = (1 << 16) - 1 + .max_submit_number = (1 << 16) - 1, + .core_mask = 0x1, }; /* driver probe and init */ @@ -621,13 +645,14 @@ static enum hrtimer_restart hrtimer_handler(struct hrtimer *timer) if (job) { now = ktime_get(); subcore_data->timer.busy_time += - ktime_us_delta(now, job->hw_recoder_time); + ktime_sub(now, job->hw_recoder_time); job->hw_recoder_time = now; } - subcore_data->timer.busy_time_record = + subcore_data->timer.total_busy_time = subcore_data->timer.busy_time; subcore_data->timer.busy_time = 0; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } @@ -1047,6 +1072,28 @@ static int rknpu_find_nbuf_resource(struct rknpu_device *rknpu_dev) return 0; } +static int rknpu_get_invalid_core_mask(struct device *dev) +{ + int ret = 0; + u8 invalid_core_mask = 0; + + if (of_property_match_string(dev->of_node, "nvmem-cell-names", + "cores") >= 0) { + ret = rockchip_nvmem_cell_read_u8(dev->of_node, "cores", + &invalid_core_mask); + /* The default valid npu cores for RK3583 are core0 and core1 */ + invalid_core_mask |= RKNPU_CORE2_MASK; + if (ret) { + LOG_DEV_ERROR( + dev, + "failed to get specification_serial_number\n"); + return invalid_core_mask; + } + } + + return (int)invalid_core_mask; +} + static int rknpu_probe(struct platform_device *pdev) { struct resource *res = NULL; @@ -1078,6 +1125,22 @@ static int rknpu_probe(struct platform_device *pdev) if (!config) return -EINVAL; + if (match->data == (void *)&rk3588_rknpu_config) { + int invalid_core_mask = rknpu_get_invalid_core_mask(dev); + /* The default valid npu cores for RK3583 are core0 and core1 */ + if (invalid_core_mask & RKNPU_CORE2_MASK) { + if ((invalid_core_mask & RKNPU_CORE0_MASK) || + (invalid_core_mask & RKNPU_CORE1_MASK)) { + LOG_DEV_ERROR( + dev, + "rknpu core invalid, invalid core mask: %#x\n", + invalid_core_mask); + return -ENODEV; + } + config = &rk3583_rknpu_config; + } + } + rknpu_dev->config = config; rknpu_dev->dev = dev; @@ -1232,9 +1295,11 @@ static int rknpu_probe(struct platform_device *pdev) virt_dev = dev_pm_domain_attach_by_name(dev, "npu1"); if (!IS_ERR(virt_dev)) rknpu_dev->genpd_dev_npu1 = virt_dev; - virt_dev = dev_pm_domain_attach_by_name(dev, "npu2"); - if (!IS_ERR(virt_dev)) - rknpu_dev->genpd_dev_npu2 = virt_dev; + if (config->num_irqs > 2) { + virt_dev = dev_pm_domain_attach_by_name(dev, "npu2"); + if (!IS_ERR(virt_dev)) + rknpu_dev->genpd_dev_npu2 = virt_dev; + } rknpu_dev->multiple_domains = true; } diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c index 92f2df7430ec..6dc94b59b3dd 100644 --- a/drivers/rknpu/rknpu_job.c +++ b/drivers/rknpu/rknpu_job.c @@ -27,7 +27,7 @@ static int rknpu_wait_core_index(int core_mask) { int index = 0; - switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { + switch (core_mask) { case RKNPU_CORE0_MASK: case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: @@ -73,7 +73,7 @@ static int rknpu_get_task_number(struct rknpu_job *job, int core_index) int task_num = job->args->task_number; if (core_index >= RKNPU_MAX_CORES || core_index < 0) { - LOG_ERROR("core_index: %d set error!", core_index); + LOG_ERROR("invalid rknpu core index: %d", core_index); return 0; } @@ -131,8 +131,6 @@ static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct rknpu_gem_object *task_obj = NULL; #endif - if (rknpu_dev->config->num_irqs == 1) - args->core_mask = RKNPU_CORE0_MASK; job = kzalloc(sizeof(*job), GFP_KERNEL); if (!job) @@ -197,19 +195,19 @@ static inline int rknpu_job_wait(struct rknpu_job *job) break; if (ret == 0) { - int64_t commit_time = 0; + int64_t elapse_time_us = 0; spin_lock_irqsave(&rknpu_dev->irq_lock, flags); - commit_time = ktime_us_delta(ktime_get(), - job->commit_pc_time); + elapse_time_us = ktime_us_delta(ktime_get(), + job->hw_commit_time); continue_wait = - job->commit_pc_time == 0 ? + job->hw_commit_time == 0 ? true : - (commit_time < args->timeout * 1000); + (elapse_time_us < args->timeout * 1000); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); LOG_ERROR( - "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n", + "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", job, wait_count, continue_wait, - (job->commit_pc_time == 0 ? 0 : commit_time), + (job->hw_commit_time == 0 ? 0 : elapse_time_us), ktime_us_delta(ktime_get(), job->timestamp), args->timeout * 1000); } @@ -259,6 +257,7 @@ static inline int rknpu_job_wait(struct rknpu_job *job) return -EINVAL; args->task_counter = args->task_number; + args->hw_elapse_time = job->hw_elapse_time; return 0; } @@ -368,7 +367,8 @@ static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job, return 0; } -static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index) +static inline int rknpu_job_subcore_commit(struct rknpu_job *job, + int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_submit *args = job->args; @@ -394,7 +394,7 @@ static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index static void rknpu_job_commit(struct rknpu_job *job) { - switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { + switch (job->args->core_mask) { case RKNPU_CORE0_MASK: rknpu_job_subcore_commit(job, 0); break; @@ -442,8 +442,8 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) list_del_init(&job->head[core_index]); subcore_data->job = job; - job->hw_recoder_time = ktime_get(); - job->commit_pc_time = job->hw_recoder_time; + job->hw_commit_time = ktime_get(); + job->hw_recoder_time = job->hw_commit_time; spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (atomic_dec_and_test(&job->run_count)) { @@ -455,6 +455,7 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; + ktime_t now; unsigned long flags; int max_submit_number = rknpu_dev->config->max_submit_number; @@ -470,8 +471,9 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) spin_lock_irqsave(&rknpu_dev->irq_lock, flags); subcore_data->job = NULL; subcore_data->task_num -= rknpu_get_task_number(job, core_index); - subcore_data->timer.busy_time += - ktime_us_delta(ktime_get(), job->hw_recoder_time); + now = ktime_get(); + job->hw_elapse_time = ktime_sub(now, job->hw_commit_time); + subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (atomic_dec_and_test(&job->interrupt_count)) { @@ -495,44 +497,32 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) rknpu_job_next(rknpu_dev, core_index); } +static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev) +{ + int core_num = rknpu_dev->config->num_irqs; + int task_num = rknpu_dev->subcore_datas[0].task_num; + int core_index = 0; + int i = 0; + + for (i = 1; i < core_num; i++) { + if (task_num > rknpu_dev->subcore_datas[i].task_num) { + core_index = i; + task_num = rknpu_dev->subcore_datas[i].task_num; + } + } + + return core_index; +} + static void rknpu_job_schedule(struct rknpu_job *job) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; int i = 0, core_index = 0; unsigned long flags; - int task_num_list[3] = { 0, 1, 2 }; - int tmp = 0; - - if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) == - RKNPU_CORE_AUTO_MASK) { - if (rknpu_dev->subcore_datas[0].task_num > - rknpu_dev->subcore_datas[1].task_num) { - tmp = task_num_list[1]; - task_num_list[1] = task_num_list[0]; - task_num_list[0] = tmp; - } - if (rknpu_dev->subcore_datas[task_num_list[0]].task_num > - rknpu_dev->subcore_datas[2].task_num) { - tmp = task_num_list[2]; - task_num_list[2] = task_num_list[1]; - task_num_list[1] = task_num_list[0]; - task_num_list[0] = tmp; - } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num > - rknpu_dev->subcore_datas[2].task_num) { - tmp = task_num_list[2]; - task_num_list[2] = task_num_list[1]; - task_num_list[1] = tmp; - } - if (!rknpu_dev->subcore_datas[task_num_list[0]].job) - core_index = task_num_list[0]; - else if (!rknpu_dev->subcore_datas[task_num_list[1]].job) - core_index = task_num_list[1]; - else if (!rknpu_dev->subcore_datas[task_num_list[2]].job) - core_index = task_num_list[2]; - else - core_index = task_num_list[0]; + if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) { + core_index = rknpu_schedule_core_index(rknpu_dev); job->args->core_mask = rknpu_core_mask(core_index); job->use_core_num = 1; atomic_set(&job->run_count, job->use_core_num); @@ -749,6 +739,11 @@ static int rknpu_submit(struct rknpu_device *rknpu_dev, return -EINVAL; } + if (args->core_mask > rknpu_dev->config->core_mask) { + LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask); + return -EINVAL; + } + job = rknpu_job_alloc(rknpu_dev, args); if (!job) { LOG_ERROR("failed to allocate rknpu job!\n"); diff --git a/drivers/rknpu/rknpu_mem.c b/drivers/rknpu/rknpu_mem.c index 5242f1506280..858c21f484f5 100644 --- a/drivers/rknpu/rknpu_mem.c +++ b/drivers/rknpu/rknpu_mem.c @@ -109,22 +109,27 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, __LINE__, &phys, length); } - page_count = length >> PAGE_SHIFT; - pages = vmalloc(page_count * sizeof(struct page)); - if (!pages) { - LOG_ERROR("alloc pages failed\n"); - ret = -ENOMEM; - goto err_detach_dma_buf; - } + if (args.flags & RKNPU_MEM_KERNEL_MAPPING) { + page_count = length >> PAGE_SHIFT; + pages = vmalloc(page_count * sizeof(struct page)); + if (!pages) { + LOG_ERROR("alloc pages failed\n"); + ret = -ENOMEM; + goto err_detach_dma_buf; + } - for (i = 0; i < page_count; i++) - pages[i] = &page[i]; + for (i = 0; i < page_count; i++) + pages[i] = &page[i]; - rknpu_obj->kv_addr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); - if (!rknpu_obj->kv_addr) { - LOG_ERROR("vmap pages addr failed\n"); - ret = -ENOMEM; - goto err_free_pages; + rknpu_obj->kv_addr = + vmap(pages, page_count, VM_MAP, PAGE_KERNEL); + if (!rknpu_obj->kv_addr) { + LOG_ERROR("vmap pages addr failed\n"); + ret = -ENOMEM; + goto err_free_pages; + } + vfree(pages); + pages = NULL; } rknpu_obj->size = PAGE_ALIGN(args.size); @@ -148,8 +153,6 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, goto err_unmap_kv_addr; } - vfree(pages); - pages = NULL; dma_buf_unmap_attachment(attachment, table, DMA_BIDIRECTIONAL); dma_buf_detach(dmabuf, attachment);