diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 4bc24852a283..8e4dc1441fff 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence) fence->cs_sequence = sequence; fence->error = 0; fence->timestamp = ktime_set(0, 0); + fence->mcs_handling_done = false; init_completion(&fence->completion); } @@ -535,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) mcs_compl->timestamp = ktime_to_ns(fence->timestamp); complete_all(&mcs_compl->completion); + + /* + * Setting mcs_handling_done inside the lock ensures + * at least one fence have mcs_handling_done set to + * true before wait for mcs finish. This ensures at + * least one CS will be set as completed when polling + * mcs fences. + */ + fence->mcs_handling_done = true; } spin_unlock(&mcs_compl->lock); } + /* In case CS completed without mcs completion initialized */ + fence->mcs_handling_done = true; } static inline void cs_release_sob_reset_handler(struct hl_device *hdev, @@ -2372,7 +2384,13 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) mcs_data->stream_master_qid_map |= fence->stream_master_qid_map; - if (status == CS_WAIT_STATUS_BUSY) + /* + * Using mcs_handling_done to avoid possibility of mcs_data + * returns to user indicating CS completed before it finished + * all of its mcs handling, to avoid race the next time the + * user waits for mcs. + */ + if (status == CS_WAIT_STATUS_BUSY || !fence->mcs_handling_done) continue; mcs_data->completion_bitmap |= BIT(i); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index a06135155b57..48f0e52cd5be 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -610,6 +610,9 @@ struct asic_fixed_properties { * @error: mark this fence with error * @timestamp: timestamp upon completion * @take_timestamp: timestamp shall be taken upon completion + * @mcs_handling_done: indicates that corresponding command submission has + * finished msc handling, this does not mean it was part + * of the mcs */ struct hl_fence { struct completion completion; @@ -619,6 +622,7 @@ struct hl_fence { int error; ktime_t timestamp; u8 take_timestamp; + u8 mcs_handling_done; }; /**