Merge tag 'gvt-fixes-2017-05-11' of https://github.com/01org/gvt-linux into drm-intel...
[muen/linux.git] / drivers / gpu / drm / i915 / gvt / sched_policy.c
index 6ac77f8cc405d897caa373b95bd993b1ce4e3aa3..f25ff133865f1327936483ac166b97c41e0baa60 100644 (file)
@@ -47,11 +47,87 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
        return false;
 }
 
+struct vgpu_sched_data {
+       struct list_head lru_list;
+       struct intel_vgpu *vgpu;
+
+       ktime_t sched_in_time;
+       ktime_t sched_out_time;
+       ktime_t sched_time;
+       ktime_t left_ts;
+       ktime_t allocated_ts;
+
+       struct vgpu_sched_ctl sched_ctl;
+};
+
+struct gvt_sched_data {
+       struct intel_gvt *gvt;
+       struct hrtimer timer;
+       unsigned long period;
+       struct list_head lru_runq_head;
+};
+
+static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu)
+{
+       ktime_t delta_ts;
+       struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data;
+
+       delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time;
+
+       vgpu_data->sched_time += delta_ts;
+       vgpu_data->left_ts -= delta_ts;
+}
+
+#define GVT_TS_BALANCE_PERIOD_MS 100
+#define GVT_TS_BALANCE_STAGE_NUM 10
+
+static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
+{
+       struct vgpu_sched_data *vgpu_data;
+       struct list_head *pos;
+       static uint64_t stage_check;
+       int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;
+
+       /* The timeslice accumulation reset at stage 0, which is
+        * allocated again without adding previous debt.
+        */
+       if (stage == 0) {
+               int total_weight = 0;
+               ktime_t fair_timeslice;
+
+               list_for_each(pos, &sched_data->lru_runq_head) {
+                       vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+                       total_weight += vgpu_data->sched_ctl.weight;
+               }
+
+               list_for_each(pos, &sched_data->lru_runq_head) {
+                       vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+                       fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) *
+                                               vgpu_data->sched_ctl.weight /
+                                               total_weight;
+
+                       vgpu_data->allocated_ts = fair_timeslice;
+                       vgpu_data->left_ts = vgpu_data->allocated_ts;
+               }
+       } else {
+               list_for_each(pos, &sched_data->lru_runq_head) {
+                       vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+
+                       /* timeslice for next 100ms should add the left/debt
+                        * slice of previous stages.
+                        */
+                       vgpu_data->left_ts += vgpu_data->allocated_ts;
+               }
+       }
+}
+
 static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 {
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
        enum intel_engine_id i;
        struct intel_engine_cs *engine;
+       struct vgpu_sched_data *vgpu_data;
+       ktime_t cur_time;
 
        /* no need to schedule if next_vgpu is the same with current_vgpu,
         * let scheduler chose next_vgpu again by setting it to NULL.
@@ -61,9 +137,6 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
                return;
        }
 
-       gvt_dbg_sched("try to schedule next vgpu %d\n",
-                       scheduler->next_vgpu->id);
-
        /*
         * after the flag is set, workload dispatch thread will
         * stop dispatching workload for current vgpu
@@ -72,14 +145,18 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 
        /* still have uncompleted workload? */
        for_each_engine(engine, gvt->dev_priv, i) {
-               if (scheduler->current_workload[i]) {
-                       gvt_dbg_sched("still have running workload\n");
+               if (scheduler->current_workload[i])
                        return;
-               }
        }
 
-       gvt_dbg_sched("switch to next vgpu %d\n",
-                       scheduler->next_vgpu->id);
+       cur_time = ktime_get();
+       if (scheduler->current_vgpu) {
+               vgpu_data = scheduler->current_vgpu->sched_data;
+               vgpu_data->sched_out_time = cur_time;
+               vgpu_update_timeslice(scheduler->current_vgpu);
+       }
+       vgpu_data = scheduler->next_vgpu->sched_data;
+       vgpu_data->sched_in_time = cur_time;
 
        /* switch current vgpu */
        scheduler->current_vgpu = scheduler->next_vgpu;
@@ -92,97 +169,106 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
                wake_up(&scheduler->waitq[i]);
 }
 
-struct tbs_vgpu_data {
-       struct list_head list;
-       struct intel_vgpu *vgpu;
-       /* put some per-vgpu sched stats here */
-};
-
-struct tbs_sched_data {
-       struct intel_gvt *gvt;
-       struct delayed_work work;
-       unsigned long period;
-       struct list_head runq_head;
-};
-
-#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1))
-
-static void tbs_sched_func(struct work_struct *work)
+static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
 {
-       struct tbs_sched_data *sched_data = container_of(work,
-                       struct tbs_sched_data, work.work);
-       struct tbs_vgpu_data *vgpu_data;
-
-       struct intel_gvt *gvt = sched_data->gvt;
-       struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-
+       struct vgpu_sched_data *vgpu_data;
        struct intel_vgpu *vgpu = NULL;
-       struct list_head *pos, *head;
-
-       mutex_lock(&gvt->lock);
-
-       /* no vgpu or has already had a target */
-       if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
-               goto out;
-
-       if (scheduler->current_vgpu) {
-               vgpu_data = scheduler->current_vgpu->sched_data;
-               head = &vgpu_data->list;
-       } else {
-               head = &sched_data->runq_head;
-       }
+       struct list_head *head = &sched_data->lru_runq_head;
+       struct list_head *pos;
 
        /* search a vgpu with pending workload */
        list_for_each(pos, head) {
-               if (pos == &sched_data->runq_head)
-                       continue;
 
-               vgpu_data = container_of(pos, struct tbs_vgpu_data, list);
+               vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                if (!vgpu_has_pending_workload(vgpu_data->vgpu))
                        continue;
 
-               vgpu = vgpu_data->vgpu;
-               break;
+               /* Return the vGPU only if it has time slice left */
+               if (vgpu_data->left_ts > 0) {
+                       vgpu = vgpu_data->vgpu;
+                       break;
+               }
        }
 
+       return vgpu;
+}
+
+/* in nanosecond */
+#define GVT_DEFAULT_TIME_SLICE 1000000
+
+static void tbs_sched_func(struct gvt_sched_data *sched_data)
+{
+       struct intel_gvt *gvt = sched_data->gvt;
+       struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+       struct vgpu_sched_data *vgpu_data;
+       struct intel_vgpu *vgpu = NULL;
+       static uint64_t timer_check;
+
+       if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
+               gvt_balance_timeslice(sched_data);
+
+       /* no active vgpu or has already had a target */
+       if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
+               goto out;
+
+       vgpu = find_busy_vgpu(sched_data);
        if (vgpu) {
                scheduler->next_vgpu = vgpu;
-               gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
+
+               /* Move the last used vGPU to the tail of lru_list */
+               vgpu_data = vgpu->sched_data;
+               list_del_init(&vgpu_data->lru_list);
+               list_add_tail(&vgpu_data->lru_list,
+                               &sched_data->lru_runq_head);
+       } else {
+               scheduler->next_vgpu = gvt->idle_vgpu;
        }
 out:
-       if (scheduler->next_vgpu) {
-               gvt_dbg_sched("try to schedule next vgpu %d\n",
-                               scheduler->next_vgpu->id);
+       if (scheduler->next_vgpu)
                try_to_schedule_next_vgpu(gvt);
-       }
+}
 
-       /*
-        * still have vgpu on runq
-        * or last schedule haven't finished due to running workload
-        */
-       if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
-               schedule_delayed_work(&sched_data->work, sched_data->period);
+void intel_gvt_schedule(struct intel_gvt *gvt)
+{
+       struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
 
+       mutex_lock(&gvt->lock);
+       tbs_sched_func(sched_data);
        mutex_unlock(&gvt->lock);
 }
 
+static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
+{
+       struct gvt_sched_data *data;
+
+       data = container_of(timer_data, struct gvt_sched_data, timer);
+
+       intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);
+
+       hrtimer_add_expires_ns(&data->timer, data->period);
+
+       return HRTIMER_RESTART;
+}
+
 static int tbs_sched_init(struct intel_gvt *gvt)
 {
        struct intel_gvt_workload_scheduler *scheduler =
                &gvt->scheduler;
 
-       struct tbs_sched_data *data;
+       struct gvt_sched_data *data;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
-       INIT_LIST_HEAD(&data->runq_head);
-       INIT_DELAYED_WORK(&data->work, tbs_sched_func);
+       INIT_LIST_HEAD(&data->lru_runq_head);
+       hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       data->timer.function = tbs_timer_fn;
        data->period = GVT_DEFAULT_TIME_SLICE;
        data->gvt = gvt;
 
        scheduler->sched_data = data;
+
        return 0;
 }
 
@@ -190,25 +276,28 @@ static void tbs_sched_clean(struct intel_gvt *gvt)
 {
        struct intel_gvt_workload_scheduler *scheduler =
                &gvt->scheduler;
-       struct tbs_sched_data *data = scheduler->sched_data;
+       struct gvt_sched_data *data = scheduler->sched_data;
+
+       hrtimer_cancel(&data->timer);
 
-       cancel_delayed_work(&data->work);
        kfree(data);
        scheduler->sched_data = NULL;
 }
 
 static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
 {
-       struct tbs_vgpu_data *data;
+       struct vgpu_sched_data *data;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
+       data->sched_ctl.weight = vgpu->sched_ctl.weight;
        data->vgpu = vgpu;
-       INIT_LIST_HEAD(&data->list);
+       INIT_LIST_HEAD(&data->lru_list);
 
        vgpu->sched_data = data;
+
        return 0;
 }
 
@@ -220,21 +309,24 @@ static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
 
 static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
 {
-       struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
-       struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
+       struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
+       struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
 
-       if (!list_empty(&vgpu_data->list))
+       if (!list_empty(&vgpu_data->lru_list))
                return;
 
-       list_add_tail(&vgpu_data->list, &sched_data->runq_head);
-       schedule_delayed_work(&sched_data->work, 0);
+       list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head);
+
+       if (!hrtimer_active(&sched_data->timer))
+               hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
+                       sched_data->period), HRTIMER_MODE_ABS);
 }
 
 static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
 {
-       struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
+       struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
 
-       list_del_init(&vgpu_data->list);
+       list_del_init(&vgpu_data->lru_list);
 }
 
 static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {