Merge drm/drm-next into drm-intel-next-queued
[muen/linux.git] / drivers / gpu / drm / i915 / intel_lrc.c
index e3a5f67..57396a2 100644 (file)
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL            (1 << 0x2)
 #define RING_EXECLIST1_VALID           (1 << 0x3)
@@ -176,14 +177,16 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 
 static inline int rq_prio(const struct i915_request *rq)
 {
-       return rq->priotree.priority;
+       return rq->sched.attr.priority;
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
                                const struct i915_request *last,
                                int prio)
 {
-       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+       return (intel_engine_has_preemption(engine) &&
+               __execlists_need_preempt(prio, rq_prio(last)) &&
+               !i915_request_completed(last));
 }
 
 /**
@@ -221,7 +224,7 @@ static void
 intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
                                   struct intel_engine_cs *engine)
 {
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
        u64 desc;
 
        BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -256,7 +259,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 
 static struct i915_priolist *
 lookup_priolist(struct intel_engine_cs *engine,
-               struct i915_priotree *pt,
+               struct i915_sched_node *node,
                int prio)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -342,10 +345,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
                GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
                if (rq_prio(rq) != last_prio) {
                        last_prio = rq_prio(rq);
-                       p = lookup_priolist(engine, &rq->priotree, last_prio);
+                       p = lookup_priolist(engine, &rq->sched, last_prio);
                }
 
-               list_add(&rq->priotree.link, &p->requests);
+               list_add(&rq->sched.link, &p->requests);
        }
 }
 
@@ -374,6 +377,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
                                   status, rq);
 }
 
+inline void
+execlists_user_begin(struct intel_engine_execlists *execlists,
+                    const struct execlist_port *port)
+{
+       execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+inline void
+execlists_user_end(struct intel_engine_execlists *execlists)
+{
+       execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
@@ -399,7 +415,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-       struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
+       struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
        struct i915_hw_ppgtt *ppgtt =
                rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
        u32 *reg_state = ce->lrc_reg_state;
@@ -454,10 +470,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
                        desc = execlists_update_context(rq);
                        GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x, prio=%d\n",
+                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
                                  engine->name, n,
                                  port[n].context_id, count,
                                  rq->global_seqno,
+                                 rq->fence.context, rq->fence.seqno,
+                                 intel_engine_get_seqno(engine),
                                  rq_prio(rq));
                } else {
                        GEM_BUG_ON(!n);
@@ -506,7 +524,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
        struct intel_context *ce =
-               &engine->i915->preempt_context->engine[engine->id];
+               to_intel_context(engine->i915->preempt_context, engine);
        unsigned int n;
 
        GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -637,7 +655,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                struct i915_priolist *p = to_priolist(rb);
                struct i915_request *rq, *rn;
 
-               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+               list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
                        /*
                         * Can we combine this request with the current port?
                         * It has to be the same context/ringbuffer and not
@@ -657,7 +675,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                 */
                                if (port == last_port) {
                                        __list_del_many(&p->requests,
-                                                       &rq->priotree.link);
+                                                       &rq->sched.link);
                                        goto done;
                                }
 
@@ -671,7 +689,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                if (ctx_single_port_submission(last->ctx) ||
                                    ctx_single_port_submission(rq->ctx)) {
                                        __list_del_many(&p->requests,
-                                                       &rq->priotree.link);
+                                                       &rq->sched.link);
                                        goto done;
                                }
 
@@ -684,7 +702,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                GEM_BUG_ON(port_isset(port));
                        }
 
-                       INIT_LIST_HEAD(&rq->priotree.link);
+                       INIT_LIST_HEAD(&rq->sched.link);
                        __i915_request_submit(rq);
                        trace_i915_request_in(rq, port_index(port, execlists));
                        last = rq;
@@ -697,8 +715,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                if (p->priority != I915_PRIORITY_NORMAL)
                        kmem_cache_free(engine->i915->priorities, p);
        }
+
 done:
-       execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
+       /*
+        * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+        *
+        * We choose queue_priority such that if we add a request of greater
+        * priority than this, we kick the submission tasklet to decide on
+        * the right order of submitting the requests to hardware. We must
+        * also be prepared to reorder requests as they are in-flight on the
+        * HW. We derive the queue_priority then as the first "hole" in
+        * the HW submission ports and if there are no available slots,
+        * the priority of the lowest executing request, i.e. last.
+        *
+        * When we do receive a higher priority request ready to run from the
+        * user, see submit_queue(), the queue_priority is bumped to that
+        * request's priority, triggering preemption on the next dequeue (or
+        * subsequent interrupt for secondary ports).
+        */
+       execlists->queue_priority =
+               port != execlists->port ? rq_prio(last) : INT_MIN;
+
        execlists->first = rb;
        if (submit)
                port_assign(port, last);
@@ -710,7 +747,7 @@ unlock:
        spin_unlock_irq(&engine->timeline->lock);
 
        if (submit) {
-               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
+               execlists_user_begin(execlists, execlists->port);
                execlists_submit_ports(engine);
        }
 
@@ -727,6 +764,13 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
        while (num_ports-- && port_isset(port)) {
                struct i915_request *rq = port_request(port);
 
+               GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
+                         rq->engine->name,
+                         (unsigned int)(port - execlists->port),
+                         rq->global_seqno,
+                         rq->fence.context, rq->fence.seqno,
+                         intel_engine_get_seqno(rq->engine));
+
                GEM_BUG_ON(!execlists->active);
                intel_engine_context_out(rq->engine);
 
@@ -742,6 +786,82 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
        }
 
        execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+       execlists_user_end(execlists);
+}
+
+static void clear_gtiir(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+       int i;
+
+       /*
+        * Clear any pending interrupt state.
+        *
+        * We do it twice out of paranoia that some of the IIR are
+        * double buffered, and so if we only reset it once there may
+        * still be an interrupt pending.
+        */
+       if (INTEL_GEN(dev_priv) >= 11) {
+               static const struct {
+                       u8 bank;
+                       u8 bit;
+               } gen11_gtiir[] = {
+                       [RCS] = {0, GEN11_RCS0},
+                       [BCS] = {0, GEN11_BCS},
+                       [_VCS(0)] = {1, GEN11_VCS(0)},
+                       [_VCS(1)] = {1, GEN11_VCS(1)},
+                       [_VCS(2)] = {1, GEN11_VCS(2)},
+                       [_VCS(3)] = {1, GEN11_VCS(3)},
+                       [_VECS(0)] = {1, GEN11_VECS(0)},
+                       [_VECS(1)] = {1, GEN11_VECS(1)},
+               };
+               unsigned long irqflags;
+
+               GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
+
+               spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+               for (i = 0; i < 2; i++) {
+                       gen11_reset_one_iir(dev_priv,
+                                           gen11_gtiir[engine->id].bank,
+                                           gen11_gtiir[engine->id].bit);
+               }
+               spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+       } else {
+               static const u8 gtiir[] = {
+                       [RCS]  = 0,
+                       [BCS]  = 0,
+                       [VCS]  = 1,
+                       [VCS2] = 1,
+                       [VECS] = 3,
+               };
+
+               GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+               for (i = 0; i < 2; i++) {
+                       I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+                                  engine->irq_keep_mask);
+                       POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+               }
+               GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+                          engine->irq_keep_mask);
+       }
+}
+
+static void reset_irq(struct intel_engine_cs *engine)
+{
+       /* Mark all CS interrupts as complete */
+       smp_store_mb(engine->execlists.active, 0);
+       synchronize_hardirq(engine->i915->drm.irq);
+
+       clear_gtiir(engine);
+
+       /*
+        * The port is checked prior to scheduling a tasklet, but
+        * just in case we have suspended the tasklet to do the
+        * wedging make sure that when it wakes, it decides there
+        * is no work to do by clearing the irq_posted bit.
+        */
+       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -751,7 +871,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        struct rb_node *rb;
        unsigned long flags;
 
-       GEM_TRACE("%s\n", engine->name);
+       GEM_TRACE("%s current %d\n",
+                 engine->name, intel_engine_get_seqno(engine));
 
        /*
         * Before we call engine->cancel_requests(), we should have exclusive
@@ -771,6 +892,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
        /* Cancel the requests on the HW and clear the ELSP tracker. */
        execlists_cancel_port_requests(execlists);
+       reset_irq(engine);
 
        spin_lock(&engine->timeline->lock);
 
@@ -786,8 +908,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        while (rb) {
                struct i915_priolist *p = to_priolist(rb);
 
-               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
-                       INIT_LIST_HEAD(&rq->priotree.link);
+               list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+                       INIT_LIST_HEAD(&rq->sched.link);
 
                        dma_fence_set_error(&rq->fence, -EIO);
                        __i915_request_submit(rq);
@@ -809,17 +931,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
        spin_unlock(&engine->timeline->lock);
 
-       /*
-        * The port is checked prior to scheduling a tasklet, but
-        * just in case we have suspended the tasklet to do the
-        * wedging make sure that when it wakes, it decides there
-        * is no work to do by clearing the irq_posted bit.
-        */
-       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-       /* Mark all CS interrupts as complete */
-       execlists->active = 0;
-
        local_irq_restore(flags);
 }
 
@@ -831,7 +942,7 @@ static void execlists_submission_tasklet(unsigned long data)
 {
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port * const port = execlists->port;
+       struct execlist_port *port = execlists->port;
        struct drm_i915_private *dev_priv = engine->i915;
        bool fw = false;
 
@@ -958,10 +1069,13 @@ static void execlists_submission_tasklet(unsigned long data)
                                                        EXECLISTS_ACTIVE_USER));
 
                        rq = port_unpack(port, &count);
-                       GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+                       GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
                                  engine->name,
                                  port->context_id, count,
                                  rq ? rq->global_seqno : 0,
+                                 rq ? rq->fence.context : 0,
+                                 rq ? rq->fence.seqno : 0,
+                                 intel_engine_get_seqno(engine),
                                  rq ? rq_prio(rq) : 0);
 
                        /* Check the context/desc id for this event matches */
@@ -969,10 +1083,28 @@ static void execlists_submission_tasklet(unsigned long data)
 
                        GEM_BUG_ON(count == 0);
                        if (--count == 0) {
+                               /*
+                                * On the final event corresponding to the
+                                * submission of this context, we expect either
+                                * an element-switch event or a completion
+                                * event (and on completion, the active-idle
+                                * marker). No more preemptions, lite-restore
+                                * or otherwise.
+                                */
                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
                                GEM_BUG_ON(port_isset(&port[1]) &&
                                           !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+                               GEM_BUG_ON(!port_isset(&port[1]) &&
+                                          !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+                               /*
+                                * We rely on the hardware being strongly
+                                * ordered, that the breadcrumb write is
+                                * coherent (visible from the CPU) before the
+                                * user interrupt and CSB is processed.
+                                */
                                GEM_BUG_ON(!i915_request_completed(rq));
+
                                execlists_context_schedule_out(rq);
                                trace_i915_request_out(rq);
                                i915_request_put(rq);
@@ -980,17 +1112,14 @@ static void execlists_submission_tasklet(unsigned long data)
                                GEM_TRACE("%s completed ctx=%d\n",
                                          engine->name, port->context_id);
 
-                               execlists_port_complete(execlists, port);
+                               port = execlists_port_complete(execlists, port);
+                               if (port_isset(port))
+                                       execlists_user_begin(execlists, port);
+                               else
+                                       execlists_user_end(execlists);
                        } else {
                                port_set(port, port_pack(rq, count));
                        }
-
-                       /* After the final element, the hw should be idle */
-                       GEM_BUG_ON(port_count(port) == 0 &&
-                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-                       if (port_count(port) == 0)
-                               execlists_clear_active(execlists,
-                                                      EXECLISTS_ACTIVE_USER);
                }
 
                if (head != execlists->csb_head) {
@@ -1013,18 +1142,23 @@ static void execlists_submission_tasklet(unsigned long data)
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-                         struct i915_priotree *pt,
+                         struct i915_sched_node *node,
                          int prio)
 {
-       list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+       list_add_tail(&node->link,
+                     &lookup_priolist(engine, node, prio)->requests);
+}
+
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
+{
+       engine->execlists.queue_priority = prio;
+       tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-       if (prio > engine->execlists.queue_priority) {
-               engine->execlists.queue_priority = prio;
-               tasklet_hi_schedule(&engine->execlists.tasklet);
-       }
+       if (prio > engine->execlists.queue_priority)
+               __submit_queue(engine, prio);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1035,24 +1169,24 @@ static void execlists_submit_request(struct i915_request *request)
        /* Will be called from irq-context when using foreign fences. */
        spin_lock_irqsave(&engine->timeline->lock, flags);
 
-       queue_request(engine, &request->priotree, rq_prio(request));
+       queue_request(engine, &request->sched, rq_prio(request));
        submit_queue(engine, rq_prio(request));
 
        GEM_BUG_ON(!engine->execlists.first);
-       GEM_BUG_ON(list_empty(&request->priotree.link));
+       GEM_BUG_ON(list_empty(&request->sched.link));
 
        spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
-static struct i915_request *pt_to_request(struct i915_priotree *pt)
+static struct i915_request *sched_to_request(struct i915_sched_node *node)
 {
-       return container_of(pt, struct i915_request, priotree);
+       return container_of(node, struct i915_request, sched);
 }
 
 static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 {
-       struct intel_engine_cs *engine = pt_to_request(pt)->engine;
+       struct intel_engine_cs *engine = sched_to_request(node)->engine;
 
        GEM_BUG_ON(!locked);
 
@@ -1064,11 +1198,13 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
        return engine;
 }
 
-static void execlists_schedule(struct i915_request *request, int prio)
+static void execlists_schedule(struct i915_request *request,
+                              const struct i915_sched_attr *attr)
 {
        struct intel_engine_cs *engine;
        struct i915_dependency *dep, *p;
        struct i915_dependency stack;
+       const int prio = attr->priority;
        LIST_HEAD(dfs);
 
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
@@ -1076,23 +1212,23 @@ static void execlists_schedule(struct i915_request *request, int prio)
        if (i915_request_completed(request))
                return;
 
-       if (prio <= READ_ONCE(request->priotree.priority))
+       if (prio <= READ_ONCE(request->sched.attr.priority))
                return;
 
        /* Need BKL in order to use the temporary link inside i915_dependency */
        lockdep_assert_held(&request->i915->drm.struct_mutex);
 
-       stack.signaler = &request->priotree;
+       stack.signaler = &request->sched;
        list_add(&stack.dfs_link, &dfs);
 
        /*
         * Recursively bump all dependent priorities to match the new request.
         *
         * A naive approach would be to use recursion:
-        * static void update_priorities(struct i915_priotree *pt, prio) {
-        *      list_for_each_entry(dep, &pt->signalers_list, signal_link)
+        * static void update_priorities(struct i915_sched_node *node, prio) {
+        *      list_for_each_entry(dep, &node->signalers_list, signal_link)
         *              update_priorities(dep->signal, prio)
-        *      queue_request(pt);
+        *      queue_request(node);
         * }
         * but that may have unlimited recursion depth and so runs a very
         * real risk of overrunning the kernel stack. Instead, we build
@@ -1104,7 +1240,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
         * last element in the list is the request we must execute first.
         */
        list_for_each_entry(dep, &dfs, dfs_link) {
-               struct i915_priotree *pt = dep->signaler;
+               struct i915_sched_node *node = dep->signaler;
 
                /*
                 * Within an engine, there can be no cycle, but we may
@@ -1112,14 +1248,14 @@ static void execlists_schedule(struct i915_request *request, int prio)
                 * (redundant dependencies are not eliminated) and across
                 * engines.
                 */
-               list_for_each_entry(p, &pt->signalers_list, signal_link) {
+               list_for_each_entry(p, &node->signalers_list, signal_link) {
                        GEM_BUG_ON(p == dep); /* no cycles! */
 
-                       if (i915_priotree_signaled(p->signaler))
+                       if (i915_sched_node_signaled(p->signaler))
                                continue;
 
-                       GEM_BUG_ON(p->signaler->priority < pt->priority);
-                       if (prio > READ_ONCE(p->signaler->priority))
+                       GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+                       if (prio > READ_ONCE(p->signaler->attr.priority))
                                list_move_tail(&p->dfs_link, &dfs);
                }
        }
@@ -1130,9 +1266,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
         * execlists_submit_request()), we can set our own priority and skip
         * acquiring the engine locks.
         */
-       if (request->priotree.priority == I915_PRIORITY_INVALID) {
-               GEM_BUG_ON(!list_empty(&request->priotree.link));
-               request->priotree.priority = prio;
+       if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
+               GEM_BUG_ON(!list_empty(&request->sched.link));
+               request->sched.attr = *attr;
                if (stack.dfs_link.next == stack.dfs_link.prev)
                        return;
                __list_del_entry(&stack.dfs_link);
@@ -1143,21 +1279,24 @@ static void execlists_schedule(struct i915_request *request, int prio)
 
        /* Fifo and depth-first replacement ensure our deps execute before us */
        list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-               struct i915_priotree *pt = dep->signaler;
+               struct i915_sched_node *node = dep->signaler;
 
                INIT_LIST_HEAD(&dep->dfs_link);
 
-               engine = pt_lock_engine(pt, engine);
+               engine = sched_lock_engine(node, engine);
 
-               if (prio <= pt->priority)
+               if (prio <= node->attr.priority)
                        continue;
 
-               pt->priority = prio;
-               if (!list_empty(&pt->link)) {
-                       __list_del_entry(&pt->link);
-                       queue_request(engine, pt, prio);
+               node->attr.priority = prio;
+               if (!list_empty(&node->link)) {
+                       __list_del_entry(&node->link);
+                       queue_request(engine, node, prio);
                }
-               submit_queue(engine, prio);
+
+               if (prio > engine->execlists.queue_priority &&
+                   i915_sw_fence_done(&sched_to_request(node)->submit))
+                       __submit_queue(engine, prio);
        }
 
        spin_unlock_irq(&engine->timeline->lock);
@@ -1190,7 +1329,7 @@ static struct intel_ring *
 execlists_context_pin(struct intel_engine_cs *engine,
                      struct i915_gem_context *ctx)
 {
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
        void *vaddr;
        int ret;
 
@@ -1224,6 +1363,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
        ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
        ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
                i915_ggtt_offset(ce->ring->vma);
+       ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head;
 
        ce->state->obj->pin_global++;
        i915_gem_context_get(ctx);
@@ -1242,7 +1382,7 @@ err:
 static void execlists_context_unpin(struct intel_engine_cs *engine,
                                    struct i915_gem_context *ctx)
 {
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
 
        lockdep_assert_held(&ctx->i915->drm.struct_mutex);
        GEM_BUG_ON(ce->pin_count == 0);
@@ -1261,8 +1401,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
 
 static int execlists_request_alloc(struct i915_request *request)
 {
-       struct intel_engine_cs *engine = request->engine;
-       struct intel_context *ce = &request->ctx->engine[engine->id];
+       struct intel_context *ce =
+               to_intel_context(request->ctx, request->engine);
        int ret;
 
        GEM_BUG_ON(!ce->pin_count);
@@ -1574,14 +1714,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
        return ret;
 }
 
-static u8 gtiir[] = {
-       [RCS] = 0,
-       [BCS] = 0,
-       [VCS] = 1,
-       [VCS2] = 1,
-       [VECS] = 3,
-};
-
 static void enable_execlists(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
@@ -1641,6 +1773,8 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
+       intel_whitelist_workarounds_apply(engine);
+
        /* We need to disable the AsyncFlip performance optimisations in order
         * to use MI_WAIT_FOR_EVENT within the CS. It should already be
         * programmed to '1' on all products.
@@ -1651,7 +1785,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
 
        I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-       return init_workarounds_ring(engine);
+       return 0;
 }
 
 static int gen9_init_render_ring(struct intel_engine_cs *engine)
@@ -1662,49 +1796,25 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
-       return init_workarounds_ring(engine);
-}
-
-static void reset_irq(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       int i;
-
-       GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
-       /*
-        * Clear any pending interrupt state.
-        *
-        * We do it twice out of paranoia that some of the IIR are double
-        * buffered, and if we only reset it once there may still be
-        * an interrupt pending.
-        */
-       for (i = 0; i < 2; i++) {
-               I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
-                          GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
-               POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
-       }
-       GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
-                  (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+       intel_whitelist_workarounds_apply(engine);
 
-       clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+       return 0;
 }
 
 static void reset_common_ring(struct intel_engine_cs *engine,
                              struct i915_request *request)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct intel_context *ce;
        unsigned long flags;
+       u32 *regs;
 
-       GEM_TRACE("%s seqno=%x\n",
-                 engine->name, request ? request->global_seqno : 0);
+       GEM_TRACE("%s request global=%x, current=%d\n",
+                 engine->name, request ? request->global_seqno : 0,
+                 intel_engine_get_seqno(engine));
 
        /* See execlists_cancel_requests() for the irq/spinlock split. */
        local_irq_save(flags);
 
-       reset_irq(engine);
-
        /*
         * Catch up with any missed context-switch interrupts.
         *
@@ -1715,15 +1825,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
         * requests were completed.
         */
        execlists_cancel_port_requests(execlists);
+       reset_irq(engine);
 
        /* Push back any incomplete requests for replay after the reset. */
        spin_lock(&engine->timeline->lock);
        __unwind_incomplete_requests(engine);
        spin_unlock(&engine->timeline->lock);
 
-       /* Mark all CS interrupts as complete */
-       execlists->active = 0;
-
        local_irq_restore(flags);
 
        /*
@@ -1748,14 +1856,24 @@ static void reset_common_ring(struct intel_engine_cs *engine,
         * future request will be after userspace has had the opportunity
         * to recreate its own state.
         */
-       ce = &request->ctx->engine[engine->id];
-       execlists_init_reg_state(ce->lrc_reg_state,
-                                request->ctx, engine, ce->ring);
+       regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+       if (engine->default_state) {
+               void *defaults;
+
+               defaults = i915_gem_object_pin_map(engine->default_state,
+                                                  I915_MAP_WB);
+               if (!IS_ERR(defaults)) {
+                       memcpy(regs, /* skip restoring the vanilla PPHWSP */
+                              defaults + LRC_STATE_PN * PAGE_SIZE,
+                              engine->context_size - PAGE_SIZE);
+                       i915_gem_object_unpin_map(engine->default_state);
+               }
+       }
+       execlists_init_reg_state(regs, request->ctx, engine, request->ring);
 
        /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-       ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-               i915_ggtt_offset(ce->ring->vma);
-       ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+       regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+       regs[CTX_RING_HEAD + 1] = request->postfix;
 
        request->ring->head = request->postfix;
        intel_ring_update_space(request->ring);
@@ -2015,7 +2133,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 {
        int ret;
 
-       ret = intel_ring_workarounds_emit(rq);
+       ret = intel_ctx_workarounds_emit(rq);
        if (ret)
                return ret;
 
@@ -2075,11 +2193,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->unpark = NULL;
 
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+       if (engine->i915->preempt_context)
+               engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
        engine->i915->caps.scheduler =
                I915_SCHEDULER_CAP_ENABLED |
                I915_SCHEDULER_CAP_PRIORITY;
-       if (engine->i915->preempt_context)
+       if (intel_engine_has_preemption(engine))
                engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
@@ -2118,7 +2238,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 static inline void
 logical_ring_default_irqs(struct intel_engine_cs *engine)
 {
-       unsigned shift = engine->irq_shift;
+       unsigned int shift = 0;
+
+       if (INTEL_GEN(engine->i915) < 11) {
+               const u8 irq_shifts[] = {
+                       [RCS]  = GEN8_RCS_IRQ_SHIFT,
+                       [BCS]  = GEN8_BCS_IRQ_SHIFT,
+                       [VCS]  = GEN8_VCS1_IRQ_SHIFT,
+                       [VCS2] = GEN8_VCS2_IRQ_SHIFT,
+                       [VECS] = GEN8_VECS_IRQ_SHIFT,
+               };
+
+               shift = irq_shifts[engine->id];
+       }
+
        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
@@ -2174,9 +2307,13 @@ static int logical_ring_init(struct intel_engine_cs *engine)
        }
 
        engine->execlists.preempt_complete_status = ~0u;
-       if (engine->i915->preempt_context)
+       if (engine->i915->preempt_context) {
+               struct intel_context *ce =
+                       to_intel_context(engine->i915->preempt_context, engine);
+
                engine->execlists.preempt_complete_status =
-                       upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc);
+                       upper_32_bits(ce->lrc_desc);
+       }
 
        return 0;
 
@@ -2458,7 +2595,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
                                            struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_object *ctx_obj;
-       struct intel_context *ce = &ctx->engine[engine->id];
+       struct intel_context *ce = to_intel_context(ctx, engine);
        struct i915_vma *vma;
        uint32_t context_size;
        struct intel_ring *ring;
@@ -2529,7 +2666,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
         */
        list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
                for_each_engine(engine, dev_priv, id) {
-                       struct intel_context *ce = &ctx->engine[engine->id];
+                       struct intel_context *ce =
+                               to_intel_context(ctx, engine);
                        u32 *reg;
 
                        if (!ce->state)
@@ -2551,3 +2689,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
                }
        }
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_lrc.c"
+#endif