@@ -52,6 +52,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
5252{
5353 struct pi_desc * pi_desc = vcpu_to_pi_desc (vcpu );
5454 struct pi_desc old , new ;
55+ unsigned long flags ;
5556 unsigned int dest ;
5657
5758 /*
@@ -62,34 +63,57 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
6263 if (!enable_apicv || !lapic_in_kernel (vcpu ))
6364 return ;
6465
65- /* Nothing to do if PI.SN and PI.NDST both have the desired value. */
66- if (!pi_test_sn (pi_desc ) && vcpu -> cpu == cpu )
66+ /*
67+ * If the vCPU wasn't on the wakeup list and wasn't migrated, then the
68+ * full update can be skipped as neither the vector nor the destination
69+ * needs to be changed.
70+ */
71+ if (pi_desc -> nv != POSTED_INTR_WAKEUP_VECTOR && vcpu -> cpu == cpu ) {
72+ /*
73+ * Clear SN if it was set due to being preempted. Again, do
74+ * this even if there is no assigned device for simplicity.
75+ */
76+ if (pi_test_and_clear_sn (pi_desc ))
77+ goto after_clear_sn ;
6778 return ;
79+ }
80+
81+ local_irq_save (flags );
6882
6983 /*
70- * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
71- * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
72- * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
73- * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
74- * correctly.
84+ * If the vCPU was waiting for wakeup, remove the vCPU from the wakeup
85+ * list of the _previous_ pCPU, which will not be the same as the
86+ * current pCPU if the task was migrated.
7587 */
76- if (pi_desc -> nv == POSTED_INTR_WAKEUP_VECTOR || vcpu -> cpu == cpu ) {
77- pi_clear_sn (pi_desc );
78- goto after_clear_sn ;
88+ if (pi_desc -> nv == POSTED_INTR_WAKEUP_VECTOR ) {
89+ raw_spin_lock (& per_cpu (blocked_vcpu_on_cpu_lock , vcpu -> cpu ));
90+ list_del (& vcpu -> blocked_vcpu_list );
91+ raw_spin_unlock (& per_cpu (blocked_vcpu_on_cpu_lock , vcpu -> cpu ));
7992 }
8093
81- /* The full case. Set the new destination and clear SN. */
8294 dest = cpu_physical_id (cpu );
8395 if (!x2apic_mode )
8496 dest = (dest << 8 ) & 0xFF00 ;
8597
8698 do {
8799 old .control = new .control = READ_ONCE (pi_desc -> control );
88100
101+ /*
102+ * Clear SN (as above) and refresh the destination APIC ID to
103+ * handle task migration (@cpu != vcpu->cpu).
104+ */
89105 new .ndst = dest ;
90106 new .sn = 0 ;
107+
108+ /*
109+ * Restore the notification vector; in the blocking case, the
110+ * descriptor was modified on "put" to use the wakeup vector.
111+ */
112+ new .nv = POSTED_INTR_VECTOR ;
91113 } while (pi_try_set_control (pi_desc , old .control , new .control ));
92114
115+ local_irq_restore (flags );
116+
93117after_clear_sn :
94118
95119 /*
@@ -111,83 +135,24 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
111135 irq_remapping_cap (IRQ_POSTING_CAP );
112136}
113137
114- void vmx_vcpu_pi_put (struct kvm_vcpu * vcpu )
115- {
116- struct pi_desc * pi_desc = vcpu_to_pi_desc (vcpu );
117-
118- if (!vmx_can_use_vtd_pi (vcpu -> kvm ))
119- return ;
120-
121- /* Set SN when the vCPU is preempted */
122- if (vcpu -> preempted )
123- pi_set_sn (pi_desc );
124- }
125-
126- static void __pi_post_block (struct kvm_vcpu * vcpu )
127- {
128- struct pi_desc * pi_desc = vcpu_to_pi_desc (vcpu );
129- struct pi_desc old , new ;
130- unsigned int dest ;
131-
132- /*
133- * Remove the vCPU from the wakeup list of the _previous_ pCPU, which
134- * will not be the same as the current pCPU if the task was migrated.
135- */
136- raw_spin_lock (& per_cpu (blocked_vcpu_on_cpu_lock , vcpu -> pre_pcpu ));
137- list_del (& vcpu -> blocked_vcpu_list );
138- raw_spin_unlock (& per_cpu (blocked_vcpu_on_cpu_lock , vcpu -> pre_pcpu ));
139-
140- dest = cpu_physical_id (vcpu -> cpu );
141- if (!x2apic_mode )
142- dest = (dest << 8 ) & 0xFF00 ;
143-
144- WARN (pi_desc -> nv != POSTED_INTR_WAKEUP_VECTOR ,
145- "Wakeup handler not enabled while the vCPU was blocking" );
146-
147- do {
148- old .control = new .control = READ_ONCE (pi_desc -> control );
149-
150- new .ndst = dest ;
151-
152- /* set 'NV' to 'notification vector' */
153- new .nv = POSTED_INTR_VECTOR ;
154- } while (pi_try_set_control (pi_desc , old .control , new .control ));
155-
156- vcpu -> pre_pcpu = -1 ;
157- }
158-
159138/*
160- * This routine does the following things for vCPU which is going
161- * to be blocked if VT-d PI is enabled.
162- * - Store the vCPU to the wakeup list, so when interrupts happen
163- * we can find the right vCPU to wake up.
164- * - Change the Posted-interrupt descriptor as below:
165- * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
166- * - If 'ON' is set during this process, which means at least one
167- * interrupt is posted for this vCPU, we cannot block it, in
168- * this case, return 1, otherwise, return 0.
169- *
139+ * Put the vCPU on this pCPU's list of vCPUs that needs to be awakened and set
140+ * WAKEUP as the notification vector in the PI descriptor.
170141 */
171- int pi_pre_block (struct kvm_vcpu * vcpu )
142+ static void pi_enable_wakeup_handler (struct kvm_vcpu * vcpu )
172143{
173- struct pi_desc old , new ;
174144 struct pi_desc * pi_desc = vcpu_to_pi_desc (vcpu );
145+ struct pi_desc old , new ;
175146 unsigned long flags ;
176147
177- if (!vmx_can_use_vtd_pi (vcpu -> kvm ) ||
178- vmx_interrupt_blocked (vcpu ))
179- return 0 ;
180-
181148 local_irq_save (flags );
182149
183- vcpu -> pre_pcpu = vcpu -> cpu ;
184150 raw_spin_lock (& per_cpu (blocked_vcpu_on_cpu_lock , vcpu -> cpu ));
185151 list_add_tail (& vcpu -> blocked_vcpu_list ,
186152 & per_cpu (blocked_vcpu_on_cpu , vcpu -> cpu ));
187153 raw_spin_unlock (& per_cpu (blocked_vcpu_on_cpu_lock , vcpu -> cpu ));
188154
189- WARN (pi_desc -> sn == 1 ,
190- "Posted Interrupt Suppress Notification set before blocking" );
155+ WARN (pi_desc -> sn , "PI descriptor SN field set before blocking" );
191156
192157 do {
193158 old .control = new .control = READ_ONCE (pi_desc -> control );
@@ -196,24 +161,37 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
196161 new .nv = POSTED_INTR_WAKEUP_VECTOR ;
197162 } while (pi_try_set_control (pi_desc , old .control , new .control ));
198163
199- /* We should not block the vCPU if an interrupt is posted for it. */
200- if (pi_test_on (pi_desc ))
201- __pi_post_block (vcpu );
164+ /*
165+ * Send a wakeup IPI to this CPU if an interrupt may have been posted
166+ * before the notification vector was updated, in which case the IRQ
167+ * will arrive on the non-wakeup vector. An IPI is needed as calling
168+ * try_to_wake_up() from ->sched_out() isn't allowed (IRQs are not
169+ * enabled until it is safe to call try_to_wake_up() on the task being
170+ * scheduled out).
171+ */
172+ if (pi_test_on (& new ))
173+ apic -> send_IPI_self (POSTED_INTR_WAKEUP_VECTOR );
202174
203175 local_irq_restore (flags );
204- return (vcpu -> pre_pcpu == -1 );
205176}
206177
207- void pi_post_block (struct kvm_vcpu * vcpu )
178+ void vmx_vcpu_pi_put (struct kvm_vcpu * vcpu )
208179{
209- unsigned long flags ;
180+ struct pi_desc * pi_desc = vcpu_to_pi_desc ( vcpu ) ;
210181
211- if (vcpu -> pre_pcpu == -1 )
182+ if (! vmx_can_use_vtd_pi ( vcpu -> kvm ) )
212183 return ;
213184
214- local_irq_save (flags );
215- __pi_post_block (vcpu );
216- local_irq_restore (flags );
185+ if (kvm_vcpu_is_blocking (vcpu ) && !vmx_interrupt_blocked (vcpu ))
186+ pi_enable_wakeup_handler (vcpu );
187+
188+ /*
189+ * Set SN when the vCPU is preempted. Note, the vCPU can both be seen
190+ * as blocking and preempted, e.g. if it's preempted between setting
191+ * its wait state and manually scheduling out.
192+ */
193+ if (vcpu -> preempted )
194+ pi_set_sn (pi_desc );
217195}
218196
219197/*
@@ -254,7 +232,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
254232 * Bail out of the block loop if the VM has an assigned
255233 * device, but the blocking vCPU didn't reconfigure the
256234 * PI.NV to the wakeup vector, i.e. the assigned device
257- * came along after the initial check in pi_pre_block ().
235+ * came along after the initial check in vmx_vcpu_pi_put ().
258236 */
259237void vmx_pi_start_assignment (struct kvm * kvm )
260238{
0 commit comments