[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [QEMU PATCH 1/1] virtgpu: do not destroy resources when guest suspend


  • To: Robert Beckett <bob.beckett@xxxxxxxxxxxxx>
  • From: "Chen, Jiqian" <Jiqian.Chen@xxxxxxx>
  • Date: Thu, 15 Jun 2023 07:23:23 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=amd.com; dmarc=pass action=none header.from=amd.com; dkim=pass header.d=amd.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=ZxxLaNYQNm8DqVphkGJwTfNyDob1aRqd8yFgUa3gSwM=; b=CY9KRieAJuQl3SF3tBguyF+hSP0M+LyvswR0jR81C2JlqxC6FxVWciL6T6dnoFUmpP/A7RyFblklwwJZ0UV7mg5be4okPOLiZOvgP/CDN+km6H1h9mkUzeQpIa/lnNk8Zu4abbJZJXHvgiRzUOUVAfxZlqU/zRUH+/S9QnYEDyv+/Z0F9Qj0k4aXQ+UTY5UqBR99FyNh+AmczArDzarV4l7OXO7veKgM7jBygmlAinqrlVGE1dJWpWo64hQNtR4yBEu5qcBxnX74kZljb+e+sMKvIUQu0fG/r9w096pY7aT38GBW2tgFuVJ2RcgdSHnEE9hYwErZ7oKK6RS2laeoAQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=D+WpOIDSbQyvy011IaJLJIaxgDPz6KVpYGomFOJsGyirWjdghfQNqCduawLR+KZVi4EcnIm/fJbtAufNIxlnt4orS9GzYP/RnqRxB9eNnd+Vxrfogq3SbUkaVQonQU+76ZHZBD4icvCFokNrqC6E3TTG0x9oXMlXTwem5mSAuLolHxjiu+e2uFItmz0gmtmSvCRv5WWjhy98eaZetzvVeoYTW+PRg/qaQFgvC/Y+BmYTXO1RKwJq6HRymgX65Usyv7ojLuxSF7DnU3ALL4H/XeutrxEfE80rbaIpdxIm6/GRgOfA07M5M/n7THDPVJpeyu76yCMq2k3dJGAFNtYm8A==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=amd.com;
  • Cc: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>, "qemu-devel@xxxxxxxxxx" <qemu-devel@xxxxxxxxxx>, "Dr . David Alan Gilbert" <dgilbert@xxxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, "Michael S . Tsirkin" <mst@xxxxxxxxxx>, Gerd Hoffmann <kraxel@xxxxxxxxxx>, "Deucher, Alexander" <Alexander.Deucher@xxxxxxx>, "Koenig, Christian" <Christian.Koenig@xxxxxxx>, "Hildebrand, Stewart" <Stewart.Hildebrand@xxxxxxx>, Xenia Ragiadakou <burzalodowa@xxxxxxxxx>, "Huang, Honglei1" <Honglei1.Huang@xxxxxxx>, "Zhang, Julia" <Julia.Zhang@xxxxxxx>, "Huang, Ray" <Ray.Huang@xxxxxxx>, "Chen, Jiqian" <Jiqian.Chen@xxxxxxx>
  • Delivery-date: Thu, 15 Jun 2023 07:23:42 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHZmbUjDQZRLEUzRUmYVwd7de4D46+BHLmAgArhWIA=
  • Thread-topic: [QEMU PATCH 1/1] virtgpu: do not destroy resources when guest suspend

On 2023/6/9 00:41, Robert Beckett wrote:
> 
> On 08/06/2023 03:56, Jiqian Chen wrote:
>> After suspending and resuming guest VM, you will get
>> a black screen, and the display can't come back.
>>
>> This is because when guest did suspending, it called
>> into qemu to call virtio_gpu_gl_reset. In function
>> virtio_gpu_gl_reset, it destroyed resources and reset
>> renderer, which were used for display. As a result,
>> guest's screen can't come back to the time when it was
>> suspended and only showed black.
>>
>> So, this patch adds a new ctrl message
>> VIRTIO_GPU_CMD_STATUS_FREEZING to get notification from
>> guest. If guest is during suspending, it sets freezing
>> status of virtgpu to true, this will prevent destroying
>> resources and resetting renderer when guest calls into
>> virtio_gpu_gl_reset. If guest is during resuming, it sets
>> freezing to false, and then virtio_gpu_gl_reset will keep
>> its origin actions and has no other impaction.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@xxxxxxx>
>> ---
>>   hw/display/virtio-gpu-gl.c                  |  9 ++++++-
>>   hw/display/virtio-gpu-virgl.c               |  3 +++
>>   hw/display/virtio-gpu.c                     | 26 +++++++++++++++++++--
>>   include/hw/virtio/virtio-gpu.h              |  3 +++
>>   include/standard-headers/linux/virtio_gpu.h |  9 +++++++
>>   5 files changed, 47 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
>> index e06be60dfb..e11ad233eb 100644
>> --- a/hw/display/virtio-gpu-gl.c
>> +++ b/hw/display/virtio-gpu-gl.c
>> @@ -100,7 +100,14 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>>        */
>>       if (gl->renderer_inited && !gl->renderer_reset) {
>>           virtio_gpu_virgl_reset_scanout(g);
>> -        gl->renderer_reset = true;
>> +        /*
>> +         * If guest is suspending, we shouldn't reset renderer,
>> +         * otherwise, the display can't come back to the time when
>> +         * it was suspended after guest resumed.
>> +         */
>> +        if (!g->freezing) {
>> +            gl->renderer_reset = true;
>> +        }
>>       }
>>   }
>>   diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
>> index 73cb92c8d5..183ec92d53 100644
>> --- a/hw/display/virtio-gpu-virgl.c
>> +++ b/hw/display/virtio-gpu-virgl.c
>> @@ -464,6 +464,9 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_GET_EDID:
>>           virtio_gpu_get_edid(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
>> index 5e15c79b94..8f235d7848 100644
>> --- a/hw/display/virtio-gpu.c
>> +++ b/hw/display/virtio-gpu.c
>> @@ -373,6 +373,16 @@ static void virtio_gpu_resource_create_blob(VirtIOGPU 
>> *g,
>>       QTAILQ_INSERT_HEAD(&g->reslist, res, next);
>>   }
>>   +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd)
>> +{
>> +    struct virtio_gpu_status_freezing sf;
>> +
>> +    VIRTIO_GPU_FILL_CMD(sf);
>> +    virtio_gpu_bswap_32(&sf, sizeof(sf));
>> +    g->freezing = sf.freezing;
>> +}
>> +
>>   static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id)
>>   {
>>       struct virtio_gpu_scanout *scanout = 
>> &g->parent_obj.scanout[scanout_id];
>> @@ -986,6 +996,9 @@ void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
>>       case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
>>           virtio_gpu_resource_detach_backing(g, cmd);
>>           break;
>> +    case VIRTIO_GPU_CMD_STATUS_FREEZING:
>> +        virtio_gpu_cmd_status_freezing(g, cmd);
>> +        break;
>>       default:
>>           cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
>>           break;
>> @@ -1344,6 +1357,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, 
>> Error **errp)
>>       QTAILQ_INIT(&g->reslist);
>>       QTAILQ_INIT(&g->cmdq);
>>       QTAILQ_INIT(&g->fenceq);
>> +
>> +    g->freezing = false;
>>   }
>>     void virtio_gpu_reset(VirtIODevice *vdev)
>> @@ -1352,8 +1367,15 @@ void virtio_gpu_reset(VirtIODevice *vdev)
>>       struct virtio_gpu_simple_resource *res, *tmp;
>>       struct virtio_gpu_ctrl_command *cmd;
>>   -    QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> -        virtio_gpu_resource_destroy(g, res);
>> +    /*
>> +     * If guest is suspending, we shouldn't destroy resources,
>> +     * otherwise, the display can't come back to the time when
>> +     * it was suspended after guest resumed.
>> +     */
> 
> 
> What should happen if qemu is torn down while the guest is suspended.

I don't know if there is a place to reclaim all memory in Qemu. If not, in the 
case you said, the memory of resources may leak.
But I ran "sudo xl destroy <domain id>" to destroy Qemu without suspending 
guest, and then I found it didn't destroy resources too (By adding printings in 
virtio_gpu_reset, but the function didn't be called when destroying Qemu). So, 
the leak may be an existing problem before using my patch.

> Currently there is no other place where the resources will be destroyed. 
> While it is likely viable to rely on process auto tear down of mem and files 
> from the host cleanup point of view, it feels bad to rely on that.
> Perhaps an inverted conditional with destroy loop in 
> virtio_gpu_device_unrealize() would suffice?

But may the inverted conditional also need to rely on guest?
And I also added printings in virtio_gpu_device_unrealize(), but didn't get 
them when destroying Qemu.
If there is a more suitable place to destroy resources instead of during 
suspending process, the leak and the dependency problem will go.

> 
> I am not a qemu expert, but I am assuming that the unrealize call will be 
> called during machine destruction if the guest is suspended.
> Also if qemu supports (or intends to support in future) hotplugging of the 
> device, the would help avoid leaks until qemu exit too.
> 

I think so too. The hotplug function is beneficial to the leaks. But it seems 
Qemu doesn't support virtgpu hotplug now.
If virtgpu supports hotplug, its call stack may be:
main_destroy
        libxl_domain_destroy
                libxl__device_pci_destroy_all
                        libxl__device_pci_remove_common
                                do_pci_remove
                                        qmp_device_del
                                                hotplug_handler_unplug
                                                        unrealize
> 
>> +    if (!g->freezing) {
>> +        QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
>> +            virtio_gpu_resource_destroy(g, res);
>> +        }
>>       }
>>         while (!QTAILQ_EMPTY(&g->cmdq)) {
>> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
>> index 2e28507efe..c21c2990fb 100644
>> --- a/include/hw/virtio/virtio-gpu.h
>> +++ b/include/hw/virtio/virtio-gpu.h
>> @@ -173,6 +173,7 @@ struct VirtIOGPU {
>>         uint64_t hostmem;
>>   +    bool freezing;
>>       bool processing_cmdq;
>>       QEMUTimer *fence_poll;
>>       QEMUTimer *print_stats;
>> @@ -284,5 +285,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
>>   void virtio_gpu_virgl_reset(VirtIOGPU *g);
>>   int virtio_gpu_virgl_init(VirtIOGPU *g);
>>   int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
>> +void virtio_gpu_cmd_status_freezing(VirtIOGPU *g,
>> +                         struct virtio_gpu_ctrl_command *cmd);
>>     #endif
>> diff --git a/include/standard-headers/linux/virtio_gpu.h 
>> b/include/standard-headers/linux/virtio_gpu.h
>> index 2da48d3d4c..aefffbd751 100644
>> --- a/include/standard-headers/linux/virtio_gpu.h
>> +++ b/include/standard-headers/linux/virtio_gpu.h
>> @@ -116,6 +116,9 @@ enum virtio_gpu_ctrl_type {
>>       VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
>>       VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
>> +
>> +    /* status */
>> +    VIRTIO_GPU_CMD_STATUS_FREEZING = 0x1300,
>>   };
>>     enum virtio_gpu_shm_id {
>> @@ -453,4 +456,10 @@ struct virtio_gpu_resource_unmap_blob {
>>       uint32_t padding;
>>   };
>>   +/* VIRTIO_GPU_CMD_STATUS_FREEZING */
>> +struct virtio_gpu_status_freezing {
>> +    struct virtio_gpu_ctrl_hdr hdr;
>> +    __u32 freezing;
>> +};
>> +
>>   #endif

-- 
Best regards,
Jiqian Chen.

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.