[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Fwd: [BUG] Windows is frozen after restore from snapshot



# Abstract

After `xl save win win.mem` and then `xl restore win.hvm win.mem`
the Windows 10 VM remains frozen for about a minute. After that
minute it becomes responsive.

During the freeze the OS remains semi-responsive: on `Ctrl+Shift+Esc`
press the wait cursor appears (blue circle indicator).

This is an intermittent fault that has been reproduced only twice.

# Technical notes

It has been noticed that there were no timer interrupts during
the freeze.

zaytsevgu@xxxxxxxxx has debugged the received Xen state file and
noticed that the flag HPET_TN_PERIODIC has been set after the unfreeze.

Based on that he provided two Python scripts: one to check the
value and one to patch it.

Both "broken" state files we have were detected and patched
successfully.

# Other information

## Target machine

```bash
$ uname -a
Linux localhost 5.4.0-66-generic #74~18.04.2-Ubuntu SMP
Fri Feb 5 11:17:31 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
```

## Xen version

Built from source at tag RELEASE-4.12.4

## OS version

* Windows 10 build 1803 x64
* Hibernation, sleep and other power-saving features are disabled with the following PowerShell commands:
```
powercfg /hibernate off
powercfg /change standby-timeout-ac 0
powercfg /change standby-timeout-dc 0
powercfg /change monitor-timeout-ac 0
powercfg /change monitor-timeout-dc 0
powercfg /change disk-timeout-ac 0
powercfg /change disk-timeout-dc 0
```

## Configuration file

Built with envsubst from the template:

```
name = "$VM_NAME"
type = "hvm"

vcpus = 2
maxvcpus = 2

memory = 2048
maxmem = 2048

on_poweroff = "destroy"
on_reboot = "destroy"
on_watchdog = "destroy"
on_crash = "destroy"
on_soft_reset = "soft-reset"

nomigrate = 1

disk = [ "format=qcow2, vdev=hda, target=$VM_DISK_IMAGE_PATH" ]

vif = [ "type=ioemu, model=e1000" ]

hdtype = "ahci"

shadow_memory = 16

altp2m = "external"

viridian = [ "defaults" ]

videoram = 128
vga = "stdvga"

vnc = 1
vncunused = 1

soundhw = "hda"

usb = 1
usbdevice = [ "usb-tablet" ]
```

## Check script

The script has been provided by zaytsevgu@xxxxxxxxx
(with little refactoring).

It checks whether the image is broken.

```python
#!/usr/bin/env python3


import logging
from pathlib import Path
import sys
import struct


def check_snapshot_hpet(snapshot: Path) -> bool:
    """Report whether a Xen save file is free of the HPET restore problem.

    The file is searched for the ``LibxlFmt`` marker, then for a record
    tagged 9 and, after it, a record tagged 12 (presumably the HPET save
    record -- confirm against the Xen save format).  The 64-bit word that
    follows the first 32 quad-words of that record is taken as a timer
    configuration value; the image is reported as broken when the low byte
    of that value equals ``0x34``.

    Args:
        snapshot: Location of the save file to inspect.

    Returns:
        ``False`` when the suspicious configuration value is found (the
        image is broken); ``True`` otherwise, including the case where the
        ``XENF`` signature is not at the expected position.

    Raises:
        RuntimeError: The stream magic or the first record type is wrong.
        ValueError: ``LibxlFmt`` does not occur in the first 0x1000 bytes.
        struct.error: The file ends before the expected record is reached.
    """

    def get_b32(file):
        # Big-endian unsigned 32-bit integer.
        return struct.unpack('>L', file.read(4))[0]

    def get_l32(file):
        # Little-endian unsigned 32-bit integer.
        return struct.unpack('<L', file.read(4))[0]

    def get_l64(file):
        # Little-endian unsigned 64-bit integer.
        return struct.unpack('<Q', file.read(8))[0]

    def get_hpet_loc_by_tag9(file):
        # Walk the (tag, length) records until the one tagged 12 is found.
        while True:
            tag = get_l32(file)
            tlen = get_l32(file)
            if tag == 12:
                break
            file.seek(tlen, 1)
        # Skip the capabilities word and the 31 quad-words that follow it.
        for _ in range(32):
            get_l64(file)
        timer1_conf = get_l64(file)
        # Basic check: only the low byte of the configuration is compared.
        if timer1_conf & 0xff == 0x34:
            return file.tell() - 8
        return None

    def get_hpet(file):
        get_l32(file)  # x1
        get_l32(file)  # x2
        if file.read(4) != b'XENF':
            return None
        get_b32(file)  # version
        get_b32(file)
        get_b32(file)
        get_l32(file)  # dmt
        get_l32(file)  # page_shift
        get_l32(file)  # xmj
        get_l32(file)  # xmn

        # Skip whole records until the one tagged 9 is reached.
        while True:
            tag_type = get_l32(file)
            rlen = get_l32(file)
            if tag_type == 9:
                break
            file.seek(rlen, 1)
        return get_hpet_loc_by_tag9(file)

    # The context manager closes the file on every exit path, including the
    # exceptions raised while parsing.
    with open(snapshot, 'rb') as original:
        header = original.read(0x1000)
        xl_offset = header.index(b'LibxlFmt')
        original.seek(xl_offset)
        magic = original.read(8)
        if magic != b'LibxlFmt':
            logging.error('Invalid snapshot format')
            raise RuntimeError

        get_b32(original)  # version
        get_b32(original)  # options
        record_type = get_l32(original)
        get_l32(original)  # blen
        if record_type != 1:
            logging.error('Invalid snapshot record type')
            raise RuntimeError

        # An offset means the suspicious value was located -> image is bad.
        return get_hpet(original) is None


if __name__ == '__main__':
    # Run the check only when executed as a script, so importing this module
    # neither reads sys.argv nor terminates the interpreter.
    if len(sys.argv) != 2:
        # Distinct exit status so a usage error is not mistaken for a verdict.
        print('usage: {} SNAPSHOT'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)
    # Exit status 0 = image is good, 1 = image is broken.
    if check_snapshot_hpet(sys.argv[1]):
        print('The image is good! :)')
        sys.exit(0)
    else:
        print('The image is so bad... :(')
        sys.exit(1)
```

The image could be fixed with a little addition:
```python
hpet_new = hpet[0] ^ 0x8
```
, on `hpet_flag_byte_offset`

## Patch script

```python
import sys
import struct
import io

def get_b32(file):
    """Read four bytes from ``file`` and decode them as a big-endian uint32."""
    (value,) = struct.unpack(">L", file.read(4))
    return value

def get_l32(file):
    """Read four bytes from ``file`` and decode them as a little-endian uint32."""
    (value,) = struct.unpack("<L", file.read(4))
    return value

def get_l64(file):
    """Read eight bytes from ``file`` and decode them as a little-endian uint64."""
    (value,) = struct.unpack("<Q", file.read(8))
    return value


def get_hpet_loc_by_tag9(file, rlen):
    """Locate the timer configuration word after the record tagged 12.

    Starting at the current position of ``file``, (tag, length) pairs are
    read and their payloads skipped until a tag equal to 12 is met.  After
    that, 32 quad-words are consumed and the next quad-word is read as the
    timer configuration, which is printed in hex.

    ``rlen`` is accepted but not used by this function.

    Returns the file offset of that configuration word when its low byte
    is ``0x34``; otherwise ``None``.
    """
    record_tag = get_l32(file)
    record_len = get_l32(file)
    while record_tag != 12:
        # Not the wanted record: jump over its payload and read the next header.
        file.seek(record_len, 1)
        record_tag = get_l32(file)
        record_len = get_l32(file)

    get_l64(file)  # caps
    for _ in range(31):
        get_l64(file)

    timer1_conf = get_l64(file)
    print(hex(timer1_conf))
    # Very rough check: only the low byte of the configuration is compared.
    if timer1_conf & 0xff != 0x34:
        return None
    return file.tell() - 8

def get_hpet(file):
    """Skip the ``XENF`` header and hand over to the tag-12 search.

    Two little-endian words are consumed, then the four-byte ``XENF``
    signature is expected; ``None`` is returned when it is absent.  Three
    big-endian and four little-endian words are skipped next (the original
    author labelled them version, dmt, page_shift, xmj and xmn).  Records
    are then skipped until one tagged 9 is found, a message is printed and
    the result of ``get_hpet_loc_by_tag9`` is returned.
    """
    get_l32(file)  # x1
    get_l32(file)  # x2
    if file.read(4) != b"XENF":
        return None

    for _ in range(3):
        get_b32(file)  # version and two further big-endian words
    for _ in range(4):
        get_l32(file)  # dmt, page_shift, xmj, xmn

    while True:
        tag_type = get_l32(file)
        rlen = get_l32(file)
        if tag_type == 9:
            break
        # Some other record: jump over its payload.
        file.seek(rlen, 1)

    print("Found tag 9!")
    return get_hpet_loc_by_tag9(file, rlen)


# Bit toggled by the original script; the accompanying report refers to it
# as HPET_TN_PERIODIC.
HPET_TN_PERIODIC = 0x8
COPY_BLOCK_SIZE = 4 * 1024 * 1024


def _copy_bytes(src, dst, count):
    """Copy exactly ``count`` bytes from ``src`` to ``dst`` in bounded chunks."""
    remaining = count
    while remaining > 0:
        data = src.read(min(COPY_BLOCK_SIZE, remaining))
        if not data:
            raise EOFError("unexpected end of snapshot while copying")
        dst.write(data)
        remaining -= len(data)


def main(argv):
    """Write a copy of the snapshot with the periodic bit set.

    The snapshot named by ``argv[1]`` is parsed with ``get_hpet``.  When a
    timer configuration offset is found, the file is copied to
    ``<snapshot>.hpet_enable_periodic`` with bit ``0x8`` set in the byte at
    that offset.  The output file is created only after the offset is
    known, so a failed run leaves no empty file behind.

    Returns the process exit status: 0 on success, 1 when the snapshot
    cannot be patched, 2 on a usage error.
    """
    if len(argv) != 2:
        print("usage: {} SNAPSHOT".format(argv[0]))
        return 2
    source_path = argv[1]

    with open(source_path, "rb") as original:
        header = original.read(0x1000)
        # find() rather than index(): a missing marker is reported as a
        # format error instead of an unhandled ValueError.
        xl_offset = header.find(b"LibxlFmt")
        if xl_offset < 0:
            print("ERROR INVALID FORMAT")
            return 1
        print("Found offset to xl data: {:x}".format(xl_offset))
        original.seek(xl_offset + len(b"LibxlFmt"))

        get_b32(original)  # version
        get_b32(original)  # options
        record_type = get_l32(original)
        get_l32(original)  # blen
        if record_type != 1:
            print("ERROR INVALID RECORD TYPE")
            return 1

        try:
            hpet_flag_byte_offset = get_hpet(original)
        except struct.error:
            # The parser ran off the end of the file without a match.
            hpet_flag_byte_offset = None
        if hpet_flag_byte_offset is None:
            print("can't find")
            return 1

        print("Got hpet timer flag!")
        print(hex(hpet_flag_byte_offset))

        original.seek(0)
        with open(source_path + ".hpet_enable_periodic", "wb") as new:
            # Everything before the configuration word is copied verbatim.
            _copy_bytes(original, new, hpet_flag_byte_offset)
            hpet = original.read(8)
            # OR instead of XOR: the bit ends up set even if it already was.
            new.write(bytes((hpet[0] | HPET_TN_PERIODIC,)))
            new.write(hpet[1:])
            # The rest of the file follows unchanged.
            for chunk in iter(lambda: original.read(COPY_BLOCK_SIZE), b""):
                new.write(chunk)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
```

--
With best regards,
Sergey Kovalev




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.