[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] RE: XCP: Patch to XenAPI stack to allow domain resume, HAP control and a few other features.
Hi Roger, Thanks for your patch. Two initial minor requests: 1. Could you add your comments to the patch metadata itself? 2. Also, could you add a "Signed-off-by: ..." line? I'll do a more thorough review tomorrow - Cheers, Dave From: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx [mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxx] On Behalf Of Roger Cruz Sent: 18 November 2009 20:00 To: xen-devel@xxxxxxxxxxxxxxxxxxx Subject: [Xen-devel] XCP: Patch to XenAPI stack to allow domain resume, HAP control and a few other features. These changes incorporate several features that allow a domain to be resumed, control HAP on a per VM basis, don't force restart of some PIFs when XAPI is restarted, allows VDIs to be shared across VMs under limited circumstances, changes the periodic pool mod event to an "sm" event, etc. Enjoy. Roger Cruz Marathon Technologies Corp # HG changeset patch # User root@xxxxxxxxxxxxxxxxxxxxx # Node ID 80df1bc7ce62a3fd8c6eab7d3493a107d946ae79 # Parent 3e8c0167940d249deeca80f93759bd1adf2c0a45 Merge Citrix MNR stream with open source diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/dbsync_slave.ml --- a/ocaml/xapi/dbsync_slave.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/dbsync_slave.ml Wed Nov 18 14:49:01 2009 -0500 @@ -613,7 +613,8 @@ let resynchronise_pif_currently_attached let is_management_pif = Xapi_pif.is_my_management_pif ~__context ~self in let was_pif_brought_up_at_start_of_day = List.mem self (List.map fst pifs_brought_up) in (* Mark important interfaces as attached *) - let mark_as_attached = is_management_pif || was_pif_brought_up_at_start_of_day in + let mark_as_attached = is_management_pif || was_pif_brought_up_at_start_of_day || + (Mtc.is_pif_attached_to_mtc_vms_and_should_not_be_offline ~__context ~self) in Db.PIF.set_currently_attached ~__context ~self ~value:mark_as_attached; Db.PIF.set_management ~__context ~self ~value:is_management_pif; debug "Marking PIF device %s as %s" (Db.PIF.get_device ~__context ~self) (if mark_as_attached then 
"attached" else "offline") diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/message_forwarding.ml --- a/ocaml/xapi/message_forwarding.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/message_forwarding.ml Wed Nov 18 14:49:01 2009 -0500 @@ -2693,8 +2693,10 @@ end (* -------------------------------------------------------------------------- *) let set_sharable ~__context ~self ~value = + if not (Mtc.is_vdi_accessed_by_protected_VM ~__context ~vdi:self) then begin let sr = Db.VDI.get_SR ~__context ~self in Sm.assert_session_has_internal_sr_access ~__context ~sr; + end; Local.VDI.set_sharable ~__context ~self ~value let set_managed ~__context ~self ~value = diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/mtc.ml --- a/ocaml/xapi/mtc.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/mtc.ml Wed Nov 18 14:49:01 2009 -0500 @@ -52,6 +52,8 @@ open DD *) let vm_protected_key = "vm_protected" let vm_peer_uuid_key = "vm_peer_uuid" +let mtc_pvm_key = "mtc_pvm" +let mtc_vdi_share_key = "mtc_vdi_shareable" (* * This function looks at the 'other-config' field in the VM's configuration @@ -240,7 +242,6 @@ let event_notify_task_status ~__context the foreground phase (ie, domain has been suspended). Called only by the source of a migration. *) let event_notify_entering_suspend ~__context ~self = - if (is_this_vm_protected ~__context ~self) then ( with_xc_and_xs (fun xc xs -> let key = (migration_base_path ~xs ~__context ~vm:self) ^ @@ -248,13 +249,11 @@ let event_notify_entering_suspend ~__con debug "Entering suspend. Key: %s" key; xs.Xs.write key "1"; ) - ) (* A blocking wait. Wait for external party to acknowlege the suspend stage has been entered. If no response is received within timeout, routine simply returns `ACKED to simulate a response. 
*) let event_wait_entering_suspend_acked ?timeout ~__context ~self = - if (is_this_vm_protected ~__context ~self) then ( with_xc_and_xs (fun xc xs -> let ack_key = (migration_base_path ~xs ~__context ~vm:self) ^ @@ -287,7 +286,6 @@ let event_wait_entering_suspend_acked ?t debug "Timed-out waiting for suspend ack on key: %s" ack_key; `TIMED_OUT ) - ) else `ACKED (* Check to see if an abort request has been made through XenStore *) let event_check_for_abort_req ~__context ~self = @@ -304,6 +302,77 @@ let event_check_for_abort_req ~__context value = "1" ) ) else false + + + +(* + * ----------------------------------------------------------------------------- + * Network Functions + * ----------------------------------------------------------------------------- + *) +(* Determine if we should allow the specified PIF to be marked not online when XAPI is + * restarted. Returns TRUE if this is a PIF fielding a VIF attached to an MTC- + * protected VM and we don't want it marked offline because we have checked here that the + * PIF and its bridge are already up. 
+ *) +let is_pif_attached_to_mtc_vms_and_should_not_be_offline ~__context ~self = + try + + (* Get the VMs that are hooked up to this PIF *) + let network = Db.PIF.get_network ~__context ~self in + let vifs = Db.Network.get_VIFs ~__context ~self:network in + + + (* Figure out the VIFs attached to local MTC VMs and then derive their networks, bridges and PIFs *) + let vms = List.map (fun vif -> + Db.VIF.get_VM ~__context ~self:vif) + vifs in + let localhost = Helpers.get_localhost ~__context in + let resident_vms = List.filter (fun vm -> + localhost = (Db.VM.get_resident_on ~__context ~self:vm)) + vms in + let protected_vms = List.filter (fun vm -> + List.mem_assoc mtc_pvm_key (Db.VM.get_other_config ~__context ~self:vm)) + resident_vms in + + let protected_vms_uuid = List.map (fun vm -> + Db.VM.get_uuid ~__context ~self:vm) + protected_vms in + + + (* If we have protected VMs using this PIF, then decide whether it should be marked offline *) + if protected_vms <> [] then begin + let current = Netdev.Bridge.list () in + let bridge = Db.Network.get_bridge ~__context ~self:network in + let nic = Db.PIF.get_device ~__context ~self in + debug "The following MTC VMs are using %s for PIF %s: [%s]" + nic + (Db.PIF.get_uuid ~__context ~self) + (String.concat "; " protected_vms_uuid); + + let nic_device_path = Printf.sprintf "/sys/class/net/%s/operstate" nic in + let nic_device_state = Netdev.Internal.read_one_line nic_device_path in + + let bridge_device_path = Printf.sprintf "/sys/class/net/%s/operstate" bridge in + let bridge_device_state = Netdev.Internal.read_one_line bridge_device_path in + + (* The PIF should be marked online if: + 1) its network has a bridge created in dom0 and + 2) the bridge link is up and + 3) the physical NIC is up and + 4) the bridge operational state is up (unknown is also up). 
+ *) + let mark_online = (List.mem bridge current) && + (Netdev.Link.is_up bridge) && + nic_device_state = "up" && + (bridge_device_state = "up" || + bridge_device_state = "unknown") in + + debug "Its current operational state is %s. Therefore we'll be marking it as %s" + nic_device_state (if mark_online then "online" else "offline"); + mark_online + end else false + with _ -> false (* * ----------------------------------------------------------------------------- @@ -328,3 +397,22 @@ let update_vm_state_if_necessary ~__cont raise e end +(* Raises an exception if the destination VM is not in the expected power state: halted *) +let verify_dest_vm_power_state ~__context ~vm = + let actual = Db.VM.get_power_state ~__context ~self:vm in + if actual != `Halted then + raise(Api_errors.Server_error(Api_errors.vm_bad_power_state, [Ref.string_of vm; "halted"; (Record_util.power_to_string actual)])) + +(* Returns true if VDI is accessed by an MTC-protected VM *) +let is_vdi_accessed_by_protected_VM ~__context ~vdi = + + let uuid = Uuid.of_string (Db.VDI.get_uuid ~__context ~self:vdi) in + + let protected_vdi = List.mem_assoc mtc_vdi_share_key (Db.VDI.get_other_config ~__context ~self:vdi) in + + (* Return TRUE if this VDI is attached to a protected VM *) + if protected_vdi then begin + debug "VDI %s is attached to a Marathon-protected VM" (Uuid.to_string uuid); + true + end else + false diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/storage_access.ml --- a/ocaml/xapi/storage_access.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/storage_access.ml Wed Nov 18 14:49:01 2009 -0500 @@ -183,7 +183,9 @@ module VDI = let uuid = Uuid.of_string (Db.VDI.get_uuid ~__context ~self) in with_vdi_lock (fun () -> - if is_already_attached uuid && (mode <> get_mode uuid) then + (* MTC: A protected VM needs to have its disks mounted into two VMs: one as R+W and another as RO *) + if is_already_attached uuid && (mode <> get_mode uuid) && + not (Mtc.is_vdi_accessed_by_protected_VM ~__context 
~vdi:self) then failwith (Printf.sprintf "The VDI %s is already attached in %s mode; it can't be attached in %s mode!" (Uuid.to_string uuid) (string_of_mode (get_mode uuid)) (string_of_mode mode)); let attach_path = Sm.call_sm_vdi_functions ~__context ~vdi:self diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/vmops.ml --- a/ocaml/xapi/vmops.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/vmops.ml Wed Nov 18 14:49:01 2009 -0500 @@ -362,11 +362,26 @@ let create ~__context ~xc ~xs ~self (sna Xapi_xenops_errors.handle_xenops_error (fun () -> info "Memory free = %Ld; scrub = %Ld" (Memory.get_free_memory_kib ~xc) (Memory.get_scrub_memory_kib ~xc); + let hap = if hvm then ( + if (List.mem_assoc "hap" platformdata) then ( + if (List.assoc "hap" platformdata) = "false" then ( + debug "HAP will be disabled for VM %s." (Uuid.to_string uuid); + false + ) else if (List.assoc "hap" platformdata) = "true" then ( + debug "HAP will be enabled for VM %s." (Uuid.to_string uuid); + true + ) else ( + debug "Unrecognized HAP platform value. Assuming default settings for VM %s." (Uuid.to_string uuid); + true + ) + ) else + true + ) else false in let domid = (try let info = { Domain.ssidref = 0l; Domain.hvm = hvm; - Domain.hap = hvm; + Domain.hap = hap; Domain.name = snapshot.API.vM_name_label; Domain.platformdata = platformdata; Domain.xsdata = xsdata; @@ -793,7 +808,7 @@ exception Domain_shutdown_for_wrong_reas exception Domain_shutdown_for_wrong_reason of Xal.died_reason (** Tells a VM to shutdown with a specific reason (reboot/halt/poweroff). *) -let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self domid reason = +let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self ?(rel_timeout = 5.) domid reason = (* Set the task allowed_operations to include cancel *) if reason <> Domain.Suspend then TaskHelper.set_cancellable ~__context; @@ -818,7 +833,7 @@ let clean_shutdown_with_reason ?(at = fu let finished = ref false in while (Unix.gettimeofday () -. 
start < total_timeout) && not(!finished) do try - let r = Xal.wait_release xal ~timeout:5. domid in + let r = Xal.wait_release xal ~timeout:rel_timeout domid in if not (match_xal_and_shutdown r reason) then begin let errmsg = Printf.sprintf "Domain died with reason: %s when it should have been %s" diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/xapi_event.ml --- a/ocaml/xapi/xapi_event.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/xapi_event.ml Wed Nov 18 14:49:01 2009 -0500 @@ -282,10 +282,10 @@ let heartbeat ~__context = (fun () -> (* We must hold the database lock since we are sending an update for a real object and we don't want to accidentally transmit an older snapshot. *) - let pool = Helpers.get_pool ~__context in - let pool_r = Db.Pool.get_record ~__context ~self:pool in - let pool_xml = API.To.pool_t pool_r in - event_add ~snapshot:pool_xml "pool" "mod" (Ref.string_of pool) + let sm = List.hd (Db.SM.get_all ~__context) in + let sm_r = Db.SM.get_record ~__context ~self:sm in + let sm_xml = API.To.sM_t sm_r in + event_add ~snapshot:sm_xml "SM" "mod" (Ref.string_of sm) ) with e -> error "Caught exception sending event heartbeat: %s" (ExnHelper.string_of_exn e) diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xapi/xapi_vm_migrate.ml --- a/ocaml/xapi/xapi_vm_migrate.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xapi/xapi_vm_migrate.ml Wed Nov 18 14:49:01 2009 -0500 @@ -105,6 +105,10 @@ let extra_debug_paths __context vm = (* MTC: Routine to report migration progress via task and events *) let migration_progress_cb ~__context vm_migrate_failed ~vm progress = + + if TaskHelper.is_cancelling ~__context + then raise (Api_errors.Server_error(Api_errors.task_cancelled, [ Ref.string_of (Context.get_task_id __context) ])); + TaskHelper.set_progress ~__context progress; Mtc.event_notify_task_status ~__context ~vm ~status:`pending progress; if Mtc.event_check_for_abort_req ~__context ~self:vm then @@ -115,17 +119,23 @@ let migration_progress_cb ~__context vm_ requires 
that an external agent acknowledge the transition prior to continuing. *) let migration_suspend_cb ~xal ~xc ~xs ~__context vm_migrate_failed ~self domid reason = - Mtc.event_notify_entering_suspend ~__context ~self; - - let ack = Mtc.event_wait_entering_suspend_acked ~timeout:60. ~__context ~self in - - (* If we got the ack, then proceed to shutdown the domain with the suspend - reason. If we failed to get the ack, then raise an exception to abort - the migration *) - if (ack = `ACKED) then - Vmops.clean_shutdown_with_reason ~xal ~__context ~self domid Domain.Suspend - else - vm_migrate_failed "Failed to receive suspend acknowledgement within timeout period or an abort was requested." + + if TaskHelper.is_cancelling ~__context + then raise (Api_errors.Server_error(Api_errors.task_cancelled, [ Ref.string_of (Context.get_task_id __context) ])); + + if (Mtc.is_this_vm_protected ~__context ~self) then ( + Mtc.event_notify_entering_suspend ~__context ~self; + let ack = Mtc.event_wait_entering_suspend_acked ~timeout:60. ~__context ~self in + + (* If we got the ack, then proceed to shutdown the domain with the suspend + reason. If we failed to get the ack, then raise an exception to abort + the migration *) + if (ack = `ACKED) then + Vmops.clean_shutdown_with_reason ~xal ~__context ~self ~rel_timeout:0.25 domid Domain.Suspend + else + vm_migrate_failed "Failed to receive suspend acknowledgement within timeout period or an abort was requested." + ) else + Vmops.clean_shutdown_with_reason ~xal ~__context ~self domid Domain.Suspend (* ------------------------------------------------------------------- *) (* Part 2: transmitter and receiver functions *) @@ -239,6 +249,12 @@ let transmitter ~xal ~__context is_local memory image has been transmitted. We assume that we cannot recover this domain and that it must be destroyed. We must make sure we detect failure in the remote to complete the admin and set the VM to halted if this happens. 
*) + + (* Recover an MTC VM if abort was requested during the suspended phase *) + if Mtc.event_check_for_abort_req ~__context ~self:vm then ( + vm_migrate_failed "An external abort event was detected during the VM suspend phase."; + ); + Stats.time_this "VM migration downtime" (fun () -> (* Depending on where the exn in the try block happens, we may or may not want to deactivate VDIs in the finally clause. In the case of a non-localhost migration @@ -288,26 +304,38 @@ let transmitter ~xal ~__context is_local detach_in_finally_clause := false; end; - - (* Now send across the RRD *) - (try Monitor_rrds.migrate_push ~__context (Db.VM.get_uuid ~__context ~self:vm) host with e -> - debug "Caught exception while trying to push rrds: %s" (ExnHelper.string_of_exn e); - log_backtrace ()); + (* MTC: don't send RRDs since MTC VMs are not really migrated. *) + if not (Mtc.is_this_vm_protected ~__context ~self:vm) then ( + (* Now send across the RRD *) + (try Monitor_rrds.migrate_push ~__context (Db.VM.get_uuid ~__context ~self:vm) host with e -> + debug "Caught exception while trying to push rrds: %s" (ExnHelper.string_of_exn e); + log_backtrace ()); + ); (* We mustn't return to our caller (and release locks) until the remote confirms that it has reparented the VM by setting resident-on, domid *) debug "Sender 7. waiting for all-clear from remote"; (* <-- [4] Synchronisation point *) - Handshake.recv_success fd + Handshake.recv_success fd; + if Mtc.is_this_vm_protected ~__context ~self:vm then ( + let hvm = Helpers.has_booted_hvm ~__context ~self:vm in + debug "Sender 7a. resuming source domain"; + Domain.resume ~xc ~xs ~hvm ~cooperative:true domid + ); with e -> (* This should only happen if the receiver has died *) let msg = Printf.sprintf "Caught exception %s at last minute during migration" (ExnHelper.string_of_exn e) in debug "%s" msg; error "%s" msg; - Xapi_vm_lifecycle.force_state_reset ~__context ~self:vm ~value:`Halted; + (* MTC: don't reset state upon failure. 
MTC VMs will simply resume *) + if not (Mtc.is_this_vm_protected ~__context ~self:vm) then + Xapi_vm_lifecycle.force_state_reset ~__context ~self:vm ~value:`Halted; vm_migrate_failed msg ) (fun () -> + if Mtc.is_this_vm_protected ~__context ~self:vm then ( + debug "MTC: Sender won't clean up by destroying remains of local domain"; + ) else ( debug "Sender cleaning up by destroying remains of local domain"; if !deactivate_in_finally_clause then List.iter (fun vdi -> Storage_access.VDI.deactivate ~__context ~self:vdi) vdis; @@ -316,6 +344,7 @@ let transmitter ~xal ~__context is_local let preserve_xs_vm = (Helpers.get_localhost ~__context = host) in Vmops.destroy_domain ~preserve_xs_vm ~clear_currently_attached:false ~detach_devices:(not is_localhost_migration) ~deactivate_devices:(!deactivate_in_finally_clause) ~__context ~xc ~xs ~self:vm domid) + ) ) (* Stats.timethis *) with (* If the domain shuts down incorrectly, rely on the event thread for tidying up *) @@ -554,6 +583,9 @@ let pool_migrate_nolock ~__context ~vm (fun () -> Unixext.set_tcp_nodelay insecure_fd true; + (* Set the task allowed_operations to include cancel *) + TaskHelper.set_cancellable ~__context; + let secure_rpc = Helpers.make_rpc ~__context in debug "Sender 1. Logging into remote server"; let session_id = Client.Session.slave_login ~rpc:secure_rpc ~host @@ -621,6 +653,13 @@ let pool_migrate_nolock ~__context ~vm host session_id vm xc xs live); with e -> debug "Sender Caught exception: %s" (ExnHelper.string_of_exn e); + with_xc_and_xs (fun xc xs -> + if Mtc.is_this_vm_protected ~__context ~self:vm then ( + debug "MTC: exception encountered. Resuming source domain"; + let domid = Int64.to_int (Db.VM.get_domid ~__context ~self:vm) in + let hvm = Helpers.has_booted_hvm ~__context ~self:vm in + Domain.resume ~xc ~xs ~hvm ~cooperative:true domid + )); (* NB the domain might now be in a crashed state: rely on the event thread to do the cleanup asynchronously. 
*) raise_api_error e @@ -784,6 +823,10 @@ let handler req fd = debug "Receiver 2. checking we have enough free memory"; with_xc_and_xs (fun xc xs -> + (* MTC-3009: The dest VM of a Marathon protected VM MUST be in halted state. *) + if Mtc.is_this_vm_protected ~__context ~self:dest_vm then ( + Mtc.verify_dest_vm_power_state ~__context ~vm:dest_vm + ); (* XXX: on early failure consider calling TaskHelper.failed? *) (* Vmops.with_enough_memory ~__context ~xc ~xs ~memory_required_kib diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xenguest/xenguest_main.ml --- a/ocaml/xenguest/xenguest_main.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xenguest/xenguest_main.ml Wed Nov 18 14:49:01 2009 -0500 @@ -154,6 +154,8 @@ let fork_capture_stdout_stderr callback exit 0 with _ -> exit 0 end; + + Sys.set_signal Sys.sigterm (Sys.Signal_handle (fun i -> debug "Signal handler killing PID=%d" pid; Unix.kill pid Sys.sigterm)); List.iter Unix.close [ stdout_w; stderr_w; output_w ]; let finished = ref false in diff -r 3e8c0167940d -r 80df1bc7ce62 ocaml/xenops/device.ml --- a/ocaml/xenops/device.ml Wed Nov 11 16:55:51 2009 +0000 +++ b/ocaml/xenops/device.ml Wed Nov 18 14:49:01 2009 -0500 @@ -1398,7 +1398,10 @@ let signal ~xs ~domid ?wait_for ?param c match wait_for with | Some state -> let pw = cmdpath ^ "/state" in - Watch.wait_for ~xs (Watch.value_to_become pw state) + (* MTC: The default timeout for this operation was 20mins, which is + * way too long for someone to wait. + *) + Watch.wait_for ~xs ~timeout:30. (Watch.value_to_become pw state) | None -> () let get_state ~xs domid = _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |