# HG changeset patch # User David Scott # Date 1259362818 0 # Node ID f509339c8f7474c96d3ca6532d2d184443440cc5 # Parent 895c6ed73dbc4fa98dd9ea351a887ccbaa2c714b CA-35372: when setting maxmem to cap the allocations of a domain, set the target too. This required rewriting the logic which determines whether domains are active or inactive. Now we say that a domain is 'making progress' if it is moving towards its target. Domains which are 'making progress' are always active. We say that a domain is inactive if (i) it has not reached its target; and (ii) it hasn't been 'making progress' for more than the threshold (currently 5s). Signed-off-by: David Scott diff -r 895c6ed73dbc -r f509339c8f74 ocaml/xenops/squeeze.ml --- a/ocaml/xenops/squeeze.ml Fri Nov 27 23:00:17 2009 +0000 +++ b/ocaml/xenops/squeeze.ml Fri Nov 27 23:00:18 2009 +0000 @@ -188,62 +188,65 @@ *) let assume_balloon_driver_stuck_after = 5. (* seconds *) - type t = { memory_actual_updates: (int, int64 * float) Hashtbl.t; - has_hit_targets: (int, bool) Hashtbl.t } + type per_domain_state = { + mutable last_actual_kib: int64; (** last value of memory actual seen *) + mutable last_makingprogress_time: float; (** last time we saw progress towards the target *) + mutable stuck: bool; (** set by update once the domain has had an outstanding request but no progress for longer than assume_balloon_driver_stuck_after; cleared when progress is next seen *) + } + type t = { + per_domain: (int, per_domain_state) Hashtbl.t; (** monitoring state per domain, keyed by domid; entries for domains absent from the latest snapshot are removed by update *) + } (** Make a monitoring object *) - let make () : t = { memory_actual_updates = Hashtbl.create 10; has_hit_targets = Hashtbl.create 10 } + let make () : t = + { per_domain = Hashtbl.create 10 } (** Update our internal state given a snapshot of the outside world *) - let update (x: t) (state: host) (now: float) = + let update (x: t) (host: host) (now: float) = List.iter - (fun domain -> - let hit_target = has_hit_target domain.inaccuracy_kib domain.memory_actual_kib domain.target_kib in - if not hit_target && Hashtbl.mem x.has_hit_targets domain.domid then begin - debug "domid %d is nolonger on its target; target = %Ld; memory_actual = %Ld" domain.domid 
domain.target_kib domain.memory_actual_kib; - Hashtbl.remove x.has_hit_targets domain.domid - end; - - let have_useful_update = - (* either I have no information at all *) - if not (Hashtbl.mem x.memory_actual_updates domain.domid) then begin - true - end else if domain.memory_actual_kib <> fst (Hashtbl.find x.memory_actual_updates domain.domid) then begin - (* or the information I have is out of date *) - true - end else if hit_target then begin - (* we assume that if the target has been hit then the domain is still active *) - if not (Hashtbl.mem x.has_hit_targets domain.domid) then begin - if domain.domid <> 0 (* dom0 is boring and sits on its target *) - then debug "domid %d has hit its target; target = %Ld; memory_actual = %Ld" domain.domid domain.target_kib domain.memory_actual_kib; - Hashtbl.replace x.has_hit_targets domain.domid true - end; - true - end else false in - if have_useful_update - then Hashtbl.replace x.memory_actual_updates domain.domid (domain.memory_actual_kib, now) + (fun (domain: domain) -> + let direction = direction_of_actual domain.inaccuracy_kib domain.memory_actual_kib domain.target_kib in + if not(Hashtbl.mem x.per_domain domain.domid) + then Hashtbl.replace x.per_domain domain.domid + (* new domains are considered to be making progress now and not stuck *) + { last_actual_kib = domain.memory_actual_kib; + last_makingprogress_time = now; + stuck = false }; + let state = Hashtbl.find x.per_domain domain.domid in + let delta_actual = domain.memory_actual_kib -* state.last_actual_kib in (* change in memory_actual since the previous update; a domain first seen in this call was just recorded with its current value *) + state.last_actual_kib <- domain.memory_actual_kib; + (* If memory_actual is moving towards the target then we say we are makingprogress *) + let makingprogress = (delta_actual > 0L && direction = Some Down) || (delta_actual < 0L && direction = Some Up) in (* NOTE(review): a rising memory_actual is paired with Some Down here; confirm this matches direction_of_actual, whose definition is not part of this patch *) + (* We keep track of the last time we were makingprogress. If we are makingprogress now + then we are not stuck. 
*) + if makingprogress then begin + state.last_makingprogress_time <- now; + state.stuck <- false; + end; + (* If there is a request (ie work to do) and we haven't been makingprogress for more than the + assume_balloon_driver_stuck_after then declare this domain stuck. *) + let request = direction <> None in (* ie target <> actual *) + if request && (now -. state.last_makingprogress_time > assume_balloon_driver_stuck_after) + then state.stuck <- true; ) - state.domains; + host.domains; (* Clear out dead domains just in case someone keeps *) (* one of these things around for a long time. *) - let live_domids = List.map (fun domain -> domain.domid) state.domains in + let live_domids = List.map (fun domain -> domain.domid) host.domains in let to_delete = Hashtbl.fold (fun domid _ acc -> if List.mem domid live_domids then acc else domid :: acc ) - x.memory_actual_updates [] + x.per_domain [] in - List.iter (Hashtbl.remove x.memory_actual_updates) to_delete; - List.iter (Hashtbl.remove x.has_hit_targets) to_delete + List.iter (Hashtbl.remove x.per_domain) to_delete (** Return true if we think a particular driver is still making useful progress. If it is not making progress it may have either hit its target or it may have failed. *) let domid_is_active (x: t) domid (now: float) = - if not (Hashtbl.mem x.memory_actual_updates domid) + if not (Hashtbl.mem x.per_domain domid) then false (* it must have been destroyed *) - else - let _, time = Hashtbl.find x.memory_actual_updates domid in - now -. 
time <= assume_balloon_driver_stuck_after + else not (Hashtbl.find x.per_domain domid).stuck (* active unless update has marked the domain stuck; the now argument is no longer consulted on this path *) end type fistpoint = diff -r 895c6ed73dbc -r f509339c8f74 ocaml/xenops/squeeze_test.ml --- a/ocaml/xenops/squeeze_test.ml Fri Nov 27 23:00:17 2009 +0000 +++ b/ocaml/xenops/squeeze_test.ml Fri Nov 27 23:00:18 2009 +0000 @@ -360,9 +360,8 @@ end in let setmaxmem domid kib = - let domain = List.assoc domid domid_to_domain in - debug "setmaxmem domid = %d; kib = %Ld target = %Ld" domid kib (domain#get_domain.target_kib); - domain#set_maxmem kib + debug "setmaxmem domid = %d; kib = %Ld" domid kib; + execute_action { Squeeze.action_domid = domid; new_target_kib = kib } (* the simulated setmaxmem is now expressed as an action whose new target is kib, rather than a direct set_maxmem call on the domain object *) in (* Allow the simulated balloon drivers to change memory_actual_kib *) (* and update host_free_memory accordingly. *) diff -r 895c6ed73dbc -r f509339c8f74 ocaml/xenops/squeeze_xen.ml --- a/ocaml/xenops/squeeze_xen.ml Fri Nov 27 23:00:17 2009 +0000 +++ b/ocaml/xenops/squeeze_xen.ml Fri Nov 27 23:00:18 2009 +0000 @@ -305,7 +305,7 @@ let io ~xc ~xs = { Squeeze.gettimeofday = Unix.gettimeofday; make_host = (fun () -> make_host ~xc ~xs); - domain_setmaxmem = (fun domid kib -> domain_setmaxmem_noexn xc domid kib); + domain_setmaxmem = (fun domid kib -> execute_action ~xc ~xs { Squeeze.action_domid = domid; new_target_kib = kib }); (* NOTE(review): domain_setmaxmem_noexn is no longer called from here; presumably execute_action applies the maxmem cap as well as the target, and whether it can raise is not visible in this patch - confirm *) wait = (fun delay -> ignore(Unix.select [] [] [] delay)); execute_action = (fun action -> execute_action ~xc ~xs action); target_host_free_mem_kib = target_host_free_mem_kib;