[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-API] [PATCH] silence a spurious alert



# HG changeset patch
# User David Scott <dave.scott@xxxxxxxxxxxxx>
# Date 1270118619 -3600
# Node ID 453a340d8fb0ebdf82af49b03dabd4ad0236fdab
# Parent  419d6473c6afec9cf87b2d8554ef5fb641294026
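CA-34993: If a blob sync fails, log it to syslog as an 'error' but do not
generate an alert, since this isn't an earth-shattering failure. The error is
only logged when the host is believed to be online and no rolling upgrade is
in progress, because a sync failure is expected when the host is offline or
being upgraded.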

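For diagnostic purposes, we store the time of the last successful blob sync in
Host.other_config under the key "last_blob_sync_time" (the value is the result
of Unix.gettimeofday, formatted as a string).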

Also remove an unused post-blob-sync plugin call.

Signed-off-by: David Scott <dave.scott@xxxxxxxxxxxxx>

diff -r 419d6473c6af -r 453a340d8fb0 ocaml/idl/api_messages.ml
--- a/ocaml/idl/api_messages.ml Tue Mar 30 14:22:11 2010 +0100
+++ b/ocaml/idl/api_messages.ml Thu Apr 01 11:43:39 2010 +0100
@@ -59,8 +59,6 @@
 let vm_crashed = addMessage "VM_CRASHED"
 let vm_cloned = addMessage "VM_CLONED"
 
-let host_sync_data_failed = addMessage "HOST_SYNC_DATA_FAILED"
-
 let host_clock_skew_detected = addMessage "HOST_CLOCK_SKEW_DETECTED"
 let host_clock_skew_detected_priority = 10L
 let host_clock_went_backwards = addMessage "HOST_CLOCK_WENT_BACKWARDS"
diff -r 419d6473c6af -r 453a340d8fb0 ocaml/xapi/xapi_globs.ml
--- a/ocaml/xapi/xapi_globs.ml  Tue Mar 30 14:22:11 2010 +0100
+++ b/ocaml/xapi/xapi_globs.ml  Thu Apr 01 11:43:39 2010 +0100
@@ -386,6 +386,8 @@
 
 let xapi_blob_location = "/var/xapi/blobs"
 
+let last_blob_sync_time = "last_blob_sync_time"
+
 (* Port on which to send network heartbeats *)
 let xha_udp_port = 694 (* same as linux-ha *)
 
diff -r 419d6473c6af -r 453a340d8fb0 ocaml/xapi/xapi_sync.ml
--- a/ocaml/xapi/xapi_sync.ml   Tue Mar 30 14:22:11 2010 +0100
+++ b/ocaml/xapi/xapi_sync.ml   Thu Apr 01 11:43:39 2010 +0100
@@ -20,15 +20,6 @@
 
 let sync_lock = Mutex.create ()
 
-let post_sync_hook __context host =
-  Helpers.call_api_functions ~__context (fun rpc session_id ->
-    try
-      let result = Client.Client.Host.call_plugin rpc session_id host 
"post-blob-sync" "run" [] in
-      debug "Result of sync: '%s'" result
-    with e ->
-      warn "Post sync hook failed: exception: %s" (ExnHelper.string_of_exn e)
-  )
-
 let sync_host ~__context host =
   Mutex.execute sync_lock (fun () ->
     try
@@ -44,24 +35,19 @@
        let env = Unix.environment () in
        let output,log = Forkhelpers.execute_command_get_output ~env 
"/usr/bin/rsync" 
["--delete";"-avz";localpath;remotepath;"-e";"/opt/xensource/bin/xsh"] in
        debug "sync output: '%s' log: '%s'" output log;
-       post_sync_hook __context host
+       (* Store the last blob sync time in the Host.other_config *)
+       (try Db.Host.remove_from_other_config ~__context ~self:host 
~key:Xapi_globs.last_blob_sync_time with _ -> ());
+       Db.Host.add_to_other_config ~__context ~self:host 
~key:Xapi_globs.last_blob_sync_time ~value:(string_of_float (Unix.gettimeofday 
()));
       end else begin
        debug "Ignoring host synchronise: localhost=%b host_has_storage=%b" 
localhost host_has_storage
       end; 
-      if host_has_storage && localhost then post_sync_hook __context host
     with Forkhelpers.Spawn_internal_error(log,output,status) ->
-      error "Error in rsyncing: log='%s' output='%s'" log output;
-      (* CA-20574: Supress the alert if we're in rolling upgrade mode -- we 
expect this to fail during rolling upgrade and we don't want
-        the user to see a scary error message *)
-      if not (Helpers.rolling_upgrade_in_progress ~__context) then
-       begin
-         let uuid = Db.Host.get_uuid ~__context ~self:host in
-         let name = Db.Host.get_name_label ~__context ~self:host in
-         ignore(Xapi_message.create ~__context 
~name:Api_messages.host_sync_data_failed ~priority:2L ~cls:`Host ~obj_uuid:uuid
-                  ~body:(Printf.sprintf "Failed to synchonise data with host 
'%s'. Rsync reported '%s'" name log))
-       end
-      else
-       debug "Not generating HOST_SYNC_DATA_FAILED_ALERT because we are in 
rolling upgrade mode"
+               (* Do we think the host is supposed to be online? *)
+               let online = try let m = Db.Host.get_metrics ~__context 
~self:host in Db.Host_metrics.get_live ~__context ~self:m with _ -> false in
+               (* In rolling upgrade mode we would also expect a failure *)
+               let rolling_upgrade = Helpers.rolling_upgrade_in_progress 
~__context in
+               if online && not rolling_upgrade 
+               then error "Unexpected failure synchronising blobs to host %s; 
log='%s'; output='%s'" (Ref.string_of host) log output;
   )
 
 let do_sync () =
3 files changed, 11 insertions(+), 25 deletions(-)
ocaml/idl/api_messages.ml |    2 --
ocaml/xapi/xapi_globs.ml  |    2 ++
ocaml/xapi/xapi_sync.ml   |   32 +++++++++-----------------------


Attachment: xen-api.hg.patch
Description: Text Data

_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api

 


Rackspace

Lists.xenproject.org is hosted with Rackspace, which monitors our
servers 24x7x365 and backs them with Rackspace's Fanatical Support®.