[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [UNIKRAFT PATCH 1/1] Add Undefined Behavior Sanitizer Library



Hello Vlad,

Thanks for the work, Please find the comments inline.

Thanks & Regards

Sharan

On 1/3/21 8:29 PM, Vlad-Andrei Badoiu wrote:
This patch adds an internal library that implements the functions needed
by UBSAN. The flag can either be enabled globally by enabling the
LIBUBSAN_GLOBAL config option or manually by adding the
-fsanitize=undefined flag.

UBSAN can catch runtime bugs such as dereferencing NULL or non-canonical
addresses, certain undefined overflow errors, shifting or multiplying
data which is out of bounds, and other errors.

For example, ubsan will catch the following addition overflow:
int k = 0x7fffffff;
k += 127;

Signed-off-by: Vlad-Andrei Badoiu <vlad_andrei.badoiu@xxxxxx>
---
  lib/Makefile.uk         |   1 +
  lib/ubsan/Config.uk     |  14 ++
  lib/ubsan/Makefile.uk   |   5 +
  lib/ubsan/exportsyms.uk |  19 +++
  lib/ubsan/ubsan.c       | 321 ++++++++++++++++++++++++++++++++++++++++
  5 files changed, 360 insertions(+)
  create mode 100644 lib/ubsan/Config.uk
  create mode 100644 lib/ubsan/Makefile.uk
  create mode 100644 lib/ubsan/exportsyms.uk
  create mode 100644 lib/ubsan/ubsan.c

diff --git a/lib/Makefile.uk b/lib/Makefile.uk
index 07e8a29..e9fc205 100644
--- a/lib/Makefile.uk
+++ b/lib/Makefile.uk
@@ -37,3 +37,4 @@ $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ukmmap))
  $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ukblkdev))
  $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/posix-process))
  $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/uksp))
+$(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ubsan))
diff --git a/lib/ubsan/Config.uk b/lib/ubsan/Config.uk
new file mode 100644
index 0000000..92239a0
--- /dev/null
+++ b/lib/ubsan/Config.uk
@@ -0,0 +1,14 @@
+menuconfig LIBUBSAN
+       bool "ubsan: Undefined Behavior Sanitization"
+       default n
+
+if LIBUBSAN
+
+config LIBUBSAN_GLOBAL
+       bool "Global undefined sanitization"
+       default n
+       help

There is a trailing white space here.


+|       |       Enable undefined behavior sanitization globally.

There are some additional characters in the config.


+
+endif
+
diff --git a/lib/ubsan/Makefile.uk b/lib/ubsan/Makefile.uk
new file mode 100644
index 0000000..ffbb2e9
--- /dev/null
+++ b/lib/ubsan/Makefile.uk
@@ -0,0 +1,5 @@
+$(eval $(call addlib_s,libubsan,$(CONFIG_LIBUBSAN)))
+
+COMPFLAGS-$(CONFIG_LIBUBSAN_GLOBAL)    += -fsanitize=undefined
+
+LIBUBSAN_SRCS-y += $(LIBUBSAN_BASE)/ubsan.c
diff --git a/lib/ubsan/exportsyms.uk b/lib/ubsan/exportsyms.uk
new file mode 100644
index 0000000..50f6165
--- /dev/null
+++ b/lib/ubsan/exportsyms.uk
@@ -0,0 +1,19 @@
+__ubsan_handle_type_mismatch
+__ubsan_handle_type_mismatch_v1
+__ubsan_handle_mul_overflow
+__ubsan_handle_sub_overflow
+__ubsan_handle_pointer_overflow
+__ubsan_handle_add_overflow
+__ubsan_handle_negate_overflow
+__ubsan_handle_out_of_bounds
+__ubsan_handle_shift_out_of_bounds
+__ubsan_handle_nonnull_arg
+__ubsan_handle_divrem_overflow
+__ubsan_handle_vla_bound_not_positive
+__ubsan_handle_load_invalid_value
+__ubsan_handle_cfi_bad_icall
+__ubsan_handle_nonnull_return
+__ubsan_handle_function_type_mismatch
+__ubsan_handle_float_cast_overflow
+__ubsan_handle_builtin_unreachable
+__ubsan_handle_missing_return
diff --git a/lib/ubsan/ubsan.c b/lib/ubsan/ubsan.c
new file mode 100644
index 0000000..fa8feac
--- /dev/null
+++ b/lib/ubsan/ubsan.c
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Authors: Vlad-Andrei Badoiu <vlad_andrei.badoiu@xxxxxx>
+ *
+ * Copyright (c) 2020, University Politehnica of Bucharest. All rights 
reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdint.h>
+#include <uk/assert.h>
+
+struct source_location {
+       const char *filename;
+       uint32_t line;
+       uint32_t column;
+};
+
+struct type_descriptor {
+       uint16_t kind;
+       uint16_t info;
+       char name[];
+};
+
+struct out_of_bounds_info {
+       struct source_location location;
+       struct type_descriptor left_type;
+       struct type_descriptor right_type;
+};
+
+struct type_mismatch_info {
+       struct source_location location;
+       struct type_descriptor *type;
+       uintptr_t alignment;

           The alignment field of the `type_mismatch_info` becomes a field of            type char in compiler version gcc-8 and llvm version 8. With this            change they introduced the type_mismatch_v1. If we want to handle            this in a type_mismatch_info, we probably should name it suitably            and make the alignment field as unsigned long alignment instead of
           the uintptr_t.

+       uint8_t type_check_kind;
+};
+
+static const struct source_location unknown_location = {
+
+       "<unknown file>",
+       0,
+       0,
+};
+
+typedef uintptr_t ubsan_value_handle_t;
+
+static void ubsan_log_location(const struct source_location *location,
+                       const char *violation) __noreturn;
+
+static void ubsan_log_location(const struct source_location *location,
+                       const char *violation)
+{
+       if (!location || !location->filename)
+               location = &unknown_location;
+
+       UK_CRASH("Undefined behavior at %s:%d:%d:%s",
+               location->filename, location->line,
+               location->column, violation);

          The default behavior we implement is to crash the application. If you

           run ubsan on user application it continues execution. The behavior

          is overriden by using another compiler flag -fnosanitize-recover. We
          probably need to stick with this behavior.
+}
+
+void __ubsan_handle_type_mismatch(void *data_raw,
+                                 void *pointer_raw)
+{
+       struct type_mismatch_info *data =
+               (struct type_mismatch_info *) data_raw;

          There is data type mismatch between type_mismatch_info and
          type_mismatch_info_v1, so we should careful in handling it
          in a common structure.


+       ubsan_value_handle_t pointer = (ubsan_value_handle_t) pointer_raw;
+       const char *violation = "type mismatch";
+
+       if (!pointer)
+               violation = "null pointer access";
+       else if (data->alignment && (pointer & (data->alignment - 1)))
+               violation = "unaligned access";
+
+       ubsan_log_location(&data->location, violation);
+}
+
+void __ubsan_handle_type_mismatch_v1(void *data_raw,
+                                 void *pointer_raw)
+{
+       __ubsan_handle_type_mismatch(data_raw, pointer_raw);
+}
+
+struct ubsan_overflow_data {
+       struct source_location location;
+       struct type_descriptor *type;
+};
+
+void __ubsan_handle_mul_overflow(void *data_raw,
+                                void *lhs_raw __unused,
+                                void *rhs_raw __unused)

Missing <uk/essentials.h>


+{
+       struct ubsan_overflow_data *data =
+                       (struct ubsan_overflow_data *) data_raw;
+
+       ubsan_log_location(&data->location, "multiplication overflow");
+}
+
+void __ubsan_handle_pointer_overflow(void *data_raw,
+                                void *base __unused,
+                                void *result __unused)
+{
+       struct ubsan_overflow_data *data =
+                       (struct ubsan_overflow_data *) data_raw;
          The data_raw is of type pointer_overflow. This type contains
          only the Location information. The `struct type_descriptor *type` is           missing pointer_overflow data type. We should be careful with this conversion.
+
+       ubsan_log_location(&data->location, "pointer overflow");
+}
+
+void __ubsan_handle_sub_overflow(void *data_raw,
+                                void *lhs_raw __unused,
+                                void *rhs_raw __unused)
+{
+       struct ubsan_overflow_data *data =
+                       (struct ubsan_overflow_data *) data_raw;
+
+       ubsan_log_location(&data->location, "subtraction overflow");
+}
+
+void __ubsan_handle_add_overflow(void *data_raw,
+                                void *lhs_raw __unused,
+                                void *rhs_raw __unused)
+{
+       struct ubsan_overflow_data *data =
+                       (struct ubsan_overflow_data *) data_raw;
+
+       ubsan_log_location(&data->location, "addition overflow");
+}
+
+void __ubsan_handle_negate_overflow(void *data_raw,
+                                   void *old_value_raw __unused)
+{
+       struct ubsan_overflow_data *data =
+                       (struct ubsan_overflow_data *) data_raw;
+
+       ubsan_log_location(&data->location, "negation overflow");
+}
+
+void __ubsan_handle_divrem_overflow(void *data_raw,
+                                   void *lhs_raw __unused,
+                                   void *rhs_raw __unused)
+{
+       struct ubsan_overflow_data *data =
+                       (struct ubsan_overflow_data *) data_raw;
+
+       ubsan_log_location(&data->location, "division remainder overflow");
+}
+
+
+struct ubsan_out_of_bounds_data {
+       struct source_location location;
+       struct type_descriptor *array_type;
+       struct type_descriptor *index_type;
+};
+
+void __ubsan_handle_out_of_bounds(void *data_raw,
+                                 void *index_raw __unused)
+{
+       struct ubsan_out_of_bounds_data *data =
+                       (struct ubsan_out_of_bounds_data *) data_raw;
+
+       ubsan_log_location(&data->location, "out of bounds");
Maybe the error message "Index out of bound" instead of "out of bound". We could also print which index caused it.
+}
+
+struct ubsan_shift_out_of_bounds_data {
+       struct source_location location;
+       struct type_descriptor *lhs_type;
+       struct type_descriptor *rhs_type;
+};
+
+void __ubsan_handle_shift_out_of_bounds(void *data_raw,
+                                       void *lhs_raw __unused,
+                                       void *rhs_raw __unused)
+{
+       struct ubsan_shift_out_of_bounds_data *data =
+               (struct ubsan_shift_out_of_bounds_data *) data_raw;
+
+       ubsan_log_location(&data->location, "shift out of bounds");
+}

          Shift out bound provides information on why this was caused. Do we           print the information here or postpone it to later revision of the library.

          Based on the provided input  arguments  we can identify whether there is problem with the shift           operand or the number itself. If we want to add this later maybe we can add a TODO
          comment in the callback.


+
+struct ubsan_nonnull_arg_data {
+       struct source_location location;
+       struct source_location attr_location;

Missing the index arg.


+};
+
+void __ubsan_handle_nonnull_arg(void *data_raw)
+{
+       struct ubsan_nonnull_arg_data *data =
+               (struct ubsan_nonnull_arg_data *) data_raw;
+
+       ubsan_log_location(&data->location, "null argument");
+}
+
+struct ubsan_vla_bound_data {
+       struct source_location location;
+       struct type_descriptor *type;
+};
+
+void __ubsan_handle_vla_bound_not_positive(void *data_raw,
+                                       void *bound_raw __unused)
+{
+       struct ubsan_vla_bound_data *data =
+                       (struct ubsan_vla_bound_data *) data_raw;
+
+       ubsan_log_location(&data->location, "negative variable array length");
+}
+
+struct ubsan_invalid_value_data {
+       struct source_location location;
+       struct type_descriptor *type;
+};
+
+void __ubsan_handle_load_invalid_value(void *data_raw,
+                               void *value_raw __unused)
+{
+       struct ubsan_invalid_value_data *data =
+               (struct ubsan_invalid_value_data *) data_raw;
+
+       ubsan_log_location(&data->location, "invalid value load");
It is wise to print which type along with the value.
+}
+
+struct ubsan_cfi_bad_icall_data {
CheckKind field is missing cfi_bad_icall_data
+       struct source_location location;
+       struct type_descriptor *type;
+};
+
+void __ubsan_handle_cfi_bad_icall(void *data_raw,
+                               void *value_raw __unused)
+{
+       struct ubsan_cfi_bad_icall_data *data =
+               (struct ubsan_cfi_bad_icall_data *) data_raw;
+
+       ubsan_log_location(&data->location,
+               "control flow integrity check failure during indirect call");
The value_raw give you a ptr to the pc. We might as well print it.
+}
+
+struct ubsan_nonnull_return_data {
+       struct source_location location;
+       struct source_location attr_location;
+};
  There are two version for the structure `nonnull_return_data` like the

  type_mismatch. The version v1 which is available in the later compilers

  `struct source_location location` is no longer available.

  This location information is passed as `data_raw` argument in the `__ubsan_handle_nonnull_return`


+
+void __ubsan_handle_nonnull_return(void *data_raw)
+{
+       struct ubsan_nonnull_return_data *data =
+               (struct ubsan_nonnull_return_data *) data_raw;
+
+       ubsan_log_location(&data->location, "null return");
+}
+
+struct ubsan_function_type_mismatch_data {
+       struct source_location location;
+       struct type_descriptor *type;
+};
+
+void __ubsan_handle_function_type_mismatch(void *data_raw,
+                               void *value_raw __unused)
+{
+       struct ubsan_function_type_mismatch_data *data =
+               (struct ubsan_function_type_mismatch_data *) data_raw;
+
+       ubsan_log_location(&data->location, "function type mismatch");
We could also print the location using the value_raw  as it point to the `pc`.
+}
+
+struct ubsan_float_cast_overflow_data {
+       struct source_location location;
+       struct type_descriptor *from_type;
+       struct type_descriptor *to_type;
+};
  There are two version for the floating point conversion data type. The version
  'v1' has only from_type and to_type, while 'v2' has the source
  location include in it. We should explicitly name the structure type. We
  may look to support both versions, else we need to at least add an comment

  on the supported version.


+
+void __ubsan_handle_float_cast_overflow(void *data_raw,
+                               void *from_raw __unused)
+{
+       struct ubsan_float_cast_overflow_data *data =
+               (struct ubsan_float_cast_overflow_data *) data_raw;
+
+       ubsan_log_location(&data->location, "float cast overflow");
+}
+
+struct ubsan_unreachable_data {
+       struct source_location location;
+};
+
+void __ubsan_handle_builtin_unreachable(void *data_raw)
+{
+       struct ubsan_unreachable_data *data =
+               (struct ubsan_unreachable_data *) data_raw;
+
+       ubsan_log_location(&data->location, "reached unreachable");
+}
+
+void __ubsan_handle_missing_return(void *data_raw) __noreturn;
+
+void __ubsan_handle_missing_return(void *data_raw)
+{
+       struct ubsan_unreachable_data *data =
+               (struct ubsan_unreachable_data *) data_raw;
+
+       ubsan_log_location(&data->location, "missing return");
+}



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.