diff --git a/CHANGELOG b/CHANGELOG
index f7cae9b8..d5e0a2ba 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,7 @@ Development version (next release)
 - Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs
 - Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
 - Tests now also exit with an error code when OpenCL errors or compilation errors occur
+- Tests now also check for the L2 error in case of half-precision
 - Added the OverrideParameters function to the API to be able to supply custom tuning parmeters
 - Various minor fixes and enhancements
 - Added tuned parameters for various devices (see README)
diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp
index 7c64855a..5207c0ab 100644
--- a/test/correctness/testblas.cpp
+++ b/test/correctness/testblas.cpp
@@ -23,7 +23,7 @@ namespace clblast {
 template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kVectorDims = { 7, 93, 4096 };
 template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kIncrements = { 1, 2, 7 };
 template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixDims = { 7, 64 };
-template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixVectorDims = { 61, 512 };
+template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixVectorDims = { 61, 256 };
 template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kBandSizes = { 4, 19 };
 
 // Test settings for the invalid tests
@@ -182,23 +182,39 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st
     auto result1 = get_result_(args, buffers1, queue_);
     auto result2 = get_result_(args, buffers2, queue_);
 
+    // Computes the L2 error
+    const auto kErrorMarginL2 = getL2ErrorMargin<T>();
+    auto l2error = 0.0;
+    for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) {
+      for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) {
+        auto index = get_index_(args, id1, id2);
+        l2error += SquaredDifference(result1[index], result2[index]);
+      }
+    }
+    l2error /= (get_id1_(args) * get_id2_(args));
+
     // Checks for differences in the output
     auto errors = size_t{0};
     for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) {
       for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) {
         auto index = get_index_(args, id1, id2);
         if (!TestSimilarity(result1[index], result2[index])) {
-          errors++;
+          if (!(l2error < kErrorMarginL2)) { errors++; } // also counts when the L2 error is NaN
           if (verbose_) {
             if (get_id2_(args) == 1) { fprintf(stdout, "\n   Error at index %zu: ", id1); }
             else { fprintf(stdout, "\n   Error at %zu,%zu: ", id1, id2); }
             fprintf(stdout, " %s (reference) versus ", ToString(result1[index]).c_str());
             fprintf(stdout, " %s (CLBlast)", ToString(result2[index]).c_str());
+            if (l2error < kErrorMarginL2) {
+              fprintf(stdout, " - error suppressed by a low total L2 error\n");
+            }
           }
         }
       }
     }
-    if (verbose_ && errors > 0) { fprintf(stdout, "\n   "); }
+    if (verbose_ && errors > 0) {
+      fprintf(stdout, "\n   Combined L2 error: %.2e\n   ", l2error);
+    }
 
     // Tests the error count (should be zero)
     TestErrorCount(errors, get_id1_(args)*get_id2_(args), args);
diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp
index eb79008d..6cafd7bc 100644
--- a/test/correctness/tester.cpp
+++ b/test/correctness/tester.cpp
@@ -22,22 +22,46 @@
 namespace clblast {
 // =================================================================================================
 
-// Eror margings (relative and absolute)
+// Relative error margins
 template <typename T>
 float getRelativeErrorMargin() {
   return 0.005f; // 0.5% is considered acceptable for float/double-precision
 }
+template float getRelativeErrorMargin<float>(); // as the above default
+template float getRelativeErrorMargin<double>(); // as the above default
+template float getRelativeErrorMargin<float2>(); // as the above default
+template float getRelativeErrorMargin<double2>(); // as the above default
 template <>
 float getRelativeErrorMargin<half>() {
   return 0.080f; // 8% (!) error is considered acceptable for half-precision
 }
+
+// Absolute error margins
 template <typename T>
 float getAbsoluteErrorMargin() {
   return 0.001f;
 }
+template float getAbsoluteErrorMargin<float>(); // as the above default
+template float getAbsoluteErrorMargin<double>(); // as the above default
+template float getAbsoluteErrorMargin<float2>(); // as the above default
+template float getAbsoluteErrorMargin<double2>(); // as the above default
 template <>
 float getAbsoluteErrorMargin<half>() {
-  return 0.10f; // especially small values are inaccurate for half-precision
+  return 0.15f; // especially small values are inaccurate for half-precision
+}
+
+// L2 error margins: upper bound on the mean squared difference over all tested output elements
+template <typename T>
+double getL2ErrorMargin() {
+  return 0.0; // zero disables the L2 check: every element-wise mismatch then counts as an error
+}
+template double getL2ErrorMargin<float>(); // as the above default
+template double getL2ErrorMargin<double>(); // as the above default
+template double getL2ErrorMargin<float2>(); // as the above default
+template double getL2ErrorMargin<double2>(); // as the above default
+template <>
+double getL2ErrorMargin<half>() {
+  return 0.05; // half-precision results are considered OK as long as the L2 error is low enough
 }
 
 // Error margin: numbers beyond this value are considered equal to inf or NaN
@@ -144,6 +168,9 @@ Tester<T,U>::Tester(const std::vector<std::string> &arguments, const bool silent
           kUnsupportedReference.c_str());
   fprintf(stdout, "* Testing with error margins of %.1lf%% (relative) and %.3lf (absolute)\n",
           100.0f * getRelativeErrorMargin<T>(), getAbsoluteErrorMargin<T>());
+  if (getL2ErrorMargin<T>() != 0.0f) {
+    fprintf(stdout, "* and a combined maximum allowed L2 error of %.2e\n", getL2ErrorMargin<T>());
+  }
 
   // Initializes clBLAS
   #ifdef CLBLAST_REF_CLBLAS
@@ -405,7 +432,7 @@ template <typename T, typename U>
 void Tester<T,U>::PrintErrorLog(const std::vector<ErrorLogEntry> &error_log) {
   for (auto &entry: error_log) {
     if (entry.error_percentage != kStatusError) {
-      fprintf(stdout, "   Error rate %.1lf%%: ", entry.error_percentage);
+      fprintf(stdout, "   Error rate %.2lf%%: ", entry.error_percentage);
     }
     else {
       fprintf(stdout, "   Status code %d (expected %d): ",
@@ -499,6 +526,37 @@ bool TestSimilarity(const half val1, const half val2) {
 
 // =================================================================================================
 
+// Retrieves the squared difference as a double, used for example for computing the L2 error
+template <typename T>
+double SquaredDifference(const T val1, const T val2) {
+  const auto difference = static_cast<double>(val1) - static_cast<double>(val2);
+  return difference * difference; // squared in double so float inputs cannot overflow/underflow
+}
+
+// Compiles the default case for standard data-types
+template double SquaredDifference<float>(const float, const float);
+template double SquaredDifference<double>(const double, const double);
+
+// Specialisations for non-standard data-types
+template <>
+double SquaredDifference(const float2 val1, const float2 val2) {
+  // Adds up the squared differences of the real and of the imaginary parts
+  const auto squared_real = SquaredDifference(val1.real(), val2.real());
+  return squared_real + SquaredDifference(val1.imag(), val2.imag());
+}
+template <>
+double SquaredDifference(const double2 val1, const double2 val2) {
+  // Adds up the squared differences of the real and of the imaginary parts
+  const auto squared_real = SquaredDifference(val1.real(), val2.real());
+  return squared_real + SquaredDifference(val1.imag(), val2.imag());
+}
+template <>
+double SquaredDifference(const half val1, const half val2) {
+  return SquaredDifference(HalfToFloat(val1), HalfToFloat(val2)); // compares the converted values
+}
+
+// =================================================================================================
+
 // Retrieves a list of example scalar values, used for the alpha and beta arguments for the various
 // routines. This function is specialised for the different data-types.
 template <> const std::vector<float> GetExampleScalars(const bool full_test) {
diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp
index 113f03ef..f60be04b 100644
--- a/test/correctness/tester.hpp
+++ b/test/correctness/tester.hpp
@@ -150,11 +150,20 @@ class Tester {
 // template specialization)
 // =================================================================================================
 
+// Error margins (relative, absolute, and L2) as a function of the tested data-type
+template <typename T> float getRelativeErrorMargin();
+template <typename T> float getAbsoluteErrorMargin();
+template <typename T> double getL2ErrorMargin();
+
 // Compares two floating point values and returns whether they are within an acceptable error
 // margin. This replaces GTest's EXPECT_NEAR().
 template <typename T>
 bool TestSimilarity(const T val1, const T val2);
 
+// Retrieves the squared difference, used for example for computing the L2 error
+template <typename T>
+double SquaredDifference(const T val1, const T val2);
+
 // Retrieves a list of example scalar values, used for the alpha and beta arguments for the various
 // routines. This function is specialised for the different data-types.
 template <typename T>