From f0cab01b6c9bb9c2f5085837ca86d70d144cca9d Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Fri, 6 Feb 2015 15:06:05 +0000
Subject: [PATCH 1/6] target-tricore: Fix RLC_ADDI, RLC_ADDIH using wrong
 microcode helper

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-tricore/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 996435dbd0..0f305084f1 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -4183,10 +4183,10 @@ static void decode_rlc_opc(CPUTriCoreState *env, DisasContext *ctx,
 
     switch (op1) {
     case OPC1_32_RLC_ADDI:
-        gen_addi_CC(cpu_gpr_d[r2], cpu_gpr_d[r1], const16);
+        gen_addi_d(cpu_gpr_d[r2], cpu_gpr_d[r1], const16);
         break;
     case OPC1_32_RLC_ADDIH:
-        gen_addi_CC(cpu_gpr_d[r2], cpu_gpr_d[r1], const16 << 16);
+        gen_addi_d(cpu_gpr_d[r2], cpu_gpr_d[r1], const16 << 16);
         break;
     case OPC1_32_RLC_ADDIH_A:
         tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r1], const16 << 16);

From 3debbb5af5f63440b170b71bf3aecc0e778f5691 Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Wed, 28 Jan 2015 12:15:05 +0000
Subject: [PATCH 2/6] target-tricore: fix msub32_suov return wrong results

If the signed result of the multiplication overflows, we would get a negative
value, which would result in an addition instead of a subtraction.

Now we do the overflow calculation and saturation by hand instead of using
suov32_neg.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-tricore/op_helper.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index ed26b302b0..08bf4ae0ed 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -443,13 +443,28 @@ target_ulong helper_msub32_ssov(CPUTriCoreState *env, target_ulong r1,
 target_ulong helper_msub32_suov(CPUTriCoreState *env, target_ulong r1,
                                 target_ulong r2, target_ulong r3)
 {
-    int64_t t1 = extract64(r1, 0, 32);
-    int64_t t2 = extract64(r2, 0, 32);
-    int64_t t3 = extract64(r3, 0, 32);
-    int64_t result;
+    uint64_t t1 = extract64(r1, 0, 32);
+    uint64_t t2 = extract64(r2, 0, 32);
+    uint64_t t3 = extract64(r3, 0, 32);
+    uint64_t result;
+    uint64_t mul;
 
-    result = t2 - (t1 * t3);
-    return suov32_neg(env, result);
+    mul = (t1 * t3);
+    result = t2 - mul;
+
+    env->PSW_USB_AV = result ^ result * 2u;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+    /* we calculate ovf by hand here, because the multiplication can overflow on
+       the host, which would give false results if we compare to less than
+       zero */
+    if (mul > t2) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV = (1 << 31);
+        result = 0;
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    return result;
 }
 
 uint64_t helper_msub64_ssov(CPUTriCoreState *env, target_ulong r1,

From 2984cfbdb4dbc31d614aaa0303374dff951e7a31 Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Wed, 21 Jan 2015 14:57:51 +0000
Subject: [PATCH 3/6] target-tricore: Add instructions of RRR2 opcode format

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-tricore/translate.c       | 149 ++++++++++++++++++++++++++++---
 target-tricore/tricore-opcodes.h |   2 +-
 2 files changed, 136 insertions(+), 15 deletions(-)

diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 0f305084f1..e74077ed01 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -1151,16 +1151,34 @@ static void gen_mulr_q(TCGv ret, TCGv arg1, TCGv arg2, uint32_t n)
     tcg_temp_free(temp);
 }
 
+static inline void
+gen_madds_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
+    gen_helper_madd64_ssov(temp64, cpu_env, r1, temp64, r3);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    tcg_temp_free_i64(temp64);
+}
+
 static inline void
 gen_maddsi_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
               int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
+    gen_madds_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_maddsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
     TCGv_i64 temp64 = tcg_temp_new_i64();
     tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_madd64_ssov(temp64, cpu_env, r1, temp64, temp);
+    gen_helper_madd64_suov(temp64, cpu_env, r1, temp64, r3);
     tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
-    tcg_temp_free(temp);
     tcg_temp_free_i64(temp64);
 }
 
@@ -1169,12 +1187,8 @@ gen_maddsui_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
                int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
-    TCGv_i64 temp64 = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_madd64_suov(temp64, cpu_env, r1, temp64, temp);
-    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    gen_maddsu_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
     tcg_temp_free(temp);
-    tcg_temp_free_i64(temp64);
 }
 
 static inline void gen_msubsi_32(TCGv ret, TCGv r1, TCGv r2, int32_t con)
@@ -1191,16 +1205,34 @@ static inline void gen_msubsui_32(TCGv ret, TCGv r1, TCGv r2, int32_t con)
     tcg_temp_free(temp);
 }
 
+static inline void
+gen_msubs_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
+    gen_helper_msub64_ssov(temp64, cpu_env, r1, temp64, r3);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    tcg_temp_free_i64(temp64);
+}
+
 static inline void
 gen_msubsi_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
               int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
+    gen_msubs_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_msubsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
     TCGv_i64 temp64 = tcg_temp_new_i64();
     tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_msub64_ssov(temp64, cpu_env, r1, temp64, temp);
+    gen_helper_msub64_suov(temp64, cpu_env, r1, temp64, r3);
     tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
-    tcg_temp_free(temp);
     tcg_temp_free_i64(temp64);
 }
 
@@ -1209,12 +1241,8 @@ gen_msubsui_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
                int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
-    TCGv_i64 temp64 = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_msub64_suov(temp64, cpu_env, r1, temp64, temp);
-    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    gen_msubsu_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
     tcg_temp_free(temp);
-    tcg_temp_free_i64(temp64);
 }
 
 static void gen_saturate(TCGv ret, TCGv arg, int32_t up, int32_t low)
@@ -5186,6 +5214,92 @@ static void decode_rrr_divide(CPUTriCoreState *env, DisasContext *ctx)
     }
 }
 
+/* RRR2 format */
+static void decode_rrr2_madd(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4;
+
+    op2 = MASK_OP_RRR2_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR2_S1(ctx->opcode);
+    r2 = MASK_OP_RRR2_S2(ctx->opcode);
+    r3 = MASK_OP_RRR2_S3(ctx->opcode);
+    r4 = MASK_OP_RRR2_D(ctx->opcode);
+    switch (op2) {
+    case OPC2_32_RRR2_MADD_32:
+        gen_madd32_d(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADD_64:
+        gen_madd64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_32:
+        gen_helper_madd32_ssov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_64:
+        gen_madds_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADD_U_64:
+        gen_maddu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_U_32:
+        gen_helper_madd32_suov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_U_64:
+        gen_maddsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    }
+}
+
+static void decode_rrr2_msub(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4;
+
+    op2 = MASK_OP_RRR2_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR2_S1(ctx->opcode);
+    r2 = MASK_OP_RRR2_S2(ctx->opcode);
+    r3 = MASK_OP_RRR2_S3(ctx->opcode);
+    r4 = MASK_OP_RRR2_D(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_RRR2_MSUB_32:
+        gen_msub32_d(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUB_64:
+        gen_msub64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_32:
+        gen_helper_msub32_ssov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_64:
+        gen_msubs_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUB_U_64:
+        gen_msubu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_U_32:
+        gen_helper_msub32_suov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_U_64:
+        gen_msubsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    }
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -5475,6 +5589,14 @@ static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
         break;
     case OPCM_32_RRR_DIVIDE:
         decode_rrr_divide(env, ctx);
+        break;
+/* RRR2 Format */
+    case OPCM_32_RRR2_MADD:
+        decode_rrr2_madd(env, ctx);
+        break;
+    case OPCM_32_RRR2_MSUB:
+        decode_rrr2_msub(env, ctx);
+        break;
     }
 }
 
diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h
index baf537f160..30d780ea80 100644
--- a/target-tricore/tricore-opcodes.h
+++ b/target-tricore/tricore-opcodes.h
@@ -1371,7 +1371,7 @@ enum {
     OPC2_32_RRR2_MADD_64                         = 0x6a,
     OPC2_32_RRR2_MADDS_32                        = 0x8a,
     OPC2_32_RRR2_MADDS_64                        = 0xea,
-    OPC2_32_RRR2_MADD_U_32                       = 0x68,
+    OPC2_32_RRR2_MADD_U_64                       = 0x68,
     OPC2_32_RRR2_MADDS_U_32                      = 0x88,
     OPC2_32_RRR2_MADDS_U_64                      = 0xe8,
 };

From 2e430e1cdcbac8825bc44b42844cbb011b859847 Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Thu, 29 Jan 2015 15:35:56 +0000
Subject: [PATCH 4/6] target-tricore: Add instructions of RRR1 opcode format,
 which have 0x83 as first opcode

Add helpers:
    * add64_ssov: adds two 64 bit values and saturates the result.
    * addr_h/_ssov: adds two halfwords with two words in q-format with rounding
                    / and saturates each result independently.

Add microcode generator:
    * gen_add64_d: adds two 64 bit values.
    * gen_addsub64_h: adds/subtracts one halfword with a word and adds/
                      subtracts another halfword with another word.
    * gen_madd_h/s_h: multiply four halfwords, add each result left justified
                      to two word values / and saturate each result.
    * gen_maddm_h/s_h: multiply four halfwords, add each result left justified
                       to two word values in q-format / and saturate each
                       result.
    * gen_maddr32/64_h/s_h: multiply four halfwords, add each result left
                            justified to two halfword/word values in q-format
                            / and saturate each result.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-tricore/helper.h    |   3 +
 target-tricore/op_helper.c | 110 ++++++++++
 target-tricore/translate.c | 421 +++++++++++++++++++++++++++++++++++++
 3 files changed, 534 insertions(+)

diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index 7405feee88..a1aa12e414 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -17,9 +17,11 @@
 
 /* Arithmetic */
 DEF_HELPER_3(add_ssov, i32, env, i32, i32)
+DEF_HELPER_3(add64_ssov, i64, env, i64, i64)
 DEF_HELPER_3(add_suov, i32, env, i32, i32)
 DEF_HELPER_3(add_h_ssov, i32, env, i32, i32)
 DEF_HELPER_3(add_h_suov, i32, env, i32, i32)
+DEF_HELPER_4(addr_h_ssov, i32, env, i64, i32, i32)
 DEF_HELPER_3(sub_ssov, i32, env, i32, i32)
 DEF_HELPER_3(sub_suov, i32, env, i32, i32)
 DEF_HELPER_3(sub_h_ssov, i32, env, i32, i32)
@@ -44,6 +46,7 @@ DEF_HELPER_2(abs_b, i32, env, i32)
 DEF_HELPER_2(abs_h, i32, env, i32)
 DEF_HELPER_3(absdif_b, i32, env, i32, i32)
 DEF_HELPER_3(absdif_h, i32, env, i32, i32)
+DEF_HELPER_4(addr_h, i32, env, i64, i32, i32)
 DEF_HELPER_3(add_b, i32, env, i32, i32)
 DEF_HELPER_3(add_h, i32, env, i32, i32)
 DEF_HELPER_3(sub_b, i32, env, i32, i32)
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index 08bf4ae0ed..8cd154b6b9 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -184,6 +184,31 @@ target_ulong helper_add_ssov(CPUTriCoreState *env, target_ulong r1,
     return ssov32(env, result);
 }
 
+uint64_t helper_add64_ssov(CPUTriCoreState *env, uint64_t r1, uint64_t r2)
+{
+    uint64_t result;
+    int64_t ovf;
+
+    result = r1 + r2;
+    ovf = (result ^ r1) & ~(r1 ^ r2);
+    env->PSW_USB_AV = (result ^ result * 2u) >> 32;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+    if (ovf < 0) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV = (1 << 31);
+        /* ext_ret > MAX_INT */
+        if ((int64_t)r1 >= 0) {
+            result = INT64_MAX;
+        /* ext_ret < MIN_INT */
+        } else {
+            result = INT64_MIN;
+        }
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    return result;
+}
+
 target_ulong helper_add_h_ssov(CPUTriCoreState *env, target_ulong r1,
                                target_ulong r2)
 {
@@ -194,6 +219,53 @@ target_ulong helper_add_h_ssov(CPUTriCoreState *env, target_ulong r1,
     return ssov16(env, ret_hw0, ret_hw1);
 }
 
+uint32_t helper_addr_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                            uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low + mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    if (result0 > INT32_MAX) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MAX;
+    } else if (result0 < INT32_MIN) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MIN;
+    }
+
+    if (result1 > INT32_MAX) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MAX;
+    } else if (result1 < INT32_MIN) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MIN;
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
+
 target_ulong helper_add_suov(CPUTriCoreState *env, target_ulong r1,
                              target_ulong r2)
 {
@@ -619,6 +691,44 @@ uint32_t helper_absdif_h(CPUTriCoreState *env, target_ulong r1, target_ulong r2)
     return ret;
 }
 
+uint32_t helper_addr_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                       uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low + mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    if ((result0 > INT32_MAX) || (result0 < INT32_MIN)) {
+        ovf0 = (1 << 31);
+    }
+
+    if ((result1 > INT32_MAX) || (result1 < INT32_MIN)) {
+        ovf1 = (1 << 31);
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
 uint32_t helper_add_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2)
 {
     int32_t b, i;
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index e74077ed01..086db3d1bb 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -80,6 +80,13 @@ enum {
     BS_EXCP   = 3,
 };
 
+enum {
+    MODE_LL = 0,
+    MODE_LU = 1,
+    MODE_UL = 2,
+    MODE_UU = 3,
+};
+
 void tricore_cpu_dump_state(CPUState *cs, FILE *f,
                             fprintf_function cpu_fprintf, int flags)
 {
@@ -395,6 +402,88 @@ static inline void gen_add_d(TCGv ret, TCGv r1, TCGv r2)
     tcg_temp_free(t0);
 }
 
+static inline void
+gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 result = tcg_temp_new_i64();
+
+    tcg_gen_add_i64(result, r1, r2);
+    /* calc v bit */
+    tcg_gen_xor_i64(t1, result, r1);
+    tcg_gen_xor_i64(t0, r1, r2);
+    tcg_gen_andc_i64(t1, t1, t0);
+    tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t1, 32);
+    /* calc SV bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* calc AV/SAV bits */
+    tcg_gen_trunc_shr_i64_i32(temp, result, 32);
+    tcg_gen_add_tl(cpu_PSW_AV, temp, temp);
+    tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV);
+    /* calc SAV */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+    /* write back result */
+    tcg_gen_mov_i64(ret, result);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
+static inline void
+gen_addsub64_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+               TCGv r3, void(*op1)(TCGv, TCGv, TCGv),
+               void(*op2)(TCGv, TCGv, TCGv))
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv temp4 = tcg_temp_new();
+
+    (*op1)(temp, r1_low, r2);
+    /* calc V0 bit */
+    tcg_gen_xor_tl(temp2, temp, r1_low);
+    tcg_gen_xor_tl(temp3, r1_low, r2);
+    if (op1 == tcg_gen_add_tl) {
+        tcg_gen_andc_tl(temp2, temp2, temp3);
+    } else {
+        tcg_gen_and_tl(temp2, temp2, temp3);
+    }
+
+    (*op2)(temp3, r1_high, r3);
+    /* calc V1 bit */
+    tcg_gen_xor_tl(cpu_PSW_V, temp3, r1_high);
+    tcg_gen_xor_tl(temp4, r1_high, r3);
+    if (op2 == tcg_gen_add_tl) {
+        tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, temp4);
+    } else {
+        tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, temp4);
+    }
+    /* combine V0/V1 bits */
+    tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp2);
+    /* calc sv bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* write result */
+    tcg_gen_mov_tl(ret_low, temp);
+    tcg_gen_mov_tl(ret_high, temp3);
+    /* calc AV bit */
+    tcg_gen_add_tl(temp, ret_low, ret_low);
+    tcg_gen_xor_tl(temp, temp, ret_low);
+    tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high);
+    tcg_gen_xor_tl(cpu_PSW_AV, cpu_PSW_AV, ret_high);
+    tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp);
+    /* calc SAV bit */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free(temp4);
+}
+
 /* ret = r2 + (r1 * r3); */
 static inline void gen_madd32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3)
 {
@@ -523,6 +612,221 @@ gen_maddui64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
     tcg_temp_free(temp);
 }
 
+static inline void
+gen_madd_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_addsub64_h(ret_low, ret_high, r1_low, r1_high, temp, temp2,
+                   tcg_gen_add_tl, tcg_gen_add_tl);
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void gen_adds(TCGv ret, TCGv r1, TCGv r2);
+
+static inline void
+gen_madds_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_adds(ret_low, r1_low, temp);
+    tcg_gen_mov_tl(temp, cpu_PSW_V);
+    tcg_gen_mov_tl(temp3, cpu_PSW_AV);
+    gen_adds(ret_high, r1_high, temp2);
+    /* combine v bits */
+    tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp);
+    /* combine av bits */
+    tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free_i64(temp64);
+
+}
+
+static inline void
+gen_maddm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+    TCGv_i64 temp64_3 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mulm_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high);
+    gen_add64_d(temp64_3, temp64_2, temp64);
+    /* write back result */
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+    tcg_temp_free_i64(temp64_3);
+}
+
+static inline void
+gen_maddms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mulm_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high);
+    gen_helper_add64_ssov(temp64, cpu_env, temp64_2, temp64);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+}
+
+static inline void
+gen_maddr64_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n,
+              uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    gen_helper_addr_h(ret, cpu_env, temp64, r1_low, r1_high);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000);
+    tcg_gen_shli_tl(temp, r1, 16);
+    gen_maddr64_h(ret, temp, temp2, r2, r3, n, mode);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_maddr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3,
+               uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    gen_helper_addr_h_ssov(ret, cpu_env, temp64, r1_low, r1_high);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000);
+    tcg_gen_shli_tl(temp, r1, 16);
+    gen_maddr64s_h(ret, temp, temp2, r2, r3, n, mode);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+
 /* ret = r2 - (r1 * r3); */
 static inline void gen_msub32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3)
 {
@@ -5300,6 +5604,119 @@ static void decode_rrr2_msub(CPUTriCoreState *env, DisasContext *ctx)
     }
 }
 
+/* RRR1 format */
+static void decode_rrr1_madd(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4, n;
+
+    op2 = MASK_OP_RRR1_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR1_S1(ctx->opcode);
+    r2 = MASK_OP_RRR1_S2(ctx->opcode);
+    r3 = MASK_OP_RRR1_S3(ctx->opcode);
+    r4 = MASK_OP_RRR1_D(ctx->opcode);
+    n = MASK_OP_RRR1_N(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_RRR1_MADD_H_LL:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADD_H_LU:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADD_H_UL:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADD_H_UU:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDS_H_LL:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDS_H_LU:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDS_H_UL:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDS_H_UU:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDM_H_LL:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDM_H_LU:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDM_H_UL:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDM_H_UU:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_LL:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_LU:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_UL:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_UU:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDR_H_LL:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDR_H_LU:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDR_H_UL:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDR_H_UU:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_LL:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_LU:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_UL:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_UU:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    }
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -5596,6 +6013,10 @@ static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
     case OPCM_32_RRR2_MSUB:
         decode_rrr2_msub(env, ctx);
         break;
+/* RRR1 format */
+    case OPCM_32_RRR1_MADD:
+        decode_rrr1_madd(env, ctx);
+        break;
     }
 }
 

From b00aa8ecbc6fd7468178a0dabc7dfd0b7e1b8cd6 Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Tue, 3 Feb 2015 18:36:53 +0000
Subject: [PATCH 5/6] target-tricore: Add instructions of RRR1 opcode format,
 which have 0x43 as first opcode

Add helpers:
    * madd64_q_ssov: multiply two 32 bit q-format numbers, add the product to
                     a 64 bit q-format number and saturate.
    * madd32_q_add_ssov: add two 64 bit q-format numbers and return a
                         saturated 32 bit result.
    * maddr_q_ssov: multiply two 32 bit q-format numbers, add a 32 bit
                    q-format number and saturate.
    * maddr_q: multiply two 32 bit q-format numbers and add a 32 bit
               q-format number.

Note: madd instructions in the q format can behave strangely, e.g.
0x1 + (0x80000000 * 0x80000000) << 1 for 32 bit signed values does not cause an
overflow on the guest, because all intermediate results should be handled as if
they are infinitely precise. We handle this by inverting the overflow bit for
all cases of the form: a + (0x80000000 * 0x80000000) << 1.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-tricore/helper.h          |   4 +
 target-tricore/op_helper.c       | 153 +++++++++++
 target-tricore/translate.c       | 427 +++++++++++++++++++++++++++++++
 target-tricore/tricore-opcodes.h |   8 +-
 4 files changed, 588 insertions(+), 4 deletions(-)

diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index a1aa12e414..8e9eea5082 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -33,6 +33,9 @@ DEF_HELPER_3(absdif_ssov, i32, env, i32, i32)
 DEF_HELPER_4(madd32_ssov, i32, env, i32, i32, i32)
 DEF_HELPER_4(madd32_suov, i32, env, i32, i32, i32)
 DEF_HELPER_4(madd64_ssov, i64, env, i32, i64, i32)
+DEF_HELPER_5(madd64_q_ssov, i64, env, i64, i32, i32, i32)
+DEF_HELPER_3(madd32_q_add_ssov, i32, env, i64, i64)
+DEF_HELPER_5(maddr_q_ssov, i32, env, i32, i32, i32, i32)
 DEF_HELPER_4(madd64_suov, i64, env, i32, i64, i32)
 DEF_HELPER_4(msub32_ssov, i32, env, i32, i32, i32)
 DEF_HELPER_4(msub32_suov, i32, env, i32, i32, i32)
@@ -47,6 +50,7 @@ DEF_HELPER_2(abs_h, i32, env, i32)
 DEF_HELPER_3(absdif_b, i32, env, i32, i32)
 DEF_HELPER_3(absdif_h, i32, env, i32, i32)
 DEF_HELPER_4(addr_h, i32, env, i64, i32, i32)
+DEF_HELPER_5(maddr_q, i32, env, i32, i32, i32, i32)
 DEF_HELPER_3(add_b, i32, env, i32, i32)
 DEF_HELPER_3(add_h, i32, env, i32, i32)
 DEF_HELPER_3(sub_b, i32, env, i32, i32)
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index 8cd154b6b9..2755d45aa7 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -475,6 +475,131 @@ uint64_t helper_madd64_ssov(CPUTriCoreState *env, target_ulong r1,
     return ret;
 }
 
+uint32_t
+helper_madd32_q_add_ssov(CPUTriCoreState *env, uint64_t r1, uint64_t r2)
+{
+    int64_t result;
+
+    result = (r1 + r2);
+
+    env->PSW_USB_AV = (result ^ result * 2u); /* bit 31: res[31] ^ res[30] */
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    /* we do the saturation by hand, since we produce an overflow on the host
+       if the mul before was (0x80000000 * 0x80000000) << 1. If this is the
+       case, we flip the saturated value. */
+    if (r2 == 0x8000000000000000LL) {
+        if (result > 0x7fffffffLL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MIN; /* flipped saturation value */
+        } else if (result < -0x80000000LL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MAX; /* flipped saturation value */
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    } else {
+        if (result > 0x7fffffffLL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MAX;
+        } else if (result < -0x80000000LL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MIN;
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    }
+    return (uint32_t)result;
+}
+
+uint64_t helper_madd64_q_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2,
+                              uint32_t r3, uint32_t n)
+{
+    int64_t t1 = (int64_t)r1;
+    int64_t t2 = sextract64(r2, 0, 32);
+    int64_t t3 = sextract64(r3, 0, 32);
+    int64_t result, mul;
+    int64_t ovf;
+
+    mul = (t2 * t3) << n;
+    result = mul + t1;
+
+    env->PSW_USB_AV = (result ^ result * 2u) >> 32;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    ovf = (result ^ mul) & ~(mul ^ t1); /* sign bit set: host add overflowed */
+    /* we do the saturation by hand, since we produce an overflow on the host
+       if the mul was (0x80000000 * 0x80000000) << 1. If this is the
+       case, we flip the saturated value. */
+    if ((r2 == 0x80000000) && (r3 == 0x80000000) && (n == 1)) {
+        if (ovf >= 0) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            /* saturate at the upper bound */
+            if (mul < 0) {
+                result = INT64_MAX;
+            /* saturate at the lower bound */
+            } else {
+               result = INT64_MIN;
+            }
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    } else {
+        if (ovf < 0) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            /* saturate at the upper bound */
+            if (mul >= 0) {
+                result = INT64_MAX;
+            /* saturate at the lower bound */
+            } else {
+               result = INT64_MIN;
+            }
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    }
+    return (uint64_t)result;
+}
+
+uint32_t helper_maddr_q_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                             uint32_t r3, uint32_t n)
+{
+    int64_t t1 = sextract64(r1, 0, 32);
+    int64_t t2 = sextract64(r2, 0, 32);
+    int64_t t3 = sextract64(r3, 0, 32);
+    int64_t mul, ret;
+
+    if ((t2 == -0x8000ll) && (t3 == -0x8000ll) && (n == 1)) {
+        mul = 0x7fffffff;
+    } else {
+        mul = (t2 * t3) << n;
+    }
+
+    ret = t1 + mul + 0x8000;
+
+    env->PSW_USB_AV = ret ^ ret * 2u;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    if (ret > 0x7fffffffll) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV |= env->PSW_USB_V;
+        ret = INT32_MAX;
+    } else if (ret < -0x80000000ll) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV |= env->PSW_USB_V;
+        ret = INT32_MIN;
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    return ret & 0xffff0000ll;
+}
+
 uint64_t helper_madd64_suov(CPUTriCoreState *env, target_ulong r1,
                             uint64_t r2, target_ulong r3)
 {
@@ -729,6 +854,34 @@ uint32_t helper_addr_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
     return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
 }
 
+uint32_t helper_maddr_q(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                        uint32_t r3, uint32_t n)
+{
+    int64_t t1 = sextract64(r1, 0, 32);
+    int64_t t2 = sextract64(r2, 0, 32);
+    int64_t t3 = sextract64(r3, 0, 32);
+    int64_t mul, ret;
+
+    if ((t2 == -0x8000ll) && (t3 == -0x8000ll) && (n == 1)) {
+        mul = 0x7fffffff;
+    } else {
+        mul = (t2 * t3) << n;
+    }
+
+    ret = t1 + mul + 0x8000;
+
+    if ((ret > 0x7fffffffll) || (ret < -0x80000000ll)) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV |= env->PSW_USB_V;
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    env->PSW_USB_AV = ret ^ ret * 2u;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return ret & 0xffff0000ll;
+}
+
 uint32_t helper_add_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2)
 {
     int32_t b, i;
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 086db3d1bb..4f13e4f0bd 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -826,7 +826,275 @@ gen_maddr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
     tcg_temp_free(temp2);
 }
 
+static inline void
+gen_maddr_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n)
+{
+    TCGv temp = tcg_const_i32(n);
+    gen_helper_maddr_q(ret, cpu_env, r1, r2, r3, temp);
+    tcg_temp_free(temp);
+}
 
+static inline void
+gen_maddrs_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n)
+{
+    TCGv temp = tcg_const_i32(n);
+    gen_helper_maddr_q_ssov(ret, cpu_env, r1, r2, r3, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_madd32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n,
+             uint32_t up_shift, CPUTriCoreState *env)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t2, arg2);
+    tcg_gen_ext_i32_i64(t3, arg3);
+
+    tcg_gen_mul_i64(t2, t2, t3);
+    tcg_gen_shli_i64(t2, t2, n);
+
+    tcg_gen_ext_i32_i64(t1, arg1);
+    tcg_gen_sari_i64(t2, t2, up_shift);
+
+    tcg_gen_add_i64(t3, t1, t2);
+    tcg_gen_trunc_i64_i32(temp3, t3);
+    /* calc v bit: 64 bit sum outside the signed 32 bit range */
+    tcg_gen_setcondi_i64(TCG_COND_GT, t1, t3, 0x7fffffffLL);
+    tcg_gen_setcondi_i64(TCG_COND_LT, t2, t3, -0x80000000LL);
+    tcg_gen_or_i64(t1, t1, t2);
+    tcg_gen_trunc_i64_i32(cpu_PSW_V, t1);
+    tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31);
+    /* We produce an overflow on the host if the mul before was
+       (0x80000000 * 0x80000000) << 1. If this is the
+       case, we negate the ovf. */
+    if (n == 1) {
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000);
+        tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3);
+        tcg_gen_and_tl(temp, temp, temp2);
+        tcg_gen_shli_tl(temp, temp, 31);
+        /* negate v bit, if special condition */
+        tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp);
+    }
+    /* Calc SV bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* Calc AV/SAV bits */
+    tcg_gen_add_tl(cpu_PSW_AV, temp3, temp3);
+    tcg_gen_xor_tl(cpu_PSW_AV, temp3, cpu_PSW_AV);
+    /* calc SAV */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+    /* write back result */
+    tcg_gen_mov_tl(ret, temp3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+}
+
+static inline void
+gen_m16add32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is expected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case arg2 = arg3 = 0x8000 (temp == 0x80000000) */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    gen_add_d(ret, arg1, temp);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_m16adds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is expected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case arg2 = arg3 = 0x8000 (temp == 0x80000000) */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    gen_adds(ret, arg1, temp);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_m16add64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+               TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is expected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case arg2 = arg3 = 0x8000 (temp == 0x80000000) */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    tcg_gen_ext_i32_i64(t2, temp);
+    tcg_gen_shli_i64(t2, t2, 16);
+    tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high);
+    gen_add64_d(t3, t1, t2);
+    /* write back result */
+    tcg_gen_extr_i64_i32(rl, rh, t3);
+
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_m16adds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+               TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is expected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case arg2 = arg3 = 0x8000 (temp == 0x80000000) */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    tcg_gen_ext_i32_i64(t2, temp);
+    tcg_gen_shli_i64(t2, t2, 16);
+    tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high);
+
+    gen_helper_add64_ssov(t1, cpu_env, t1, t2);
+    tcg_gen_extr_i64_i32(rl, rh, t1);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+
+static inline void
+gen_madd64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+             TCGv arg3, uint32_t n, CPUTriCoreState *env)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+    TCGv_i64 t4 = tcg_temp_new_i64();
+    TCGv temp, temp2;
+
+    tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high);
+    tcg_gen_ext_i32_i64(t2, arg2);
+    tcg_gen_ext_i32_i64(t3, arg3);
+
+    tcg_gen_mul_i64(t2, t2, t3);
+    if (n != 0) {
+        tcg_gen_shli_i64(t2, t2, 1);
+    }
+    tcg_gen_add_i64(t4, t1, t2);
+    /* calc v bit: sign of (sum ^ t1) & ~(t1 ^ t2), moved to bit 31 */
+    tcg_gen_xor_i64(t3, t4, t1);
+    tcg_gen_xor_i64(t2, t1, t2);
+    tcg_gen_andc_i64(t3, t3, t2);
+    tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t3, 32);
+    /* We produce an overflow on the host if the mul before was
+       (0x80000000 * 0x80000000) << 1. If this is the
+       case, we negate the ovf. */
+    if (n == 1) {
+        temp = tcg_temp_new();
+        temp2 = tcg_temp_new();
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000);
+        tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3);
+        tcg_gen_and_tl(temp, temp, temp2);
+        tcg_gen_shli_tl(temp, temp, 31);
+        /* negate v bit, if special condition */
+        tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp);
+
+        tcg_temp_free(temp);
+        tcg_temp_free(temp2);
+    }
+    /* write back result */
+    tcg_gen_extr_i64_i32(rl, rh, t4);
+    /* Calc SV bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* Calc AV/SAV bits */
+    tcg_gen_add_tl(cpu_PSW_AV, rh, rh);
+    tcg_gen_xor_tl(cpu_PSW_AV, rh, cpu_PSW_AV);
+    /* calc SAV */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+    tcg_temp_free_i64(t4);
+}
+
+static inline void
+gen_madds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n,
+              uint32_t up_shift)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t1, arg1);
+    tcg_gen_ext_i32_i64(t2, arg2);
+    tcg_gen_ext_i32_i64(t3, arg3);
+
+    tcg_gen_mul_i64(t2, t2, t3);
+    tcg_gen_sari_i64(t2, t2, up_shift - n);
+
+    gen_helper_madd32_q_add_ssov(ret, cpu_env, t1, t2);
+
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+}
+
+static inline void
+gen_madds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+             TCGv arg3, uint32_t n)
+{
+    TCGv_i64 r1 = tcg_temp_new_i64();
+    TCGv temp = tcg_const_i32(n);
+
+    tcg_gen_concat_i32_i64(r1, arg1_low, arg1_high);
+    gen_helper_madd64_q_ssov(r1, cpu_env, r1, arg2, arg3, temp);
+    tcg_gen_extr_i64_i32(rl, rh, r1);
+
+    tcg_temp_free_i64(r1);
+    tcg_temp_free(temp);
+}
 /* ret = r2 - (r1 * r3); */
 static inline void gen_msub32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3)
 {
@@ -5717,6 +5985,162 @@ static void decode_rrr1_madd(CPUTriCoreState *env, DisasContext *ctx)
     }
 }
 
+static void decode_rrr1_maddq_h(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4, n;
+    TCGv temp, temp2;
+
+    op2 = MASK_OP_RRR1_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR1_S1(ctx->opcode);
+    r2 = MASK_OP_RRR1_S2(ctx->opcode);
+    r3 = MASK_OP_RRR1_S3(ctx->opcode);
+    r4 = MASK_OP_RRR1_D(ctx->opcode);
+    n = MASK_OP_RRR1_N(ctx->opcode);
+
+    temp = tcg_const_i32(n);
+    temp2 = tcg_temp_new();
+
+    switch (op2) {
+    case OPC2_32_RRR1_MADD_Q_32:
+        gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                     cpu_gpr_d[r2], n, 32, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64:
+        gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                     n, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                     temp, n, 16, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                     n, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                     temp, n, 16, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                     n, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16add32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16add32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32:
+        gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, 32);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64:
+        gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      temp, n, 16);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                      n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      temp, n, 16);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                      n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16adds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                        cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16adds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                        cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDR_H_64_UL:
+        gen_maddr64_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1],
+                      cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_64_UL:
+        gen_maddr64s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1],
+                       cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2);
+        break;
+    case OPC2_32_RRR1_MADDR_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_maddr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDR_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_maddr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDRS_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_maddrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDRS_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_maddrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    }
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -6017,6 +6441,9 @@ static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
     case OPCM_32_RRR1_MADD:
         decode_rrr1_madd(env, ctx);
         break;
+    case OPCM_32_RRR1_MADDQ_H:
+        decode_rrr1_maddq_h(env, ctx);
+        break;
     }
 }
 
diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h
index 30d780ea80..41c9ef60ad 100644
--- a/target-tricore/tricore-opcodes.h
+++ b/target-tricore/tricore-opcodes.h
@@ -1245,10 +1245,10 @@ enum {
     OPC2_32_RRR1_MADDS_Q_64_LL                   = 0x3d,
     OPC2_32_RRR1_MADDS_Q_32_UU                   = 0x24,
     OPC2_32_RRR1_MADDS_Q_64_UU                   = 0x3c,
-    OPC2_32_RRR1_MADDR_H_16_UL                   = 0x1e,
-    OPC2_32_RRR1_MADDRS_H_16_UL                  = 0x3e,
-    OPC2_32_RRR1_MADDR_Q_32_L                    = 0x07,
-    OPC2_32_RRR1_MADDR_Q_32_U                    = 0x06,
+    OPC2_32_RRR1_MADDR_H_64_UL                   = 0x1e,
+    OPC2_32_RRR1_MADDRS_H_64_UL                  = 0x3e,
+    OPC2_32_RRR1_MADDR_Q_32_LL                   = 0x07,
+    OPC2_32_RRR1_MADDR_Q_32_UU                   = 0x06,
     OPC2_32_RRR1_MADDRS_Q_32_LL                  = 0x27,
     OPC2_32_RRR1_MADDRS_Q_32_UU                  = 0x26,
 };

From bebe80fc78cc91c4225cfb98ef3a916b9c861c60 Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Fri, 6 Feb 2015 14:48:33 +0000
Subject: [PATCH 6/6] target-tricore: Add instructions of RRR1 opcode format,
 which have 0xc3 as first opcode

Add helpers helper_addsur_h/_ssov, which add one halfword and subtract one
halfword, round and (for _ssov) saturate each halfword independently.

Add microcode helper functions:
    * gen_maddsu_h/sus_h: multiply two halfwords left justified and add to the
                          first one word and subtract from the second one word
                          / and saturate each resulting word independently.
    * gen_maddsum_h/sums_h: multiply two halfwords in q-format left justified
                            and add to the first one word and subtract from
                            the second one word / and saturate each resulting
                            word independently.
    * gen_maddsur32_h/32s_h: multiply two halfwords in q-format left justified
                             and add to the first one word and subtract from
                             the second one word, round both results / and
                             saturate each resulting word independently.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 target-tricore/helper.h    |   2 +
 target-tricore/op_helper.c |  84 ++++++++++
 target-tricore/translate.c | 332 +++++++++++++++++++++++++++++++++++++
 3 files changed, 418 insertions(+)

diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index 8e9eea5082..4c823460e1 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -22,6 +22,7 @@ DEF_HELPER_3(add_suov, i32, env, i32, i32)
 DEF_HELPER_3(add_h_ssov, i32, env, i32, i32)
 DEF_HELPER_3(add_h_suov, i32, env, i32, i32)
 DEF_HELPER_4(addr_h_ssov, i32, env, i64, i32, i32)
+DEF_HELPER_4(addsur_h_ssov, i32, env, i64, i32, i32)
 DEF_HELPER_3(sub_ssov, i32, env, i32, i32)
 DEF_HELPER_3(sub_suov, i32, env, i32, i32)
 DEF_HELPER_3(sub_h_ssov, i32, env, i32, i32)
@@ -50,6 +51,7 @@ DEF_HELPER_2(abs_h, i32, env, i32)
 DEF_HELPER_3(absdif_b, i32, env, i32, i32)
 DEF_HELPER_3(absdif_h, i32, env, i32, i32)
 DEF_HELPER_4(addr_h, i32, env, i64, i32, i32)
+DEF_HELPER_4(addsur_h, i32, env, i64, i32, i32)
 DEF_HELPER_5(maddr_q, i32, env, i32, i32, i32, i32)
 DEF_HELPER_3(add_b, i32, env, i32, i32)
 DEF_HELPER_3(add_h, i32, env, i32, i32)
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index 2755d45aa7..40d32af5d3 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -265,6 +265,52 @@ uint32_t helper_addr_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
     return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
 }
 
+uint32_t helper_addsur_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                              uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low - mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    if (result0 > INT32_MAX) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MAX;
+    } else if (result0 < INT32_MIN) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MIN;
+    }
+
+    if (result1 > INT32_MAX) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MAX;
+    } else if (result1 < INT32_MIN) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MIN;
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
 
 target_ulong helper_add_suov(CPUTriCoreState *env, target_ulong r1,
                              target_ulong r2)
@@ -854,6 +900,44 @@ uint32_t helper_addr_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
     return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
 }
 
+uint32_t helper_addsur_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                         uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low - mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    if ((result0 > INT32_MAX) || (result0 < INT32_MIN)) {
+        ovf0 = (1 << 31);
+    }
+
+    if ((result1 > INT32_MAX) || (result1 < INT32_MIN)) {
+        ovf1 = (1 << 31);
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
 uint32_t helper_maddr_q(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
                         uint32_t r3, uint32_t n)
 {
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 4f13e4f0bd..f720cd7fc5 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -641,6 +641,73 @@ gen_madd_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
     tcg_temp_free_i64(temp64);
 }
 
+static inline void
+gen_maddsu_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+             TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_addsub64_h(ret_low, ret_high, r1_low, r1_high, temp, temp2,
+                   tcg_gen_sub_tl, tcg_gen_add_tl);
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddsum_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+              TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+    TCGv_i64 temp64_3 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_concat_i32_i64(temp64_3, r1_low, r1_high);
+    tcg_gen_sari_i64(temp64_2, temp64, 32); /* high */
+    tcg_gen_ext32s_i64(temp64, temp64); /* low */
+    tcg_gen_sub_i64(temp64, temp64_2, temp64);
+    tcg_gen_shli_i64(temp64, temp64, 16);
+
+    gen_add64_d(temp64_2, temp64_3, temp64);
+    /* write back result */
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_2);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+    tcg_temp_free_i64(temp64_3);
+}
+
 static inline void gen_adds(TCGv ret, TCGv r1, TCGv r2);
 
 static inline void
@@ -683,6 +750,85 @@ gen_madds_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
 
 }
 
+static inline void gen_subs(TCGv ret, TCGv r1, TCGv r2);
+
+static inline void
+gen_maddsus_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+              TCGv r3, uint32_t n, uint32_t mode)
+{   /* ret_low = r1_low - low(mul_h); ret_high = r1_high + high(mul_h) */
+    TCGv temp = tcg_const_i32(n); /* n, forwarded to the mul_h helper */
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+
+    switch (mode) { /* selects the GEN_HELPER_xx form; no default case */
+    case MODE_LL: /* LL/LU/UL/UU: presumably halfword selection - confirm */
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64); /* temp = low, temp2 = high */
+    gen_subs(ret_low, r1_low, temp); /* presumably saturating sub - confirm */
+    tcg_gen_mov_tl(temp, cpu_PSW_V); /* keep V as left by gen_subs */
+    tcg_gen_mov_tl(temp3, cpu_PSW_AV); /* keep AV as left by gen_subs */
+    gen_adds(ret_high, r1_high, temp2); /* presumably saturating add */
+    /* combine v bits: V after gen_adds OR-ed with the saved V */
+    tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp);
+    /* combine av bits: AV after gen_adds OR-ed with the saved AV */
+    tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free_i64(temp64);
+
+}
+
+static inline void
+gen_maddsums_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+               TCGv r3, uint32_t n, uint32_t mode)
+{   /* ret = add64_ssov(r1, (high(mul_h) - low(mul_h)) << 16) */
+    TCGv temp = tcg_const_i32(n); /* n, forwarded to the mul_h helper */
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+
+    switch (mode) { /* selects the GEN_HELPER_xx form; no default case */
+    case MODE_LL: /* LL/LU/UL/UU: presumably halfword selection - confirm */
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_sari_i64(temp64_2, temp64, 32); /* high word, sign-extended */
+    tcg_gen_ext32s_i64(temp64, temp64); /* low word, sign-extended */
+    tcg_gen_sub_i64(temp64, temp64_2, temp64); /* high - low */
+    tcg_gen_shli_i64(temp64, temp64, 16); /* (high - low) << 16 */
+    tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high); /* 64-bit addend */
+
+    gen_helper_add64_ssov(temp64, cpu_env, temp64_2, temp64); /* see NOTE */
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); /* NOTE: ssov unseen */
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+}
+
+
 static inline void
 gen_maddm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
            TCGv r3, uint32_t n, uint32_t mode)
@@ -786,6 +932,36 @@ gen_maddr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
     tcg_temp_free(temp2);
 }
 
+static inline void
+gen_maddsur32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{   /* ret = addsur_h(mul_h result, r1 << 16, r1 & 0xffff0000) */
+    TCGv temp = tcg_const_i32(n); /* n, forwarded to the mul_h helper */
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) { /* selects the GEN_HELPER_xx form; no default case */
+    case MODE_LL: /* LL/LU/UL/UU: presumably halfword selection - confirm */
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000); /* r1 upper 16 bits, kept high */
+    tcg_gen_shli_tl(temp, r1, 16); /* r1 lower 16 bits moved up; temp reused */
+    gen_helper_addsur_h(ret, cpu_env, temp64, temp, temp2);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+
 static inline void
 gen_maddr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3,
                uint32_t n, uint32_t mode)
@@ -826,6 +1002,35 @@ gen_maddr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
     tcg_temp_free(temp2);
 }
 
+static inline void
+gen_maddsur32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{   /* ret = addsur_h_ssov(mul_h result, r1 << 16, r1 & 0xffff0000) */
+    TCGv temp = tcg_const_i32(n); /* n, forwarded to the mul_h helper */
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) { /* selects the GEN_HELPER_xx form; no default case */
+    case MODE_LL: /* LL/LU/UL/UU: presumably halfword selection - confirm */
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000); /* r1 upper 16 bits, kept high */
+    tcg_gen_shli_tl(temp, r1, 16); /* r1 lower 16 bits moved up; temp reused */
+    gen_helper_addsur_h_ssov(ret, cpu_env, temp64, temp, temp2);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
 static inline void
 gen_maddr_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n)
 {
@@ -6141,6 +6346,130 @@ static void decode_rrr1_maddq_h(CPUTriCoreState *env, DisasContext *ctx)
     tcg_temp_free(temp2);
 }
 
+static void decode_rrr1_maddsu_h(CPUTriCoreState *env, DisasContext *ctx)
+{   /* Decode the RRR1 fields and dispatch on op2 to a gen_maddsu* emitter. */
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4, n;
+
+    op2 = MASK_OP_RRR1_OP2(ctx->opcode); /* selects the case below */
+    r1 = MASK_OP_RRR1_S1(ctx->opcode); /* passed as the emitters' r2 */
+    r2 = MASK_OP_RRR1_S2(ctx->opcode); /* passed as the emitters' r3 */
+    r3 = MASK_OP_RRR1_S3(ctx->opcode); /* addend; r3+1 too in pair forms */
+    r4 = MASK_OP_RRR1_D(ctx->opcode); /* result; r4+1 too in pair forms */
+    n = MASK_OP_RRR1_N(ctx->opcode); /* forwarded unchanged as n */
+
+    switch (op2) { /* no default: an unmatched op2 emits nothing */
+    case OPC2_32_RRR1_MADDSU_H_32_LL: /* pair forms: d[r4], d[r4+1] */
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSU_H_32_LU:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSU_H_32_UL:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSU_H_32_UU:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_LL: /* MADDSUS: gen_maddsus_h */
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_LU:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_UL:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_UU:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_LL: /* MADDSUM: gen_maddsum_h */
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_LU:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_UL:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_UU:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_LL: /* MADDSUMS: gen_maddsums_h */
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_LU:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_UL:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_UU:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_LL: /* single-register forms: d[r4] */
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_LU:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_UL:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_UU:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_LL: /* MADDSURS: gen_maddsur32s_h */
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_LU:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_UL:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_UU:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    }
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -6444,6 +6773,9 @@ static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
     case OPCM_32_RRR1_MADDQ_H:
         decode_rrr1_maddq_h(env, ctx);
         break;
+    case OPCM_32_RRR1_MADDSU_H:
+        decode_rrr1_maddsu_h(env, ctx);
+        break;
     }
 }