qemu-patch-raspberry4/target/hexagon/imported/mpy.idef
Taylor Simpson 7cf9345c95 Hexagon (target/hexagon/imported) arch import
Imported from the Hexagon architecture library
    imported/macros.def
        The macro definitions specify instruction attributes that are applied
        to each instruction that references the macro. The generator will
        recursively apply attributes to each instruction that used the macro.
    imported/allidefs.def
        Top level instruction definition file
    imported/*.idef
        Instruction definition files
        These files are input to the first phase of the generator
        (gen_semantics.c) to create a python include file with the
        instruction semantics and attributes.  The python include
        file is fed to the second phase to generate various header files.
    imported/encode*.def
        Instruction encoding bit patterns for every instruction

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1612763186-18161-19-git-send-email-tsimpson@quicinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2021-02-18 07:48:22 -08:00

1209 lines
53 KiB
Plaintext

/*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Multiply Instructions
*/
#define STD_SP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(1,RsV),fGETHALF(1,RtV))));})\
Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(1,RtV)))));})\
Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(1,RsV),fGETHALF(0,RtV))));})\
Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(0,RtV)))));})\
Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(0,RsV),fGETHALF(1,RtV))));})\
Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(1,RtV)))));})\
Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(0,RsV),fGETHALF(0,RtV))));})\
Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(0,RtV)))));})
/*****************************************************/
/* multiply 16x16->32 signed instructions */
/*****************************************************/
STD_SP_MODES(mpy_acc, "Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS, ,fPASS,fPASS)
STD_SP_MODES(mpy_nac, "Rx32-=mpy", ,RxV,RxV- ,fMPY16SS, ,fPASS,fPASS)
STD_SP_MODES(mpy_acc_sat,"Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS,":sat" ,fSAT, fPASS)
STD_SP_MODES(mpy_nac_sat,"Rx32-=mpy", ,RxV,RxV- ,fMPY16SS,":sat" ,fSAT, fPASS)
STD_SP_MODES(mpy, "Rd32=mpy", ,RdV, ,fMPY16SS, ,fPASS,fPASS)
STD_SP_MODES(mpy_sat, "Rd32=mpy", ,RdV, ,fMPY16SS,":sat" ,fSAT, fPASS)
STD_SP_MODES(mpy_rnd, "Rd32=mpy", ,RdV, ,fMPY16SS,":rnd" ,fPASS,fROUND)
STD_SP_MODES(mpy_sat_rnd,"Rd32=mpy", ,RdV, ,fMPY16SS,":rnd:sat",fSAT, fROUND)
STD_SP_MODES(mpyd_acc, "Rxx32+=mpy",,RxxV,RxxV+ ,fMPY16SS, ,fPASS,fPASS)
STD_SP_MODES(mpyd_nac, "Rxx32-=mpy",,RxxV,RxxV- ,fMPY16SS, ,fPASS,fPASS)
STD_SP_MODES(mpyd, "Rdd32=mpy", ,RddV, ,fMPY16SS, ,fPASS,fPASS)
STD_SP_MODES(mpyd_rnd, "Rdd32=mpy", ,RddV, ,fMPY16SS,":rnd" ,fPASS,fROUND)
/*****************************************************/
/* multiply 16x16->32 unsigned instructions */
/*****************************************************/
#define STD_USP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(1,RtV))));})\
Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(1,RtV)))));})\
Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(0,RtV))));})\
Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(0,RtV)))));})\
Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(1,RtV))));})\
Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(1,RtV)))));})\
Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(0,RtV))));})\
Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(0,RtV)))));})
STD_USP_MODES(mpyu_acc, "Rx32+=mpyu", ,RxV,RxV+ ,fMPY16UU, ,fPASS,fPASS)
STD_USP_MODES(mpyu_nac, "Rx32-=mpyu", ,RxV,RxV- ,fMPY16UU, ,fPASS,fPASS)
STD_USP_MODES(mpyu, "Rd32=mpyu", ATTRIBS() ,RdV, ,fMPY16UU, ,fPASS,fPASS)
STD_USP_MODES(mpyud_acc, "Rxx32+=mpyu",,RxxV,RxxV+,fMPY16UU, ,fPASS,fPASS)
STD_USP_MODES(mpyud_nac, "Rxx32-=mpyu",,RxxV,RxxV-,fMPY16UU, ,fPASS,fPASS)
STD_USP_MODES(mpyud, "Rdd32=mpyu", ATTRIBS() ,RddV, ,fMPY16UU, ,fPASS,fPASS)
/**********************************************/
/* mpy 16x#s8->32 */
/**********************************************/
Q6INSN(M2_mpysip,"Rd32=+mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
"32-bit Multiply by unsigned immediate",
{ fIMMEXT(uiV); RdV=RsV*uiV; })
Q6INSN(M2_mpysin,"Rd32=-mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
"32-bit Multiply by unsigned immediate, negate result",
{ RdV=RsV*-uiV; })
Q6INSN(M2_macsip,"Rx32+=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
"32-bit Multiply-Add by unsigned immediate",
{ fIMMEXT(uiV); RxV=RxV + (RsV*uiV);})
Q6INSN(M2_macsin,"Rx32-=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
"32-bit Multiply-Subtract by unsigned immediate",
{ fIMMEXT(uiV); RxV=RxV - (RsV*uiV);})
/**********************************************/
/* multiply/mac 32x32->64 instructions */
/**********************************************/
Q6INSN(M2_dpmpyss_s0, "Rdd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32SS(RsV,RtV);})
Q6INSN(M2_dpmpyss_acc_s0,"Rxx32+=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32SS(RsV,RtV);})
Q6INSN(M2_dpmpyss_nac_s0,"Rxx32-=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32SS(RsV,RtV);})
Q6INSN(M2_dpmpyuu_s0, "Rdd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
Q6INSN(M2_dpmpyuu_acc_s0,"Rxx32+=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
Q6INSN(M2_dpmpyuu_nac_s0,"Rxx32-=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
/******************************************************/
/* multiply/mac 32x32->32 (upper) instructions */
/******************************************************/
Q6INSN(M2_mpy_up, "Rd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>32;})
Q6INSN(M2_mpy_up_s1, "Rd32=mpy(Rs32,Rt32):<<1", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>31;})
Q6INSN(M2_mpy_up_s1_sat, "Rd32=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RdV=fSAT(fMPY32SS(RsV,RtV)>>31);})
Q6INSN(M2_mpyu_up, "Rd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV))>>32;})
Q6INSN(M2_mpysu_up, "Rd32=mpysu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SU(RsV,fCAST4u(RtV))>>32;})
Q6INSN(M2_dpmpyss_rnd_s0,"Rd32=mpy(Rs32,Rt32):rnd", ATTRIBS(),"Multiply 32x32",{RdV=(fMPY32SS(RsV,RtV)+fCONSTLL(0x80000000))>>32;})
Q6INSN(M4_mac_up_s1_sat, "Rx32+=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT( (fSE32_64(RxV)) + (fMPY32SS(RsV,RtV)>>31));})
Q6INSN(M4_nac_up_s1_sat, "Rx32-=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT( (fSE32_64(RxV)) - (fMPY32SS(RsV,RtV)>>31));})
/**********************************************/
/* 32x32->32 multiply (lower) */
/**********************************************/
Q6INSN(M2_mpyi,"Rd32=mpyi(Rs32,Rt32)",ATTRIBS(),
"Multiply Integer",
{ RdV=RsV*RtV;})
Q6INSN(M2_maci,"Rx32+=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Multiply-Accumulate Integer",
{ RxV=RxV + RsV*RtV;})
Q6INSN(M2_mnaci,"Rx32-=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Multiply-Neg-Accumulate Integer",
{ RxV=RxV - RsV*RtV;})
/****** WHY ARE THESE IN MPY.IDEF? **********/
Q6INSN(M2_acci,"Rx32+=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Add with accumulate",
{ RxV=RxV + RsV + RtV;})
Q6INSN(M2_accii,"Rx32+=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
"Add with accumulate",
{ fIMMEXT(siV); RxV=RxV + RsV + siV;})
Q6INSN(M2_nacci,"Rx32-=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Add with neg accumulate",
{ RxV=RxV - (RsV + RtV);})
Q6INSN(M2_naccii,"Rx32-=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
"Add with neg accumulate",
{ fIMMEXT(siV); RxV=RxV - (RsV + siV);})
Q6INSN(M2_subacc,"Rx32+=sub(Rt32,Rs32)",ATTRIBS(A_ARCHV2),
"Sub with accumulate",
{ RxV=RxV + RtV - RsV;})
Q6INSN(M4_mpyrr_addr,"Ry32=add(Ru32,mpyi(Ry32,Rs32))",ATTRIBS(),
"Mpy by immed and add immed",
{ RyV = RuV + RsV*RyV;})
Q6INSN(M4_mpyri_addr_u2,"Rd32=add(Ru32,mpyi(#u6:2,Rs32))",ATTRIBS(),
"Mpy by immed and add immed",
{ RdV = RuV + RsV*uiV;})
Q6INSN(M4_mpyri_addr,"Rd32=add(Ru32,mpyi(Rs32,#u6))",ATTRIBS(),
"Mpy by immed and add immed",
{ fIMMEXT(uiV); RdV = RuV + RsV*uiV;})
Q6INSN(M4_mpyri_addi,"Rd32=add(#u6,mpyi(Rs32,#U6))",ATTRIBS(),
"Mpy by immed and add immed",
{ fIMMEXT(uiV); RdV = uiV + RsV*UiV;})
Q6INSN(M4_mpyrr_addi,"Rd32=add(#u6,mpyi(Rs32,Rt32))",ATTRIBS(),
"Mpy by immed and add immed",
{ fIMMEXT(uiV); RdV = uiV + RsV*RtV;})
/**********************************************/
/* vector mac 2x[16x16 -> 32] */
/**********************************************/
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
}
Q6INSN(M2_vmpy2s_s0,"Rdd32=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmpy2s_s1,"Rdd32=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
}
Q6INSN(M2_vmac2s_s0,"Rxx32+=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmac2s_s1,"Rxx32+=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
}
Q6INSN(M2_vmpy2su_s0,"Rdd32=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmpy2su_s1,"Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
}
Q6INSN(M2_vmac2su_s0,"Rxx32+=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmac2su_s1,"Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) + 0x8000))));\
}
Q6INSN(M2_vmpy2s_s0pack,"Rd32=vmpyh(Rs32,Rt32):rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmpy2s_s1pack,"Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)));\
fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)));\
}
Q6INSN(M2_vmac2,"Rxx32+=vmpyh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
}
Q6INSN(M2_vmpy2es_s0,"Rdd32=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmpy2es_s1,"Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
}
Q6INSN(M2_vmac2es_s0,"Rxx32+=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
Q6INSN(M2_vmac2es_s1,"Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
#undef vmac_sema
#define vmac_sema(N)\
{ fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)));\
fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)));\
}
Q6INSN(M2_vmac2es,"Rxx32+=vmpyeh(Rss32,Rtt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
/********************************************************/
/* vrmpyh, aka Big Mac, aka Mac Daddy, aka Mac-ac-ac-ac */
/* vector mac 4x[16x16] + 64 ->64 */
/********************************************************/
#undef vmac_sema
#define vmac_sema(N)\
{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
+ fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
+ fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
+ fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
}
Q6INSN(M2_vrmac_s0,"Rxx32+=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
#undef vmac_sema
#define vmac_sema(N)\
{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
+ fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
+ fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
+ fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
}
Q6INSN(M2_vrmpy_s0,"Rdd32=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
/******************************************************/
/* vector dual macs. just like complex */
/******************************************************/
/* With round&pack */
#undef dmpy_sema
#define dmpy_sema(N)\
{ fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))) + 0x8000))));\
fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))) + 0x8000))));\
}
Q6INSN(M2_vdmpyrs_s0,"Rd32=vdmpy(Rss32,Rtt32):rnd:sat",ATTRIBS(), "vector dual mac w/ round&pack",dmpy_sema(0))
Q6INSN(M2_vdmpyrs_s1,"Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"vector dual mac w/ round&pack",dmpy_sema(1))
/******************************************************/
/* vector byte multiplies */
/******************************************************/
Q6INSN(M5_vrmpybuu,"Rdd32=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
"vector dual mpy bytes",
{
fSETWORD(0,RddV,(fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
fSETWORD(1,RddV,(fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
})
Q6INSN(M5_vrmacbuu,"Rxx32+=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
"vector dual mac bytes",
{
fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
})
Q6INSN(M5_vrmpybsu,"Rdd32=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
"vector dual mpy bytes",
{
fSETWORD(0,RddV,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
fSETWORD(1,RddV,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
})
Q6INSN(M5_vrmacbsu,"Rxx32+=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
"vector dual mac bytes",
{
fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
})
Q6INSN(M5_vmpybuu,"Rdd32=vmpybu(Rs32,Rt32)",ATTRIBS(),
"vector mpy bytes",
{
fSETHALF(0,RddV,(fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
fSETHALF(1,RddV,(fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
fSETHALF(2,RddV,(fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
fSETHALF(3,RddV,(fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
})
Q6INSN(M5_vmpybsu,"Rdd32=vmpybsu(Rs32,Rt32)",ATTRIBS(),
"vector mpy bytes",
{
fSETHALF(0,RddV,(fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
fSETHALF(1,RddV,(fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
fSETHALF(2,RddV,(fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
fSETHALF(3,RddV,(fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
})
Q6INSN(M5_vmacbuu,"Rxx32+=vmpybu(Rs32,Rt32)",ATTRIBS(),
"vector mac bytes",
{
fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
})
Q6INSN(M5_vmacbsu,"Rxx32+=vmpybsu(Rs32,Rt32)",ATTRIBS(),
"vector mac bytes",
{
fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
})
Q6INSN(M5_vdmpybsu,"Rdd32=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
"vector quad mpy bytes",
{
fSETHALF(0,RddV,fSATN(16,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
fSETHALF(1,RddV,fSATN(16,(fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
fSETHALF(2,RddV,fSATN(16,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
fSETHALF(3,RddV,fSATN(16,(fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
})
Q6INSN(M5_vdmacbsu,"Rxx32+=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
"vector quad mac bytes",
{
fSETHALF(0,RxxV,fSATN(16,(fGETHALF(0,RxxV) +
fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
fSETHALF(1,RxxV,fSATN(16,(fGETHALF(1,RxxV) +
fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
fSETHALF(2,RxxV,fSATN(16,(fGETHALF(2,RxxV) +
fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
fSETHALF(3,RxxV,fSATN(16,(fGETHALF(3,RxxV) +
fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
})
/* Full version */
#undef dmpy_sema
#define dmpy_sema(N)\
{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
}
Q6INSN(M2_vdmacs_s0,"Rxx32+=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0))
Q6INSN(M2_vdmacs_s1,"Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
#undef dmpy_sema
#define dmpy_sema(N)\
{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
}
Q6INSN(M2_vdmpys_s0,"Rdd32=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0))
Q6INSN(M2_vdmpys_s1,"Rdd32=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
/******************************************************/
/* complex multiply/mac with */
/* real&imag are packed together and always saturated */
/* to protect against overflow. */
/******************************************************/
#undef cmpy_sema
#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
{ fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))) + 0x8000))));\
fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
}
Q6INSN(M2_cmpyrs_s0,"Rd32=cmpy(Rs32,Rt32):rnd:sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-))
Q6INSN(M2_cmpyrs_s1,"Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
Q6INSN(M2_cmpyrsc_s0,"Rd32=cmpy(Rs32,Rt32*):rnd:sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
Q6INSN(M2_cmpyrsc_s1,"Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
#undef cmpy_sema
#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
}
Q6INSN(M2_cmacs_s0,"Rxx32+=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-))
Q6INSN(M2_cmacs_s1,"Rxx32+=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
/* EJP: Need mac versions w/ CONJ T? */
Q6INSN(M2_cmacsc_s0,"Rxx32+=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
Q6INSN(M2_cmacsc_s1,"Rxx32+=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
#undef cmpy_sema
#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
{ fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
}
Q6INSN(M2_cmpys_s0,"Rdd32=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-))
Q6INSN(M2_cmpys_s1,"Rdd32=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
Q6INSN(M2_cmpysc_s0,"Rdd32=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
Q6INSN(M2_cmpysc_s1,"Rdd32=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
#undef cmpy_sema
#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))))));\
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))))));\
}
Q6INSN(M2_cnacs_s0,"Rxx32-=cmpy(Rs32,Rt32):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,+,-))
Q6INSN(M2_cnacs_s1,"Rxx32-=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,+,-))
/* EJP: need CONJ versions? */
Q6INSN(M2_cnacsc_s0,"Rxx32-=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
Q6INSN(M2_cnacsc_s1,"Rxx32-=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
/******************************************************/
/* complex interpolation */
/* Given a pair of complex values, scale by a,b, sum */
/* Saturate/shift1 and round/pack */
/******************************************************/
#undef vrcmpys_sema
#define vrcmpys_sema(N,INWORD) \
{ fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
}
Q6INSN(M2_vrcmpys_s1_h,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
Q6INSN(M2_vrcmpys_s1_l,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
#undef vrcmpys_sema
#define vrcmpys_sema(N,INWORD) \
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
}
Q6INSN(M2_vrcmpys_acc_s1_h,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
Q6INSN(M2_vrcmpys_acc_s1_l,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
#undef vrcmpys_sema
#define vrcmpys_sema(N,INWORD) \
{ fSETHALF(1,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
fSETHALF(0,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
}
Q6INSN(M2_vrcmpys_s1rp_h,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
Q6INSN(M2_vrcmpys_s1rp_l,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
/**************************************************************/
/* mixed mode 32x16 vector dual multiplies */
/* */
/**************************************************************/
/* SIGNED 32 x SIGNED 16 */
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)) ); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)) ); \
}
Q6INSN(M2_mmacls_s0,"Rxx32+=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmacls_s1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16) )); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16 ))); \
}
Q6INSN(M2_mmachs_s0,"Rxx32+=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmachs_s1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)); \
}
Q6INSN(M2_mmpyl_s0,"Rdd32=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyl_s1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16)); \
}
Q6INSN(M2_mmpyh_s0,"Rdd32=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyh_s1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
/* With rounding */
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)) ); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)) ); \
}
Q6INSN(M2_mmacls_rs0,"Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmacls_rs1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16) )); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16 ))); \
}
Q6INSN(M2_mmachs_rs0,"Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmachs_rs1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)); \
}
Q6INSN(M2_mmpyl_rs0,"Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyl_rs1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16)); \
}
Q6INSN(M2_mmpyh_rs0,"Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyh_rs1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(DEST,EQUALS,N)\
{ DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)));}
Q6INSN(M4_vrmpyeh_s0,"Rdd32=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
Q6INSN(M4_vrmpyeh_s1,"Rdd32=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
Q6INSN(M4_vrmpyeh_acc_s0,"Rxx32+=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
Q6INSN(M4_vrmpyeh_acc_s1,"Rxx32+=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
#undef mixmpy_sema
#define mixmpy_sema(DEST,EQUALS,N)\
{ DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)));}
Q6INSN(M4_vrmpyoh_s0,"Rdd32=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
Q6INSN(M4_vrmpyoh_s1,"Rdd32=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
Q6INSN(M4_vrmpyoh_acc_s0,"Rxx32+=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
Q6INSN(M4_vrmpyoh_acc_s1,"Rxx32+=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
#undef mixmpy_sema
#define mixmpy_sema(N,H,RND)\
{ RdV = fSAT((fSCALE(N,fMPY3216SS(RsV,fGETHALF(H,RtV)))RND)>>16); \
}
Q6INSN(M2_hmmpyl_rs1,"Rd32=mpy(Rs32,Rt.L32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,+0x8000))
Q6INSN(M2_hmmpyh_rs1,"Rd32=mpy(Rs32,Rt.H32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,+0x8000))
Q6INSN(M2_hmmpyl_s1,"Rd32=mpy(Rs32,Rt.L32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,))
Q6INSN(M2_hmmpyh_s1,"Rd32=mpy(Rs32,Rt.H32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,))
/* SIGNED 32 x UNSIGNED 16 */
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)) ); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)) ); \
}
Q6INSN(M2_mmaculs_s0,"Rxx32+=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmaculs_s1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16) )); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16 ))); \
}
Q6INSN(M2_mmacuhs_s0,"Rxx32+=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmacuhs_s1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)); \
}
Q6INSN(M2_mmpyul_s0,"Rdd32=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyul_s1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16)); \
}
Q6INSN(M2_mmpyuh_s0,"Rdd32=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyuh_s1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
/* With rounding */
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)) ); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)) ); \
}
Q6INSN(M2_mmaculs_rs0,"Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmaculs_rs1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16) )); \
fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16 ))); \
}
Q6INSN(M2_mmacuhs_rs0,"Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmacuhs_rs1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)); \
}
Q6INSN(M2_mmpyul_rs0,"Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyul_rs1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
#undef mixmpy_sema
#define mixmpy_sema(N)\
{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16)); \
fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16)); \
}
Q6INSN(M2_mmpyuh_rs0,"Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
Q6INSN(M2_mmpyuh_rs1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
/**************************************************************/
/* complex mac with full 64-bit accum - no sat, no shift */
/* either do real or accum, never both */
/**************************************************************/
Q6INSN(M2_vrcmaci_s0,"Rxx32+=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
{
RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_vrcmacr_s0,"Rxx32+=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Real",
{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_vrcmaci_s0c,"Rxx32+=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Imaginary",
{
RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_vrcmacr_s0c,"Rxx32+=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Real",
{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_cmaci_s0,"Rxx32+=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
{
RxxV = RxxV + fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
})
Q6INSN(M2_cmacr_s0,"Rxx32+=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Real",
{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
})
Q6INSN(M2_vrcmpyi_s0,"Rdd32=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
{
RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_vrcmpyr_s0,"Rdd32=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Real",
{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_vrcmpyi_s0c,"Rdd32=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Imaginary",
{
RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_vrcmpyr_s0c,"Rdd32=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Real",
{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
})
Q6INSN(M2_cmpyi_s0,"Rdd32=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
{
RddV = fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
})
Q6INSN(M2_cmpyr_s0,"Rdd32=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Real",
{ RddV = fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
})
/**************************************************************/
/* Complex mpy/mac with 2x32 bit accum, sat, shift */
/* 32x16 real or imag */
/**************************************************************/
#if 1
Q6INSN(M4_cmpyi_wh,"Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
{
RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
+ fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
+ 0x4000)>>15);
})
Q6INSN(M4_cmpyr_wh,"Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
{
RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
- fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
+ 0x4000)>>15);
})
Q6INSN(M4_cmpyi_whc,"Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
{
RdV = fSAT( ( fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
- fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
+ 0x4000)>>15);
})
Q6INSN(M4_cmpyr_whc,"Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
{
RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
+ fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
+ 0x4000)>>15);
})
#endif
/**************************************************************/
/* Vector mpy/mac with 2x32 bit accum, sat, shift */
/* either do real or imag, never both */
/**************************************************************/
#undef VCMPYSEMI
#define VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) \
fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV))))); \
fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV))))); \
#undef VCMPYSEMR
#define VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) \
fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))))); \
fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))))); \
#undef VCMPYIR
#define VCMPYIR(TAGBASE,DSTSYN,DSTVAL,ACCSEM,ACCVAL0,ACCVAL1,SHIFTSYN,SHIFTVAL,SATSYN,SATVAL) \
Q6INSN(M2_##TAGBASE##i,DSTSYN ACCSEM "vcmpyi(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
"Vector Complex Multiply Imaginary", { VCMPYSEMI(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) \
Q6INSN(M2_##TAGBASE##r,DSTSYN ACCSEM "vcmpyr(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
"Vector Complex Multiply Imaginary", { VCMPYSEMR(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); })
VCMPYIR(vcmpy_s0_sat_,"Rdd32",RddV,"=",,,"",0,":sat",fSAT)
VCMPYIR(vcmpy_s1_sat_,"Rdd32",RddV,"=",,,":<<1",1,":sat",fSAT)
VCMPYIR(vcmac_s0_sat_,"Rxx32",RxxV,"+=",fGETWORD(0,RxxV) + ,fGETWORD(1,RxxV) + ,"",0,":sat",fSAT)
/**********************************************************************
* Rotation -- by 0, 90, 180, or 270 means mult by 1, J, -1, -J *
*********************************************************************/
Q6INSN(S2_vcrotate,"Rdd32=vcrotate(Rss32,Rt32)",ATTRIBS(A_ARCHV2),"Rotate complex value by multiple of PI/2",
{
fHIDE(size1u_t tmp;)
tmp = fEXTRACTU_RANGE(RtV,1,0);
if (tmp == 0) { /* No rotation */
fSETHALF(0,RddV,fGETHALF(0,RssV));
fSETHALF(1,RddV,fGETHALF(1,RssV));
} else if (tmp == 1) { /* Multiply by -J */
fSETHALF(0,RddV,fGETHALF(1,RssV));
fSETHALF(1,RddV,fSATH(-fGETHALF(0,RssV)));
} else if (tmp == 2) { /* Multiply by J */
fSETHALF(0,RddV,fSATH(-fGETHALF(1,RssV)));
fSETHALF(1,RddV,fGETHALF(0,RssV));
} else { /* Multiply by -1 */
fHIDE(if (tmp != 3) fatal("C is broken");)
fSETHALF(0,RddV,fSATH(-fGETHALF(0,RssV)));
fSETHALF(1,RddV,fSATH(-fGETHALF(1,RssV)));
}
tmp = fEXTRACTU_RANGE(RtV,3,2);
if (tmp == 0) { /* No rotation */
fSETHALF(2,RddV,fGETHALF(2,RssV));
fSETHALF(3,RddV,fGETHALF(3,RssV));
} else if (tmp == 1) { /* Multiply by -J */
fSETHALF(2,RddV,fGETHALF(3,RssV));
fSETHALF(3,RddV,fSATH(-fGETHALF(2,RssV)));
} else if (tmp == 2) { /* Multiply by J */
fSETHALF(2,RddV,fSATH(-fGETHALF(3,RssV)));
fSETHALF(3,RddV,fGETHALF(2,RssV));
} else { /* Multiply by -1 */
fHIDE(if (tmp != 3) fatal("C is broken");)
fSETHALF(2,RddV,fSATH(-fGETHALF(2,RssV)));
fSETHALF(3,RddV,fSATH(-fGETHALF(3,RssV)));
}
})
Q6INSN(S4_vrcrotate_acc,"Rxx32+=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
{
fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
fHIDE(int sumr; int sumi;)
sumr = 0;
sumi = 0;
control = fGETUBYTE(uiV,RtV);
for (i = 0; i < 8; i += 2) {
tmpr = fGETBYTE(i ,RssV);
tmpi = fGETBYTE(i+1,RssV);
switch (control & 3) {
case 0: /* No Rotation */
sumr += tmpr;
sumi += tmpi;
break;
case 1: /* Multiply by -J */
sumr += tmpi;
sumi -= tmpr;
break;
case 2: /* Multiply by J */
sumr -= tmpi;
sumi += tmpr;
break;
case 3: /* Multiply by -1 */
sumr -= tmpr;
sumi -= tmpi;
break;
fHIDE(default: fatal("C is broken!");)
}
control = control >> 2;
}
fSETWORD(0,RxxV,fGETWORD(0,RxxV) + sumr);
fSETWORD(1,RxxV,fGETWORD(1,RxxV) + sumi);
})
Q6INSN(S4_vrcrotate,"Rdd32=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
{
fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
fHIDE(int sumr; int sumi;)
sumr = 0;
sumi = 0;
control = fGETUBYTE(uiV,RtV);
for (i = 0; i < 8; i += 2) {
tmpr = fGETBYTE(i ,RssV);
tmpi = fGETBYTE(i+1,RssV);
switch (control & 3) {
case 0: /* No Rotation */
sumr += tmpr;
sumi += tmpi;
break;
case 1: /* Multiply by -J */
sumr += tmpi;
sumi -= tmpr;
break;
case 2: /* Multiply by J */
sumr -= tmpi;
sumi += tmpr;
break;
case 3: /* Multiply by -1 */
sumr -= tmpr;
sumi -= tmpi;
break;
fHIDE(default: fatal("C is broken!");)
}
control = control >> 2;
}
fSETWORD(0,RddV,sumr);
fSETWORD(1,RddV,sumi);
})
Q6INSN(S2_vcnegh,"Rdd32=vcnegh(Rss32,Rt32)",ATTRIBS(),"Conditional Negate halfwords",
{
fHIDE(int i;)
for (i = 0; i < 4; i++) {
if (fGETBIT(i,RtV)) {
fSETHALF(i,RddV,fSATH(-fGETHALF(i,RssV)));
} else {
fSETHALF(i,RddV,fGETHALF(i,RssV));
}
}
})
Q6INSN(S2_vrcnegh,"Rxx32+=vrcnegh(Rss32,Rt32)",ATTRIBS(),"Vector Reduce Conditional Negate halfwords",
{
fHIDE(int i;)
for (i = 0; i < 4; i++) {
if (fGETBIT(i,RtV)) {
RxxV += -fGETHALF(i,RssV);
} else {
RxxV += fGETHALF(i,RssV);
}
}
})
/**********************************************************************
* Finite-field multiplies. Written by David Hoyle *
*********************************************************************/
Q6INSN(M4_pmpyw,"Rdd32=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
{
fHIDE(int i; unsigned int y;)
fHIDE(unsigned long long x; unsigned long long prod;)
x = fGETUWORD(0, RsV);
y = fGETUWORD(0, RtV);
prod = 0;
for(i=0; i < 32; i++) {
if((y >> i) & 1) prod ^= (x << i);
}
RddV = prod;
})
Q6INSN(M4_vpmpyh,"Rdd32=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
{
fHIDE(int i; unsigned int x0; unsigned int x1;)
fHIDE(unsigned int y0; unsigned int y1;)
fHIDE(unsigned int prod0; unsigned int prod1;)
x0 = fGETUHALF(0, RsV);
x1 = fGETUHALF(1, RsV);
y0 = fGETUHALF(0, RtV);
y1 = fGETUHALF(1, RtV);
prod0 = prod1 = 0;
for(i=0; i < 16; i++) {
if((y0 >> i) & 1) prod0 ^= (x0 << i);
if((y1 >> i) & 1) prod1 ^= (x1 << i);
}
fSETHALF(0,RddV,fGETUHALF(0,prod0));
fSETHALF(1,RddV,fGETUHALF(0,prod1));
fSETHALF(2,RddV,fGETUHALF(1,prod0));
fSETHALF(3,RddV,fGETUHALF(1,prod1));
})
Q6INSN(M4_pmpyw_acc,"Rxx32^=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
{
fHIDE(int i; unsigned int y;)
fHIDE(unsigned long long x; unsigned long long prod;)
x = fGETUWORD(0, RsV);
y = fGETUWORD(0, RtV);
prod = 0;
for(i=0; i < 32; i++) {
if((y >> i) & 1) prod ^= (x << i);
}
RxxV ^= prod;
})
Q6INSN(M4_vpmpyh_acc,"Rxx32^=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
{
fHIDE(int i; unsigned int x0; unsigned int x1;)
fHIDE(unsigned int y0; unsigned int y1;)
fHIDE(unsigned int prod0; unsigned int prod1;)
x0 = fGETUHALF(0, RsV);
x1 = fGETUHALF(1, RsV);
y0 = fGETUHALF(0, RtV);
y1 = fGETUHALF(1, RtV);
prod0 = prod1 = 0;
for(i=0; i < 16; i++) {
if((y0 >> i) & 1) prod0 ^= (x0 << i);
if((y1 >> i) & 1) prod1 ^= (x1 << i);
}
fSETHALF(0,RxxV,fGETUHALF(0,RxxV) ^ fGETUHALF(0,prod0));
fSETHALF(1,RxxV,fGETUHALF(1,RxxV) ^ fGETUHALF(0,prod1));
fSETHALF(2,RxxV,fGETUHALF(2,RxxV) ^ fGETUHALF(1,prod0));
fSETHALF(3,RxxV,fGETUHALF(3,RxxV) ^ fGETUHALF(1,prod1));
})
/* V70: TINY CORE */
#define CMPY64(TAG,NAME,DESC,OPERAND1,OP,W0,W1,W2,W3) \
Q6INSN(M7_##TAG,"Rdd32=" NAME "(Rss32," OPERAND1 ")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 64-bit " DESC, { RddV = (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})\
Q6INSN(M7_##TAG##_acc,"Rxx32+=" NAME "(Rss32,"OPERAND1")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply-Accumulate 64-bit " DESC, { RxxV += (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})
CMPY64(dcmpyrw, "cmpyrw","Real","Rtt32" ,-,0,0,1,1)
CMPY64(dcmpyrwc,"cmpyrw","Real","Rtt32*",+,0,0,1,1)
CMPY64(dcmpyiw, "cmpyiw","Imag","Rtt32" ,+,0,1,1,0)
CMPY64(dcmpyiwc,"cmpyiw","Imag","Rtt32*",-,1,0,0,1)
#define CMPY128(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
Q6INSN(M7_##TAG,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \
{ \
fHIDE(size16s_t acc128;)\
fHIDE(size16s_t tmp128;)\
fHIDE(size8s_t acc64;)\
tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
acc128 = OP(tmp128,acc128);\
acc128 = fSHIFTR128(acc128, 31);\
acc64 = fCAST16S_8S(acc128);\
RdV = fSATW(acc64);\
})
CMPY128(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
CMPY128(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
CMPY128(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
CMPY128(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)
#define CMPY128RND(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
Q6INSN(M7_##TAG##_rnd,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:rnd:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \
{ \
fHIDE(size16s_t acc128;)\
fHIDE(size16s_t tmp128;)\
fHIDE(size16s_t const128;)\
fHIDE(size8s_t acc64;)\
tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
const128 = fCAST8S_16S(fCONSTLL(0x40000000));\
acc128 = OP(tmp128,acc128);\
acc128 = fADD128(acc128,const128);\
acc128 = fSHIFTR128(acc128, 31);\
acc64 = fCAST16S_8S(acc128);\
RdV = fSATW(acc64);\
})
CMPY128RND(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
CMPY128RND(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
CMPY128RND(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
CMPY128RND(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)