Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions compiler/p/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1879,6 +1879,11 @@ bool OMR::Power::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::I
return true;
else
return false;
case TR::mTrueCount:
if (cpu->isAtLeast(OMR_PROCESSOR_PPC_P8))
return true;
else
return false;
case TR::vload:
case TR::vloadi:
case TR::vstore:
Expand Down
4 changes: 2 additions & 2 deletions compiler/p/codegen/OMRInstOpCode.enum
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@
vsel, // vector conditional select
vsld, // Vector Shift Left Dword
vsrad, // Vector Shift Right Algebraic Dword
// vsrd, // Vector Shift Right Dword
vsrd, // Vector Shift Right Dword
// vsubcuq, // Vector Subtract & write Carry Unsigned Qword
// vsubecuq, // Vector Subtract Extended & write Carry Unsigned Qword
// vsubeuqm, // Vector Subtract Extended Unsigned Qword Modulo
Expand Down Expand Up @@ -812,7 +812,7 @@
// vpmsumh, // Vector Polynomial Multiply-Sum Hword
// vpmsumw, // Vector Polynomial Multiply-Sum Word
// vpopcntb, // Vector Population Count Byte
// vpopcntd, // Vector Population Count Dword
vpopcntd, // Vector Population Count Dword
// vpopcnth, // Vector Population Count Hword
// vpopcntw, // Vector Population Count Word
vmrghb, // vector merge high byte
Expand Down
40 changes: 20 additions & 20 deletions compiler/p/codegen/OMRInstOpCodeProperties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7572,17 +7572,17 @@
PPCOpProp_SyncSideEffectFree,
},

/* { */
/* .mnemonic = OMR::InstOpCode::vsrd, */
/* .name = "vsrd", */
{
/* .mnemonic = */ OMR::InstOpCode::vsrd,
/* .name = */ "vsrd",
/* .description = "Vector Shift Right Dword", */
/* .prefix = 0x00000000, */
/* .opcode = 0x100006C4, */
/* .format = FORMAT_UNKNOWN, */
/* .minimumALS = OMR_PROCESSOR_PPC_P8, */
/* .properties = PPCOpProp_IsVMX | */
/* PPCOpProp_SyncSideEffectFree, */
/* }, */
/* .prefix = */ 0x00000000,
/* .opcode = */ 0x100006C4,
/* .format = */ FORMAT_VRT_VRA_VRB,
/* .minimumALS = */ OMR_PROCESSOR_PPC_P8,
/* .properties = */ PPCOpProp_IsVMX |
PPCOpProp_SyncSideEffectFree,
},

/* { */
/* .mnemonic = OMR::InstOpCode::vsubcuq, */
Expand Down Expand Up @@ -9589,17 +9589,17 @@
/* PPCOpProp_SyncSideEffectFree, */
/* }, */

/* { */
/* .mnemonic = OMR::InstOpCode::vpopcntd, */
/* .name = "vpopcntd", */
{
/* .mnemonic = */ OMR::InstOpCode::vpopcntd,
/* .name = */ "vpopcntd",
/* .description = "Vector Population Count Dword", */
/* .prefix = 0x00000000, */
/* .opcode = 0x100007C3, */
/* .format = FORMAT_UNKNOWN, */
/* .minimumALS = OMR_PROCESSOR_PPC_P8, */
/* .properties = PPCOpProp_IsVMX | */
/* PPCOpProp_SyncSideEffectFree, */
/* }, */
/* .prefix = */ 0x00000000,
/* .opcode = */ 0x100007C3,
/* .format = */ FORMAT_VRT_VRB,
/* .minimumALS = */ OMR_PROCESSOR_PPC_P8,
/* .properties = */ PPCOpProp_IsVMX |
PPCOpProp_SyncSideEffectFree,
},

/* { */
/* .mnemonic = OMR::InstOpCode::vpopcnth, */
Expand Down
53 changes: 52 additions & 1 deletion compiler/p/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,58 @@ OMR::Power::TreeEvaluator::mstoreiEvaluator(TR::Node *node, TR::CodeGenerator *c
/**
 * \brief
 *    Evaluates an mTrueCount node: counts how many lanes of a 128-bit vector mask are "true"
 *    and returns that count in a general purpose register.
 *
 * \details
 *    Mask lanes are represented as either all 1's (true) or all 0's (false), so the number of
 *    true lanes equals (total number of set bits in the vector) / (element size in bits).
 *    The sequence emitted is:
 *       vpopcntd  - population count of each doubleword of the mask
 *       xxpermdi  - bring the other doubleword count alongside (immediate 2)
 *       vaddudm   - add the two doubleword counts together
 *       mfvsrd    - move the sum into a GPR
 *       sradi     - divide by the element size in bits (shift by log2 of 8/16/32/64)
 *
 * \param node
 *    The mTrueCount node; its first child is the vector mask to be counted.
 *
 * \param cg
 *    The code generator used to evaluate the child, allocate registers and emit instructions.
 *
 * \return
 *    The GPR holding the number of true lanes. The same register is also set on \p node.
 *    A fatal assertion fires if the mask is not 128 bits wide or if its element type is not
 *    one of Int8, Int16, Int32 or Int64.
 */
TR::Register*
OMR::Power::TreeEvaluator::mTrueCountEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *firstChild = node->getFirstChild();

   // Report the type that is actually being checked (the mask operand), not the type of the parent node
   TR_ASSERT_FATAL_WITH_NODE(node, firstChild->getDataType().getVectorLength() == TR::VectorLength128,
                             "Only 128-bit vectors are supported %s", firstChild->getDataType().toString());

   TR::Register *srcReg = cg->evaluate(firstChild);
   TR::Register *resReg = cg->allocateRegister(TR_GPR);

   TR::Register *temp1 = cg->allocateRegister(TR_VRF);
   TR::Register *temp2 = cg->allocateRegister(TR_VRF);

   node->setRegister(resReg);

   // pick shift distance based on vector element type: log2(element size in bits)
   TR::DataType type = firstChild->getDataType().getVectorElementType();
   int shift = 0;

   switch(type)
      {
      case TR::Int8:
         shift = 3;
         break;
      case TR::Int16:
         shift = 4;
         break;
      case TR::Int32:
         shift = 5;
         break;
      case TR::Int64:
         shift = 6;
         break;
      default:
         TR_ASSERT_FATAL(false, "Unsupported vector type %s for mTrueCount\n", firstChild->getDataType().toString()); return NULL;
      }

   // get population counts of each half of input vector separately
   generateTrg1Src1Instruction(cg, OMR::InstOpCode::vpopcntd, node, temp1, srcReg);

   // add two halves together and move result to GPR
   generateTrg1Src2ImmInstruction(cg, OMR::InstOpCode::xxpermdi, node, temp2, temp1, temp1, 2);
   generateTrg1Src2Instruction(cg, OMR::InstOpCode::vaddudm, node, temp1, temp1, temp2);
   generateTrg1Src1Instruction(cg, TR::InstOpCode::mfvsrd, node, resReg, temp1);

   // since vector mask values are represented as either all 1's (true) or all 0's (false), the number
   // of "true" values can be calculated as: (total bitwise population count)/(element size in bits)
   generateTrg1Src1ImmInstruction(cg, OMR::InstOpCode::sradi, node, resReg, resReg, shift);

   cg->stopUsingRegister(temp1);
   cg->stopUsingRegister(temp2);
   cg->decReferenceCount(firstChild);

   return resReg;
   }

TR::Register*
Expand Down
8 changes: 7 additions & 1 deletion fvtest/compilerunittest/p/BinaryEncoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2176,7 +2176,9 @@ INSTANTIATE_TEST_CASE_P(VMX, PPCTrg1Src1EncodingTest, ::testing::Values(
std::make_tuple(TR::InstOpCode::vclzd, TR::RealRegister::vr31, TR::RealRegister::vr0, TRTest::BinaryInstruction("13E007C2")),
std::make_tuple(TR::InstOpCode::vclzd, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000FFC2")),
std::make_tuple(TR::InstOpCode::vclzw, TR::RealRegister::vr31, TR::RealRegister::vr0, TRTest::BinaryInstruction("13E00782")),
std::make_tuple(TR::InstOpCode::vclzw, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000FF82"))
std::make_tuple(TR::InstOpCode::vclzw, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000FF82")),
std::make_tuple(TR::InstOpCode::vpopcntd, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000FFC3")),
std::make_tuple(TR::InstOpCode::vpopcntd, TR::RealRegister::vr31, TR::RealRegister::vr0, TRTest::BinaryInstruction("13E007C3"))
));

INSTANTIATE_TEST_CASE_P(VMX, PPCTrg1Src2EncodingTest, ::testing::ValuesIn(*TRTest::MakeVector<std::tuple<TR::InstOpCode::Mnemonic, TR::RealRegister::RegNum, TR::RealRegister::RegNum, TR::RealRegister::RegNum, TRTest::BinaryInstruction>>(
Expand Down Expand Up @@ -2402,6 +2404,9 @@ INSTANTIATE_TEST_CASE_P(VMX, PPCTrg1Src2EncodingTest, ::testing::ValuesIn(*TRTes
std::make_tuple(TR::InstOpCode::vsrw, TR::RealRegister::vr31, TR::RealRegister::vr0, TR::RealRegister::vr0, TRTest::BinaryInstruction("13e00284")),
std::make_tuple(TR::InstOpCode::vsrw, TR::RealRegister::vr0, TR::RealRegister::vr31, TR::RealRegister::vr0, TRTest::BinaryInstruction("101f0284")),
std::make_tuple(TR::InstOpCode::vsrw, TR::RealRegister::vr0, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000fa84")),
std::make_tuple(TR::InstOpCode::vsrd, TR::RealRegister::vr31, TR::RealRegister::vr0, TR::RealRegister::vr0, TRTest::BinaryInstruction("13e006c4")),
std::make_tuple(TR::InstOpCode::vsrd, TR::RealRegister::vr0, TR::RealRegister::vr31, TR::RealRegister::vr0, TRTest::BinaryInstruction("101f06c4")),
std::make_tuple(TR::InstOpCode::vsrd, TR::RealRegister::vr0, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000fec4")),
std::make_tuple(TR::InstOpCode::vsubsbs, TR::RealRegister::vr31, TR::RealRegister::vr0, TR::RealRegister::vr0, TRTest::BinaryInstruction("13e00700")),
std::make_tuple(TR::InstOpCode::vsubsbs, TR::RealRegister::vr0, TR::RealRegister::vr31, TR::RealRegister::vr0, TRTest::BinaryInstruction("101f0700")),
std::make_tuple(TR::InstOpCode::vsubsbs, TR::RealRegister::vr0, TR::RealRegister::vr0, TR::RealRegister::vr31, TRTest::BinaryInstruction("1000ff00")),
Expand Down Expand Up @@ -2556,6 +2561,7 @@ INSTANTIATE_TEST_CASE_P(VMX, PPCRecordFormSanityTest, ::testing::ValuesIn(*TRTes
std::make_tuple(TR::InstOpCode::vsrh, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
std::make_tuple(TR::InstOpCode::vsro, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
std::make_tuple(TR::InstOpCode::vsrw, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
std::make_tuple(TR::InstOpCode::vsrd, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
std::make_tuple(TR::InstOpCode::vsubsbs, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
std::make_tuple(TR::InstOpCode::vsubshs, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
std::make_tuple(TR::InstOpCode::vsubsws, TR::InstOpCode::bad, TRTest::BinaryInstruction()),
Expand Down