-
Notifications
You must be signed in to change notification settings - Fork 812
Open
Labels
codegen/llvm (LLVM code generation compiler backend)
Description
I haven't triaged this yet. IIRC, the relayout ops are fused into the scf.forall, and we may need to make that happen in vector-level tiling as well. Something is missing here.
To repro:
Run `iree-compile --compile-from=executable-sources --compile-to=executable-targets repro.mlir`, where `repro.mlir` contains:
hal.executable public @_encoding_0 {
// Executable variant for the "llvm-cpu" backend ("embedded-elf-x86_64" format, cpu = "znver4").
// The target attribute sets iree.encoding.resolver to #iree_cpu.cpu_encoding_resolver<>, while the
// encoded tensor types used in the function below carry an #iree_gpu.gpu_encoding_resolver
// configuration inside #iree_encoding.layout.
// NOTE(review): presumably this CPU-target / GPU-resolver combination is intentional for the
// repro; confirm before changing either side.
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+sse4a,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vbmi,+avx512ifma,+avx512vpopcntdq,+avx512vbmi2,+gfni,+vpclmulqdq,+avx512vnni,+avx512bitalg,+avx512bf16,+adx,+clflushopt,+clwb,+clzero,+cx16,+f16c,+fsgsbase,+sahf,+lzcnt,+movbe,+mwaitx,+pku,+prfchw,+rdpid,+rdrnd,+rdseed,+sha,+shstk,+vaes,+wbnoinvd,+xsave,+xsavec,+xsaveopt,+xsaves,+cx8,+crc32,+invpcid,+rdpru,+x87,+fxsr", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", iree.encoding.resolver = #iree_cpu.cpu_encoding_resolver<>, max_stack_allocation_size = 32768 : i64, native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>) {
// Entry point (ordinal 0) with two storage-buffer bindings: binding 0 is "ReadOnly|Indirect",
// binding 1 is Indirect. The count region returns the workgroup count produced by
// iree_tensor_ext.dispatch.workgroup_count_from_slice.
hal.executable.export public @_encoding_0_encode_456x789xf16_to_456x789xf16 ordinal(0) layout(#hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) count(%arg0: !hal.device) -> (index, index, index) {
%x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice()
hal.return %x, %y, %z : index, index, index
}
builtin.module {
// Dispatch body: load the full 456x789xf16 input, apply iree_encoding.set_encoding, and store
// the encoded result to the output binding.
func.func @_encoding_0_encode_456x789xf16_to_456x789xf16() {
%c0 = arith.constant 0 : index
// Binding 0: read-only, unencoded tensor<456x789xf16> input.
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !iree_tensor_ext.dispatch.tensor<readonly:tensor<456x789xf16>>
// Binding 1: write-only output whose tensor type carries the #iree_encoding.layout attribute.
// The layout's encoding_info has innerDimsPos = [1, 0], innerTileSizes = [64, 16],
// outerDimsPerm = [1, 0], plus a swizzle (expandShape and permutation = [0, 1, 3, 2, 4]).
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags(Indirect) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<456x789xf16, #iree_encoding.layout<[#iree_gpu.gpu_encoding_resolver<configuration = {encoding_info = {innerDimsPos = [1, 0], innerTileSizes = [64, 16], outerDimsPerm = [1, 0], swizzle = {expandShape = [[["CrossThread", 2 : i16], ["CrossIntrinsic", 2 : i16], ["CrossThread", 16 : i16]], [["CrossThread", 1 : i16], ["Internal", 16 : i16]]], permutation = [0, 1, 3, 2, 4]}}}>]>>>
// Load the whole input (offsets [0, 0], sizes [456, 789], unit strides).
%2 = iree_tensor_ext.dispatch.tensor.load %0, offsets = [0, 0], sizes = [456, 789], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<456x789xf16>> -> tensor<456x789xf16>
// The relayout op under investigation: the result type is the same 456x789xf16 shape with the
// layout encoding attached.
%3 = iree_encoding.set_encoding %2 : tensor<456x789xf16> -> tensor<456x789xf16, #iree_encoding.layout<[#iree_gpu.gpu_encoding_resolver<configuration = {encoding_info = {innerDimsPos = [1, 0], innerTileSizes = [64, 16], outerDimsPerm = [1, 0], swizzle = {expandShape = [[["CrossThread", 2 : i16], ["CrossIntrinsic", 2 : i16], ["CrossThread", 16 : i16]], [["CrossThread", 1 : i16], ["Internal", 16 : i16]]], permutation = [0, 1, 3, 2, 4]}}}>]>>
// Store the encoded tensor to the full extent of the output binding.
iree_tensor_ext.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [456, 789], strides = [1, 1] : tensor<456x789xf16, #iree_encoding.layout<[#iree_gpu.gpu_encoding_resolver<configuration = {encoding_info = {innerDimsPos = [1, 0], innerTileSizes = [64, 16], outerDimsPerm = [1, 0], swizzle = {expandShape = [[["CrossThread", 2 : i16], ["CrossIntrinsic", 2 : i16], ["CrossThread", 16 : i16]], [["CrossThread", 1 : i16], ["Internal", 16 : i16]]], permutation = [0, 1, 3, 2, 4]}}}>]>> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<456x789xf16, #iree_encoding.layout<[#iree_gpu.gpu_encoding_resolver<configuration = {encoding_info = {innerDimsPos = [1, 0], innerTileSizes = [64, 16], outerDimsPerm = [1, 0], swizzle = {expandShape = [[["CrossThread", 2 : i16], ["CrossIntrinsic", 2 : i16], ["CrossThread", 16 : i16]], [["CrossThread", 1 : i16], ["Internal", 16 : i16]]], permutation = [0, 1, 3, 2, 4]}}}>]>>>
return
}
}
}
}
Metadata
Metadata
Assignees
Labels
codegen/llvm (LLVM code generation compiler backend)