NVGPUOps

nvgpu.cga_barrier_arrive (triton::nvgpu::CGABarrierArriveOp)

Syntax:

operation ::= `nvgpu.cga_barrier_arrive` attr-dict

nvgpu.cga_barrier_sync (triton::nvgpu::CGABarrierSyncOp)

Syntax:

operation ::= `nvgpu.cga_barrier_sync` attr-dict

nvgpu.cga_barrier_wait (triton::nvgpu::CGABarrierWaitOp)

Syntax:

operation ::= `nvgpu.cga_barrier_wait` attr-dict

nvgpu.canonical_warp_id (triton::nvgpu::CanonicalWarpIdOp)

Syntax:

operation ::= `nvgpu.canonical_warp_id` attr-dict

Traits: AlwaysSpeculatableImplTrait

Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{}

Results:

Result

Description

result

32-bit signless integer

nvgpu.cluster_arrive (triton::nvgpu::ClusterArriveOp)

Syntax:

operation ::= `nvgpu.cluster_arrive` attr-dict

Attributes:

AttributeMLIR TypeDescription
relaxed::mlir::IntegerAttr1-bit signless integer attribute

nvgpu.cluster_id (triton::nvgpu::ClusterCTAIdOp)

Syntax:

operation ::= `nvgpu.cluster_id` attr-dict

Traits: AlwaysSpeculatableImplTrait

Interfaces: ConditionallySpeculatable, InferTypeOpInterface, NoMemoryEffect (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{}

Results:

Result

Description

result

32-bit signless integer

nvgpu.cluster_wait (triton::nvgpu::ClusterWaitOp)

Syntax:

operation ::= `nvgpu.cluster_wait` attr-dict

nvgpu.fence_async_shared (triton::nvgpu::FenceAsyncSharedOp)

Syntax:

operation ::= `nvgpu.fence_async_shared` attr-dict

Attributes:

AttributeMLIR TypeDescription
bCluster::mlir::BoolAttrbool attribute

nvgpu.fence_mbarrier_init (triton::nvgpu::FenceMBarrierInitOp)

Syntax:

operation ::= `nvgpu.fence_mbarrier_init` attr-dict

nvgpu.load_dsmem (triton::nvgpu::LoadDSmemOp)

Syntax:

operation ::= `nvgpu.load_dsmem` operands attr-dict `:` functional-type(operands, results)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Read on ::mlir::SideEffects::DefaultResource}

Attributes:

AttributeMLIR TypeDescription
bitwidth::mlir::IntegerAttr32-bit signless integer attribute
vec::mlir::IntegerAttr32-bit signless integer attribute

Operands:

Operand

Description

addr

LLVM pointer type

ctaId

32-bit signless integer

Results:

Result

Description

result

LLVM type with size

nvgpu.mbarrier_arrive (triton::nvgpu::MBarrierArriveOp)

Syntax:

operation ::= `nvgpu.mbarrier_arrive` $mbarrier `,` $pred (`,` $ctaId^)? attr-dict `:` type($mbarrier)

Attributes:

AttributeMLIR TypeDescription
arriveType::mlir::triton::nvgpu::MBarriveTypeAttr
mbarrier arrive type, either 'normal', 'expect_tx', 'cp_async'{{% markdown %}}Enum cases: * normal (`normal`) * cp_async (`cp_async`) * expect_tx (`expect_tx`) * remote (`remote`){{% /markdown %}}
txCount::mlir::IntegerAttr32-bit signless integer attribute

Operands:

Operand

Description

mbarrier

LLVM pointer in address space 3

pred

1-bit signless integer

ctaId

32-bit signless integer

nvgpu.mbarrier_init (triton::nvgpu::MBarrierInitOp)

Syntax:

operation ::= `nvgpu.mbarrier_init` $mbarrier `,` $pred attr-dict `:` type($mbarrier)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}

Attributes:

AttributeMLIR TypeDescription
count::mlir::IntegerAttr32-bit signless integer attribute

Operands:

Operand

Description

mbarrier

LLVM pointer in address space 3

pred

1-bit signless integer

nvgpu.mbarrier_wait (triton::nvgpu::MBarrierWaitOp)

Syntax:

operation ::= `nvgpu.mbarrier_wait` $mbarrier `,` $phase attr-dict `:` type(operands)

Operands:

Operand

Description

mbarrier

LLVM pointer in address space 3

phase

1-bit signless integer

nvgpu.bar_arrive (triton::nvgpu::NamedBarrierArriveOp)

Syntax:

operation ::= `nvgpu.bar_arrive` $bar `,` $numThreads attr-dict `:` type(operands)

Operands:

Operand

Description

bar

32-bit signless integer

numThreads

32-bit signless integer

nvgpu.bar_wait (triton::nvgpu::NamedBarrierWaitOp)

Syntax:

operation ::= `nvgpu.bar_wait` $bar `,` $numThreads attr-dict `:` type(operands)

Operands:

Operand

Description

bar

32-bit signless integer

numThreads

32-bit signless integer

nvgpu.offset_of_stmatrix_v4 (triton::nvgpu::OffsetOfStmatrixV4Op)

Syntax:

operation ::= `nvgpu.offset_of_stmatrix_v4` operands attr-dict `:` type(operands) `->` type($offset)

Interfaces: InferTypeOpInterface

Attributes:

AttributeMLIR TypeDescription
leadingDimOffset::mlir::IntegerAttr32-bit signless integer attribute
rowStride::mlir::IntegerAttr32-bit signless integer attribute
swizzleEnabled::mlir::IntegerAttr1-bit signless integer attribute

Operands:

Operand

Description

threadId

32-bit signless integer

rowOfWarp

32-bit signless integer

elemIdx

32-bit signless integer

Results:

Result

Description

offset

32-bit signless integer

nvgpu.offset_of_sts64 (triton::nvgpu::OffsetOfSts64Op)

Syntax:

operation ::= `nvgpu.offset_of_sts64` operands attr-dict `:` type(operands) `->` type($offset)

Interfaces: InferTypeOpInterface

Attributes:

AttributeMLIR TypeDescription
leadingDimOffset::mlir::IntegerAttr32-bit signless integer attribute
rowStride::mlir::IntegerAttr32-bit signless integer attribute
swizzleEnabled::mlir::IntegerAttr1-bit signless integer attribute

Operands:

Operand

Description

threadId

32-bit signless integer

rowOfWarp

32-bit signless integer

elemIdx

32-bit signless integer

Results:

Result

Description

offset

32-bit signless integer

nvgpu.reg_alloc (triton::nvgpu::RegAllocOp)

Syntax:

operation ::= `nvgpu.reg_alloc` operands attr-dict `:` type(operands)

Attributes:

AttributeMLIR TypeDescription
regCount::mlir::IntegerAttr32-bit signless integer attribute

nvgpu.reg_dealloc (triton::nvgpu::RegDeallocOp)

Syntax:

operation ::= `nvgpu.reg_dealloc` operands attr-dict `:` type(operands)

Attributes:

AttributeMLIR TypeDescription
regCount::mlir::IntegerAttr32-bit signless integer attribute

nvgpu.store_dsmem (triton::nvgpu::StoreDSmemOp)

Syntax:

operation ::= `nvgpu.store_dsmem` operands attr-dict `:` type(operands)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}

Operands:

Operand

Description

addr

LLVM pointer type

ctaId

32-bit signless integer

values

variadic of LLVM type with size

pred

1-bit signless integer

nvgpu.stmatrix (triton::nvgpu::StoreMatrixOp)

Syntax:

operation ::= `nvgpu.stmatrix` operands attr-dict `:` type(operands)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}

Operands:

Operand

Description

addr

LLVM pointer in address space 3

datas

variadic of 32-bit signless integer

nvgpu.sts64 (triton::nvgpu::Sts64Op)

Syntax:

operation ::= `nvgpu.sts64` operands attr-dict `:` type(operands)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}

Operands:

Operand

Description

offset

32-bit signless integer

d0

32-bit float or 32-bit signless integer

d1

32-bit float or 32-bit signless integer

nvgpu.tma_load_im2col (triton::nvgpu::TMALoadIm2colOp)

Syntax:

operation ::= `nvgpu.tma_load_im2col` operands attr-dict `:` type(operands)

Attributes:

AttributeMLIR TypeDescription
mcastMask::mlir::IntegerAttr16-bit signless integer attribute

Operands:

Operand

Description

dst

LLVM pointer in address space 3

mbarrier

LLVM pointer in address space 3

tmaDesc

LLVM pointer in address space 1

l2Desc

64-bit signless integer

im2colOffsets

LLVM structure type

pred

1-bit signless integer

coords

variadic of 32-bit signless integer

nvgpu.tma_load_tiled (triton::nvgpu::TMALoadTiledOp)

Syntax:

operation ::= `nvgpu.tma_load_tiled` operands attr-dict `:` type(operands)

Traits: AttrSizedOperandSegments

Operands:

Operand

Description

dst

LLVM pointer in address space 3

mbarrier

LLVM pointer in address space 3

tmaDesc

LLVM pointer in address space 1

l2Desc

64-bit signless integer

pred

1-bit signless integer

coords

variadic of 32-bit signless integer

mcastMask

16-bit signless integer

nvgpu.tma_store_tiled (triton::nvgpu::TMAStoreTiledOp)

Syntax:

operation ::= `nvgpu.tma_store_tiled` operands attr-dict `:` type(operands)

Interfaces: MemoryEffectOpInterface (MemoryEffectOpInterface)

Effects: MemoryEffects::Effect{MemoryEffects::Write on ::mlir::SideEffects::DefaultResource}

Operands:

Operand

Description

tmaDesc

LLVM pointer in address space 1

src

LLVM pointer in address space 3

pred

1-bit signless integer

coords

variadic of 32-bit signless integer

nvgpu.wgmma_commit_group (triton::nvgpu::WGMMACommitGroupOp)

Syntax:

operation ::= `nvgpu.wgmma_commit_group` attr-dict

nvgpu.wgmma_desc_create (triton::nvgpu::WGMMADescCreateOp)

Syntax:

operation ::= `nvgpu.wgmma_desc_create` $buffer `,` $height attr-dict `:` functional-type(operands, results)

Interfaces: InferTypeOpInterface

Attributes:

AttributeMLIR TypeDescription
mode::mlir::triton::nvgpu::WGMMADescModeAttr
wgmma desc mode, either 'none', 'swizzle128', 'swizzle64', or 'swizzle32'{{% markdown %}}Enum cases: * none (`none`) * swizzle128 (`swizzle128`) * swizzle64 (`swizzle64`) * swizzle32 (`swizzle32`){{% /markdown %}}
swizzling::mlir::IntegerAttr64-bit signless integer attribute

Operands:

Operand

Description

buffer

LLVM pointer type

height

32-bit signless integer

Results:

Result

Description

res

64-bit signless integer

nvgpu.wgmma_fence (triton::nvgpu::WGMMAFenceOp)

Syntax:

operation ::= `nvgpu.wgmma_fence` attr-dict

nvgpu.wgmma (triton::nvgpu::WGMMAOp)

Syntax:

operation ::= `nvgpu.wgmma` $opA `,` $opB (`,` $opC^)? attr-dict `:` functional-type(operands, $res)

Attributes:

AttributeMLIR TypeDescription
m::mlir::IntegerAttr32-bit signless integer attribute
n::mlir::IntegerAttr32-bit signless integer attribute
k::mlir::IntegerAttr32-bit signless integer attribute
eltTypeC::mlir::triton::nvgpu::WGMMAEltTypeAttr
wgmma operand type, either 's8', 's32', 'e4m3', 'e5m2', 'f16', 'bf16', 'tf32', or 'f32'{{% markdown %}}Enum cases: * s8 (`s8`) * s32 (`s32`) * e4m3 (`e4m3`) * e5m2 (`e5m2`) * f16 (`f16`) * bf16 (`bf16`) * tf32 (`tf32`) * f32 (`f32`){{% /markdown %}}
eltTypeA::mlir::triton::nvgpu::WGMMAEltTypeAttr
wgmma operand type, either 's8', 's32', 'e4m3', 'e5m2', 'f16', 'bf16', 'tf32', or 'f32'{{% markdown %}}Enum cases: * s8 (`s8`) * s32 (`s32`) * e4m3 (`e4m3`) * e5m2 (`e5m2`) * f16 (`f16`) * bf16 (`bf16`) * tf32 (`tf32`) * f32 (`f32`){{% /markdown %}}
eltTypeB::mlir::triton::nvgpu::WGMMAEltTypeAttr
wgmma operand type, either 's8', 's32', 'e4m3', 'e5m2', 'f16', 'bf16', 'tf32', or 'f32'{{% markdown %}}Enum cases: * s8 (`s8`) * s32 (`s32`) * e4m3 (`e4m3`) * e5m2 (`e5m2`) * f16 (`f16`) * bf16 (`bf16`) * tf32 (`tf32`) * f32 (`f32`){{% /markdown %}}
layoutA::mlir::triton::nvgpu::WGMMALayoutAttr
wgmma layout, either 'row' or 'col'{{% markdown %}}Enum cases: * row (`row`) * col (`col`){{% /markdown %}}
layoutB::mlir::triton::nvgpu::WGMMALayoutAttr
wgmma layout, either 'row' or 'col'{{% markdown %}}Enum cases: * row (`row`) * col (`col`){{% /markdown %}}

Operands:

Operand

Description

opA

wgmma operand A/B type

opB

wgmma operand A/B type

opC

LLVM structure type

Results:

Result

Description

res

LLVM structure type

nvgpu.wgmma_wait_group (triton::nvgpu::WGMMAWaitGroupOp)

Syntax:

operation ::= `nvgpu.wgmma_wait_group` $input attr-dict `:` type($input)

Interfaces: InferTypeOpInterface

Attributes:

AttributeMLIR TypeDescription
pendings::mlir::IntegerAttr32-bit signless integer attribute

Operands:

Operand

Description

input

LLVM structure type

Results:

Result

Description

output

LLVM structure type