Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion asm/arm64/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ var floatBinaryOpcodes = map[Op]struct{ single, double uint32 }{
OpFSUB: {0x1E203800, 0x1E603800},
OpFMUL: {0x1E200800, 0x1E600800},
OpFDIV: {0x1E201800, 0x1E601800},
OpFMIN: {0x1E205800, 0x1E605800},
OpFMAX: {0x1E204800, 0x1E604800},
}

// floatTernaryOpcodes maps each 4-register scalar float opcode (FMADD-family)
Expand Down Expand Up @@ -585,10 +587,25 @@ func (e *Encoder) Encode(inst asm.Instruction) ([]byte, error) {
// Float — arithmetic (single and double precision)
// -----------------------------------------------------------------------

case OpFADD, OpFSUB, OpFMUL, OpFDIV:
case OpFADD, OpFSUB, OpFMUL, OpFDIV, OpFMIN, OpFMAX:
fb := floatBinaryOpcodes[op]
return e.encodeFloatBinary(fb.single, fb.double, inst)

// -----------------------------------------------------------------------
// SIMD (fixed 8B arrangement): CNT, ADDV
// -----------------------------------------------------------------------

case OpCNT, OpADDV:
d, n, err := e.decodeReg2(inst)
if err != nil {
return nil, err
}
base := uint32(0x0E205800) // CNT Vd.8B, Vn.8B
if op == OpADDV {
base = 0x0E31B800 // ADDV Bd, Vn.8B
}
return enc(base | reg(n)<<5 | reg(d)), nil

case OpFMADD, OpFMSUB, OpFNMADD, OpFNMSUB:
ft := floatTernaryOpcodes[op]
return e.encodeFloatTernary(ft.single, ft.double, inst)
Expand Down
6 changes: 6 additions & 0 deletions asm/arm64/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ func TestEncoder_Encode(t *testing.T) {
{"FSUB D1,D2,D3", FSUB(D1, D2, D3), 0x1E633841},
{"FMUL D1,D2,D3", FMUL(D1, D2, D3), 0x1E630841},
{"FDIV D1,D2,D3", FDIV(D1, D2, D3), 0x1E631841},
{"FMIN D1,D2,D3", FMIN(D1, D2, D3), 0x1E635841},
{"FMIN S1,S2,S3", FMIN(S1, S2, S3), 0x1E235841},
{"FMAX D1,D2,D3", FMAX(D1, D2, D3), 0x1E634841},
{"FMAX S1,S2,S3", FMAX(S1, S2, S3), 0x1E234841},
{"CNT D1,D2", CNT(D1, D2), 0x0E205841},
{"ADDV D1,D2", ADDV(D1, D2), 0x0E31B841},
{"FMADD D0,D1,D2,D3", FMADD(D0, D1, D2, D3), 0x1F420C20},
{"FMSUB D0,D1,D2,D3", FMSUB(D0, D1, D2, D3), 0x1F428C20},
{"FNMADD D0,D1,D2,D3", FNMADD(D0, D1, D2, D3), 0x1F620C20},
Expand Down
14 changes: 14 additions & 0 deletions asm/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ const (
OpFSUB
OpFMUL
OpFDIV
OpFMIN
OpFMAX
OpFMADD
OpFMSUB
OpFNMADD
Expand All @@ -131,6 +133,10 @@ const (
OpFRINTP
OpFRINTZ

// SIMD (fixed 8B arrangement)
OpCNT
OpADDV

// Float move / compare
OpFMOV
OpFCMP
Expand Down Expand Up @@ -489,6 +495,8 @@ func FADD(dst, src1, src2 asm.Reg) asm.Instruction { return newReg3(OpFADD, dst,
// FSUB Dd, Dn, Dm → Dd = Dn - Dm
func FSUB(dst, src1, src2 asm.Reg) asm.Instruction {
	return newReg3(OpFSUB, dst, src1, src2)
}

// FMUL Dd, Dn, Dm → Dd = Dn * Dm
func FMUL(dst, src1, src2 asm.Reg) asm.Instruction {
	return newReg3(OpFMUL, dst, src1, src2)
}

// FDIV Dd, Dn, Dm → Dd = Dn / Dm
func FDIV(dst, src1, src2 asm.Reg) asm.Instruction {
	return newReg3(OpFDIV, dst, src1, src2)
}

// FMIN Dd, Dn, Dm → Dd = min(Dn, Dm)
func FMIN(dst, src1, src2 asm.Reg) asm.Instruction {
	return newReg3(OpFMIN, dst, src1, src2)
}

// FMAX Dd, Dn, Dm → Dd = max(Dn, Dm)
func FMAX(dst, src1, src2 asm.Reg) asm.Instruction {
	return newReg3(OpFMAX, dst, src1, src2)
}

// FMADD Dd, Dn, Dm, Da → Dd = Da + Dn*Dm
func FMADD(dst, src1, src2, acc asm.Reg) asm.Instruction {
Expand Down Expand Up @@ -519,6 +527,12 @@ func FRINTM(dst, src asm.Reg) asm.Instruction { return newReg2(OpFRINTM, dst, sr
// FRINTP Dd, Dn → Dd = Dn rounded to an integral value toward +∞
func FRINTP(dst, src asm.Reg) asm.Instruction {
	return newReg2(OpFRINTP, dst, src)
}

// FRINTZ Dd, Dn → Dd = Dn rounded to an integral value toward zero
func FRINTZ(dst, src asm.Reg) asm.Instruction {
	return newReg2(OpFRINTZ, dst, src)
}

// CNT Vd.8B, Vn.8B → per-byte population count of Vn written to Vd.
// Operands are SIMD V registers; the arrangement is fixed at 8 bytes.
func CNT(dst, src asm.Reg) asm.Instruction {
	return newReg2(OpCNT, dst, src)
}

// ADDV Bd, Vn.8B → sum of the 8 byte lanes of Vn, placed in the low byte of Vd.
// Operands are SIMD V registers; the arrangement is fixed at 8 bytes.
func ADDV(dst, src asm.Reg) asm.Instruction {
	return newReg2(OpADDV, dst, src)
}

// ---------------------------------------------------------------------------
// Float-point move / compare
// ---------------------------------------------------------------------------
Expand Down
60 changes: 52 additions & 8 deletions docs/instruction-set.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ Offsets are signed 16-bit values encoded little-endian. `BR 5` skips 5 bytes pas
| `BR_TABLE` | `{-2, 2}` | `index →` | ◐ | Jump table; negative or out-of-range index uses default target. JIT only for simple stack shapes. |
| `CALL` | `{}` | `fn →` | ◐ | Call `*Function`, `*HostFunction`, or `*Closure`; trace JIT lowers observed direct calls, small same-arity function-value indirect dispatches, and eligible closure-body calls to native `BL`. Host calls and misses fall back. |
| `RETURN` | `{}` | `→` | ◐ | Return from current frame; trace JIT lowers entry returns and stitches inlined callee returns. |
| `RETURN_CALL` | `{}` | `args… fn →` | ◐ | Tail call: pops args + funcref like `CALL`, but reuses the current frame so tail recursion runs in constant frame depth. Above the entry frame the frame is replaced in place; at the entry frame a new frame is pushed (callee returns to the entry frame normally). Target must be a `*Function` or `*Closure`; a host-function target is invoked in place and its results returned. Result arity should match the current function's. Trace JIT lowers plain-function targets: a tail call back to the trace anchor becomes a native loop back-edge (self/mutual recursion in constant depth), a tail call to another function morphs the frame in place. Host and closure targets fall back. |

## Variables

Expand Down Expand Up @@ -129,6 +130,13 @@ A `ref`-typed slot is the VM's dynamic ("any") type: it holds any `Boxed` — an
| `I32_AND` | `{}` | `a b → i32` | ✅ | Bitwise AND. |
| `I32_OR` | `{}` | `a b → i32` | ✅ | Bitwise OR. |
| `I32_XOR` | `{}` | `a b → i32` | ✅ | Bitwise XOR. |
| `I32_CLZ` | `{}` | `x → i32` | ✅ | Count leading zero bits (`32` if `x == 0`). |
| `I32_CTZ` | `{}` | `x → i32` | ✅ | Count trailing zero bits (`32` if `x == 0`). |
| `I32_POPCNT` | `{}` | `x → i32` | ✅ | Count set bits. |
| `I32_ROTL` | `{}` | `a b → i32` | ✅ | Rotate `a` left by `b` (modulo 32). |
| `I32_ROTR` | `{}` | `a b → i32` | ✅ | Rotate `a` right by `b` (modulo 32). |
| `I32_EXTEND8_S` | `{}` | `x → i32` | ✅ | Sign-extend low 8 bits to i32. |
| `I32_EXTEND16_S` | `{}` | `x → i32` | ✅ | Sign-extend low 16 bits to i32. |
| `I32_EQZ` | `{}` | `x → i32` | ✅ | Push `I32(1)` if zero. |
| `I32_EQ` | `{}` | `a b → i32` | ✅ | Equality comparison. |
| `I32_NE` | `{}` | `a b → i32` | ✅ | Inequality comparison. |
Expand All @@ -146,6 +154,7 @@ A `ref`-typed slot is the VM's dynamic ("any") type: it holds any `Boxed` — an
| `I32_TO_F32_U` | `{}` | `i32 → f32` | ✅ | Convert unsigned i32 to f32. |
| `I32_TO_F64_S` | `{}` | `i32 → f64` | ✅ | Convert signed i32 to f64. |
| `I32_TO_F64_U` | `{}` | `i32 → f64` | ✅ | Convert unsigned i32 to f64. |
| `I32_REINTERPRET_F32` | `{}` | `f32 → i32` | ✅ | Reinterpret f32 bit pattern as i32 (no conversion). |

## i64 Operations

Expand All @@ -162,13 +171,25 @@ A `ref`-typed slot is the VM's dynamic ("any") type: it holds any `Boxed` — an
| `I64_SHL` | `{}` | `a b → i64` | ✅ | Left shift; amount uses low 6 bits. |
| `I64_SHR_S` | `{}` | `a b → i64` | ✅ | Arithmetic right shift. |
| `I64_SHR_U` | `{}` | `a b → i64` | ✅ | Logical right shift. |
| `I64_XOR` | `{}` | `a b → i64` | ✅ | Bitwise XOR. |
| `I64_AND` | `{}` | `a b → i64` | ✅ | Bitwise AND. |
| `I64_OR` | `{}` | `a b → i64` | ✅ | Bitwise OR. |
| `I64_CLZ` | `{}` | `x → i64` | ✅ | Count leading zero bits (`64` if `x == 0`). |
| `I64_CTZ` | `{}` | `x → i64` | ✅ | Count trailing zero bits (`64` if `x == 0`). |
| `I64_POPCNT` | `{}` | `x → i64` | ✅ | Count set bits. |
| `I64_ROTL` | `{}` | `a b → i64` | ✅ | Rotate `a` left by `b` (modulo 64). |
| `I64_ROTR` | `{}` | `a b → i64` | ✅ | Rotate `a` right by `b` (modulo 64). |
| `I64_EXTEND8_S` | `{}` | `x → i64` | ✅ | Sign-extend low 8 bits to i64. |
| `I64_EXTEND16_S` | `{}` | `x → i64` | ✅ | Sign-extend low 16 bits to i64. |
| `I64_EXTEND32_S` | `{}` | `x → i64` | ✅ | Sign-extend low 32 bits to i64. |
| `I64_EQZ` | `{}` | `x → i32` | ✅ | Push `I32(1)` if zero. |
| `I64_EQ` … `I64_GE_U` | `{}` | `a b → i32` | ✅ | Same semantics as i32 comparisons. |
| `I64_TO_I32` | `{}` | `i64 → i32` | ✅ | Truncate to low 32 bits. |
| `I64_TO_F32_S` | `{}` | `i64 → f32` | ✅ | Convert signed i64 to f32. |
| `I64_TO_F32_U` | `{}` | `i64 → f32` | ✅ | Convert unsigned i64 to f32. |
| `I64_TO_F64_S` | `{}` | `i64 → f64` | ✅ | Convert signed i64 to f64. |
| `I64_TO_F64_U` | `{}` | `i64 → f64` | ✅ | Convert unsigned i64 to f64. |
| `I64_REINTERPRET_F64` | `{}` | `f64 → i64` | ✅ | Reinterpret f64 bit pattern as i64 (no conversion). |

## f32 Operations

Expand All @@ -179,12 +200,23 @@ A `ref`-typed slot is the VM's dynamic ("any") type: it holds any `Boxed` — an
| `F32_SUB` | `{}` | `a b → f32` | ✅ | Floating-point subtraction. |
| `F32_MUL` | `{}` | `a b → f32` | ✅ | Floating-point multiplication. |
| `F32_DIV` | `{}` | `a b → f32` | ✅ | Floating-point division. |
| `F32_ABS` | `{}` | `x → f32` | ✅ | Absolute value (clears sign bit). |
| `F32_NEG` | `{}` | `x → f32` | ✅ | Negate (flips sign bit, incl. NaN). |
| `F32_SQRT` | `{}` | `x → f32` | ✅ | Square root. |
| `F32_CEIL` | `{}` | `x → f32` | ✅ | Round toward +∞. |
| `F32_FLOOR` | `{}` | `x → f32` | ✅ | Round toward −∞. |
| `F32_TRUNC` | `{}` | `x → f32` | ✅ | Round toward zero. |
| `F32_NEAREST` | `{}` | `x → f32` | ✅ | Round to nearest, ties to even. |
| `F32_MIN` | `{}` | `a b → f32` | ✅ | Minimum; NaN propagates, `min(-0,+0)=-0`. |
| `F32_MAX` | `{}` | `a b → f32` | ✅ | Maximum; NaN propagates, `max(-0,+0)=+0`. |
| `F32_COPYSIGN` | `{}` | `a b → f32` | ✅ | Magnitude of `a` with sign of `b`. |
| `F32_EQ` … `F32_GE` | `{}` | `a b → i32` | ✅ | Floating-point comparisons. |
| `F32_TO_I32_S` | `{}` | `f32 → i32` | ✅ | Truncate to signed i32. |
| `F32_TO_I32_U` | `{}` | `f32 → i32` | ✅ | Truncate to unsigned i32. |
| `F32_TO_I64_S` | `{}` | `f32 → i64` | ✅ | Truncate to signed i64. |
| `F32_TO_I64_U` | `{}` | `f32 → i64` | ✅ | Truncate to unsigned i64. |
| `F32_TO_I32_S` | `{}` | `f32 → i32` | ✅ | Truncate toward zero to signed i32, saturating (NaN→0, out-of-range→nearest bound). |
| `F32_TO_I32_U` | `{}` | `f32 → i32` | ✅ | Truncate toward zero to unsigned i32, saturating (NaN/negative→0, overflow→`u32` max). |
| `F32_TO_I64_S` | `{}` | `f32 → i64` | ✅ | Truncate toward zero to signed i64, saturating (NaN→0, out-of-range→nearest bound). |
| `F32_TO_I64_U` | `{}` | `f32 → i64` | ✅ | Truncate toward zero to unsigned i64, saturating (NaN/negative→0, overflow→`u64` max). |
| `F32_TO_F64` | `{}` | `f32 → f64` | ✅ | Widen f32 to f64. |
| `F32_REINTERPRET_I32` | `{}` | `i32 → f32` | ✅ | Reinterpret i32 bit pattern as f32 (no conversion). |

## f64 Operations

Expand All @@ -195,12 +227,23 @@ A `ref`-typed slot is the VM's dynamic ("any") type: it holds any `Boxed` — an
| `F64_SUB` | `{}` | `a b → f64` | ✅ | Floating-point subtraction. |
| `F64_MUL` | `{}` | `a b → f64` | ✅ | Floating-point multiplication. |
| `F64_DIV` | `{}` | `a b → f64` | ✅ | Floating-point division. |
| `F64_ABS` | `{}` | `x → f64` | ✅ | Absolute value (clears sign bit). |
| `F64_NEG` | `{}` | `x → f64` | ✅ | Negate (flips sign bit, incl. NaN). |
| `F64_SQRT` | `{}` | `x → f64` | ✅ | Square root. |
| `F64_CEIL` | `{}` | `x → f64` | ✅ | Round toward +∞. |
| `F64_FLOOR` | `{}` | `x → f64` | ✅ | Round toward −∞. |
| `F64_TRUNC` | `{}` | `x → f64` | ✅ | Round toward zero. |
| `F64_NEAREST` | `{}` | `x → f64` | ✅ | Round to nearest, ties to even. |
| `F64_MIN` | `{}` | `a b → f64` | ✅ | Minimum; NaN propagates, `min(-0,+0)=-0`. |
| `F64_MAX` | `{}` | `a b → f64` | ✅ | Maximum; NaN propagates, `max(-0,+0)=+0`. |
| `F64_COPYSIGN` | `{}` | `a b → f64` | ✅ | Magnitude of `a` with sign of `b`. |
| `F64_EQ` … `F64_GE` | `{}` | `a b → i32` | ✅ | Floating-point comparisons. |
| `F64_TO_I32_S` | `{}` | `f64 → i32` | ✅ | Truncate to signed i32. |
| `F64_TO_I32_U` | `{}` | `f64 → i32` | ✅ | Truncate to unsigned i32. |
| `F64_TO_I64_S` | `{}` | `f64 → i64` | ✅ | Truncate to signed i64. |
| `F64_TO_I64_U` | `{}` | `f64 → i64` | ✅ | Truncate to unsigned i64. |
| `F64_TO_I32_S` | `{}` | `f64 → i32` | ✅ | Truncate toward zero to signed i32, saturating (NaN→0, out-of-range→nearest bound). |
| `F64_TO_I32_U` | `{}` | `f64 → i32` | ✅ | Truncate toward zero to unsigned i32, saturating (NaN/negative→0, overflow→`u32` max). |
| `F64_TO_I64_S` | `{}` | `f64 → i64` | ✅ | Truncate toward zero to signed i64, saturating (NaN→0, out-of-range→nearest bound). |
| `F64_TO_I64_U` | `{}` | `f64 → i64` | ✅ | Truncate toward zero to unsigned i64, saturating (NaN/negative→0, overflow→`u64` max). |
| `F64_TO_F32` | `{}` | `f64 → f32` | ✅ | Narrow f64 to f32. |
| `F64_REINTERPRET_I64` | `{}` | `i64 → f64` | ⬜ | Reinterpret i64 bit pattern as f64 (no conversion). |

## String Operations

Expand Down Expand Up @@ -249,3 +292,4 @@ Map keys use primitive value identity for `i32`, `i64`, `f32`, and `f64`; all re
| `MAP_SET` | `{}` | `map key value →` | ◐ | Insert or replace entry. JIT keeps framed entries by exiting locally to the threaded handler. |
| `MAP_DELETE` | `{}` | `map key →` | ◐ | Delete entry; missing key is a no-op. JIT keeps framed entries by exiting locally to the threaded handler. |
| `MAP_CLEAR` | `{}` | `map →` | ◐ | Delete all entries. JIT keeps framed entries by exiting locally to the threaded handler. |
| `MAP_KEYS` | `{}` | `map → array` | ⬜ | Snapshot keys into a new `[]K` array (`K` = map key type), in unspecified order. Enables guest map iteration with `ARRAY_LEN`/`ARRAY_GET` + `MAP_GET`. |
55 changes: 55 additions & 0 deletions instr/opcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const (

CALL
RETURN
RETURN_CALL

GLOBAL_GET
GLOBAL_SET
Expand Down Expand Up @@ -74,6 +75,15 @@ const (
I32_AND
I32_OR

I32_CLZ
I32_CTZ
I32_POPCNT
I32_ROTL
I32_ROTR

I32_EXTEND8_S
I32_EXTEND16_S

I32_EQZ
I32_EQ
I32_NE
Expand All @@ -93,6 +103,8 @@ const (
I32_TO_F64_U
I32_TO_F64_S

I32_REINTERPRET_F32

I64_CONST

I64_ADD
Expand All @@ -106,6 +118,20 @@ const (
I64_SHR_S
I64_SHR_U

I64_XOR
I64_AND
I64_OR

I64_CLZ
I64_CTZ
I64_POPCNT
I64_ROTL
I64_ROTR

I64_EXTEND8_S
I64_EXTEND16_S
I64_EXTEND32_S

I64_EQZ
I64_EQ
I64_NE
Expand All @@ -124,13 +150,26 @@ const (
I64_TO_F64_S
I64_TO_F64_U

I64_REINTERPRET_F64

F32_CONST

F32_ADD
F32_SUB
F32_MUL
F32_DIV

F32_ABS
F32_NEG
F32_SQRT
F32_CEIL
F32_FLOOR
F32_TRUNC
F32_NEAREST
F32_MIN
F32_MAX
F32_COPYSIGN

F32_EQ
F32_NE
F32_LT
Expand All @@ -144,13 +183,26 @@ const (
F32_TO_I64_U
F32_TO_F64

F32_REINTERPRET_I32

F64_CONST

F64_ADD
F64_SUB
F64_MUL
F64_DIV

F64_ABS
F64_NEG
F64_SQRT
F64_CEIL
F64_FLOOR
F64_TRUNC
F64_NEAREST
F64_MIN
F64_MAX
F64_COPYSIGN

F64_EQ
F64_NE
F64_LT
Expand All @@ -164,6 +216,8 @@ const (
F64_TO_I64_U
F64_TO_F32

F64_REINTERPRET_I64

STRING_NEW_UTF32

STRING_LEN
Expand Down Expand Up @@ -202,6 +256,7 @@ const (
MAP_SET
MAP_DELETE
MAP_CLEAR
MAP_KEYS

CLOSURE_NEW
)
Loading
Loading