However, the enum tree is not supported, because they do not support it. Other than that, MySQL and MariaDB both seem to be supported.
380 lines
5.7 KiB
ArmAsm
380 lines
5.7 KiB
ArmAsm
// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT.

//go:build amd64 && gc && !purego
// +build amd64,gc,!purego

#include "textflag.h"

// func feMul(out *Element, a *Element, b *Element)
//
// feMul multiplies the five-limb values at a and b and stores the five-limb
// result at out.
//
// Each operand is read as five 64-bit limbs at byte offsets 0, 8, 16, 24 and
// 32. The shifts and mask below treat every limb as a radix-2^51 digit, and
// product terms that would land at digit 5 or above are folded back into
// digits 0..4 after scaling by 19. That is consistent with arithmetic modulo
// 2^255 - 19 (5 × 51 = 255 bits).
// NOTE(review): the Element type lives in the Go stub file, which is not
// visible here - confirm the representation against it.
//
// Register use:
//   CX, BX      pointers a and b
//   SI:DI       r0 (high:low 64-bit halves of a 128-bit accumulator)
//   R8:R9       r1
//   R10:R11     r2
//   R12:R13     r3
//   R14:R15     r4
//   AX, DX      MULQ operand and result; AX later holds the 51-bit mask and
//               finally the out pointer
//
// NOTE(review): the ADDQ/ADCQ chains drop any carry out of the high word, and
// IMUL3Q keeps only the low 64 bits of 19×limb, so the input limbs are
// presumably required to be only slightly wider than 51 bits - verify against
// the Element invariants.
//
// out is loaded and written only after the last read of a and b, so out may
// alias either input.
//
// NOSPLIT, $0-24: no stack-split check, no local frame, 24 bytes of arguments.
TEXT ·feMul(SB), NOSPLIT, $0-24
	MOVQ a+8(FP), CX
	MOVQ b+16(FP), BX

	// r0 = a0×b0
	MOVQ (CX), AX
	MULQ (BX)
	MOVQ AX, DI
	MOVQ DX, SI

	// r0 += 19×a1×b4
	MOVQ   8(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   32(BX)
	ADDQ   AX, DI
	ADCQ   DX, SI

	// r0 += 19×a2×b3
	MOVQ   16(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   24(BX)
	ADDQ   AX, DI
	ADCQ   DX, SI

	// r0 += 19×a3×b2
	MOVQ   24(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   16(BX)
	ADDQ   AX, DI
	ADCQ   DX, SI

	// r0 += 19×a4×b1
	MOVQ   32(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   8(BX)
	ADDQ   AX, DI
	ADCQ   DX, SI

	// r1 = a0×b1
	MOVQ (CX), AX
	MULQ 8(BX)
	MOVQ AX, R9
	MOVQ DX, R8

	// r1 += a1×b0
	MOVQ 8(CX), AX
	MULQ (BX)
	ADDQ AX, R9
	ADCQ DX, R8

	// r1 += 19×a2×b4
	MOVQ   16(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   32(BX)
	ADDQ   AX, R9
	ADCQ   DX, R8

	// r1 += 19×a3×b3
	MOVQ   24(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   24(BX)
	ADDQ   AX, R9
	ADCQ   DX, R8

	// r1 += 19×a4×b2
	MOVQ   32(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   16(BX)
	ADDQ   AX, R9
	ADCQ   DX, R8

	// r2 = a0×b2
	MOVQ (CX), AX
	MULQ 16(BX)
	MOVQ AX, R11
	MOVQ DX, R10

	// r2 += a1×b1
	MOVQ 8(CX), AX
	MULQ 8(BX)
	ADDQ AX, R11
	ADCQ DX, R10

	// r2 += a2×b0
	MOVQ 16(CX), AX
	MULQ (BX)
	ADDQ AX, R11
	ADCQ DX, R10

	// r2 += 19×a3×b4
	MOVQ   24(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   32(BX)
	ADDQ   AX, R11
	ADCQ   DX, R10

	// r2 += 19×a4×b3
	MOVQ   32(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   24(BX)
	ADDQ   AX, R11
	ADCQ   DX, R10

	// r3 = a0×b3
	MOVQ (CX), AX
	MULQ 24(BX)
	MOVQ AX, R13
	MOVQ DX, R12

	// r3 += a1×b2
	MOVQ 8(CX), AX
	MULQ 16(BX)
	ADDQ AX, R13
	ADCQ DX, R12

	// r3 += a2×b1
	MOVQ 16(CX), AX
	MULQ 8(BX)
	ADDQ AX, R13
	ADCQ DX, R12

	// r3 += a3×b0
	MOVQ 24(CX), AX
	MULQ (BX)
	ADDQ AX, R13
	ADCQ DX, R12

	// r3 += 19×a4×b4
	MOVQ   32(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   32(BX)
	ADDQ   AX, R13
	ADCQ   DX, R12

	// r4 = a0×b4
	MOVQ (CX), AX
	MULQ 32(BX)
	MOVQ AX, R15
	MOVQ DX, R14

	// r4 += a1×b3
	MOVQ 8(CX), AX
	MULQ 24(BX)
	ADDQ AX, R15
	ADCQ DX, R14

	// r4 += a2×b2
	MOVQ 16(CX), AX
	MULQ 16(BX)
	ADDQ AX, R15
	ADCQ DX, R14

	// r4 += a3×b1
	MOVQ 24(CX), AX
	MULQ 8(BX)
	ADDQ AX, R15
	ADCQ DX, R14

	// r4 += a4×b0
	MOVQ 32(CX), AX
	MULQ (BX)
	ADDQ AX, R15
	ADCQ DX, R14

	// First reduction chain
	// AX = 2^51 - 1, the mask for the low 51 bits of a limb.
	MOVQ   $0x0007ffffffffffff, AX

	// Double-precision shift: hi = (hi << 13) | (lo >> 51), i.e. the part of
	// each 128-bit accumulator above its low 51 bits, truncated to 64 bits.
	SHLQ   $0x0d, DI, SI
	SHLQ   $0x0d, R9, R8
	SHLQ   $0x0d, R11, R10
	SHLQ   $0x0d, R13, R12
	SHLQ   $0x0d, R15, R14

	// Keep the low 51 bits of each accumulator and add the carry of the limb
	// below it; the carry out of r4 wraps into r0 scaled by 19.
	ANDQ   AX, DI
	IMUL3Q $0x13, R14, R14
	ADDQ   R14, DI
	ANDQ   AX, R9
	ADDQ   SI, R9
	ANDQ   AX, R11
	ADDQ   R8, R11
	ANDQ   AX, R13
	ADDQ   R10, R13
	ANDQ   AX, R15
	ADDQ   R12, R15

	// Second reduction chain (carryPropagate)
	// All five carries (limb >> 51) are extracted before any is added, so
	// this is one parallel pass rather than a ripple from limb to limb.
	MOVQ   DI, SI
	SHRQ   $0x33, SI
	MOVQ   R9, R8
	SHRQ   $0x33, R8
	MOVQ   R11, R10
	SHRQ   $0x33, R10
	MOVQ   R13, R12
	SHRQ   $0x33, R12
	MOVQ   R15, R14
	SHRQ   $0x33, R14
	ANDQ   AX, DI
	IMUL3Q $0x13, R14, R14
	ADDQ   R14, DI
	ANDQ   AX, R9
	ADDQ   SI, R9
	ANDQ   AX, R11
	ADDQ   R8, R11
	ANDQ   AX, R13
	ADDQ   R10, R13
	ANDQ   AX, R15
	ADDQ   R12, R15

	// Store output
	MOVQ out+0(FP), AX
	MOVQ DI, (AX)
	MOVQ R9, 8(AX)
	MOVQ R11, 16(AX)
	MOVQ R13, 24(AX)
	MOVQ R15, 32(AX)
	RET

// func feSquare(out *Element, a *Element)
//
// feSquare squares the five-limb value at a and stores the five-limb result
// at out.
//
// The operand is read as five 64-bit limbs l0..l4 at byte offsets 0, 8, 16,
// 24 and 32. The shifts and mask below treat every limb as a radix-2^51
// digit, and product terms that would land at digit 5 or above are folded
// back into digits 0..4 after scaling by 19. That is consistent with
// arithmetic modulo 2^255 - 19 (5 × 51 = 255 bits).
// NOTE(review): the Element type lives in the Go stub file, which is not
// visible here - confirm the representation against it.
//
// Each cross term li×lj with i != j is computed once with a doubled
// coefficient: 2 for terms that stay in range, 38 = 2×19 for terms that wrap.
// The doubling is written as SHLQ $0x01 in some terms and IMUL3Q $0x02 in
// others; both leave 2×AX (low 64 bits) in AX.
//
// Register use:
//   CX          pointer a
//   BX:SI       r0 (high:low 64-bit halves of a 128-bit accumulator)
//   DI:R8       r1
//   R9:R10      r2
//   R11:R12     r3
//   R13:R14     r4
//   AX, DX      MULQ operand and result; AX later holds the 51-bit mask and
//               finally the out pointer
//
// NOTE(review): the ADDQ/ADCQ chains drop any carry out of the high word, and
// the pre-scaling by 2, 19 or 38 keeps only the low 64 bits, so the input
// limbs are presumably required to be only slightly wider than 51 bits -
// verify against the Element invariants.
//
// out is loaded and written only after the last read of a, so out may alias a.
//
// NOSPLIT, $0-16: no stack-split check, no local frame, 16 bytes of arguments.
TEXT ·feSquare(SB), NOSPLIT, $0-16
	MOVQ a+8(FP), CX

	// r0 = l0×l0
	MOVQ (CX), AX
	MULQ (CX)
	MOVQ AX, SI
	MOVQ DX, BX

	// r0 += 38×l1×l4
	MOVQ   8(CX), AX
	IMUL3Q $0x26, AX, AX
	MULQ   32(CX)
	ADDQ   AX, SI
	ADCQ   DX, BX

	// r0 += 38×l2×l3
	MOVQ   16(CX), AX
	IMUL3Q $0x26, AX, AX
	MULQ   24(CX)
	ADDQ   AX, SI
	ADCQ   DX, BX

	// r1 = 2×l0×l1
	MOVQ (CX), AX
	SHLQ $0x01, AX
	MULQ 8(CX)
	MOVQ AX, R8
	MOVQ DX, DI

	// r1 += 38×l2×l4
	MOVQ   16(CX), AX
	IMUL3Q $0x26, AX, AX
	MULQ   32(CX)
	ADDQ   AX, R8
	ADCQ   DX, DI

	// r1 += 19×l3×l3
	MOVQ   24(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   24(CX)
	ADDQ   AX, R8
	ADCQ   DX, DI

	// r2 = 2×l0×l2
	MOVQ (CX), AX
	SHLQ $0x01, AX
	MULQ 16(CX)
	MOVQ AX, R10
	MOVQ DX, R9

	// r2 += l1×l1
	MOVQ 8(CX), AX
	MULQ 8(CX)
	ADDQ AX, R10
	ADCQ DX, R9

	// r2 += 38×l3×l4
	MOVQ   24(CX), AX
	IMUL3Q $0x26, AX, AX
	MULQ   32(CX)
	ADDQ   AX, R10
	ADCQ   DX, R9

	// r3 = 2×l0×l3
	MOVQ (CX), AX
	SHLQ $0x01, AX
	MULQ 24(CX)
	MOVQ AX, R12
	MOVQ DX, R11

	// r3 += 2×l1×l2
	MOVQ   8(CX), AX
	IMUL3Q $0x02, AX, AX
	MULQ   16(CX)
	ADDQ   AX, R12
	ADCQ   DX, R11

	// r3 += 19×l4×l4
	MOVQ   32(CX), AX
	IMUL3Q $0x13, AX, AX
	MULQ   32(CX)
	ADDQ   AX, R12
	ADCQ   DX, R11

	// r4 = 2×l0×l4
	MOVQ (CX), AX
	SHLQ $0x01, AX
	MULQ 32(CX)
	MOVQ AX, R14
	MOVQ DX, R13

	// r4 += 2×l1×l3
	MOVQ   8(CX), AX
	IMUL3Q $0x02, AX, AX
	MULQ   24(CX)
	ADDQ   AX, R14
	ADCQ   DX, R13

	// r4 += l2×l2
	MOVQ 16(CX), AX
	MULQ 16(CX)
	ADDQ AX, R14
	ADCQ DX, R13

	// First reduction chain
	// AX = 2^51 - 1, the mask for the low 51 bits of a limb.
	MOVQ   $0x0007ffffffffffff, AX

	// Double-precision shift: hi = (hi << 13) | (lo >> 51), i.e. the part of
	// each 128-bit accumulator above its low 51 bits, truncated to 64 bits.
	SHLQ   $0x0d, SI, BX
	SHLQ   $0x0d, R8, DI
	SHLQ   $0x0d, R10, R9
	SHLQ   $0x0d, R12, R11
	SHLQ   $0x0d, R14, R13

	// Keep the low 51 bits of each accumulator and add the carry of the limb
	// below it; the carry out of r4 wraps into r0 scaled by 19.
	ANDQ   AX, SI
	IMUL3Q $0x13, R13, R13
	ADDQ   R13, SI
	ANDQ   AX, R8
	ADDQ   BX, R8
	ANDQ   AX, R10
	ADDQ   DI, R10
	ANDQ   AX, R12
	ADDQ   R9, R12
	ANDQ   AX, R14
	ADDQ   R11, R14

	// Second reduction chain (carryPropagate)
	// All five carries (limb >> 51) are extracted before any is added, so
	// this is one parallel pass rather than a ripple from limb to limb.
	MOVQ   SI, BX
	SHRQ   $0x33, BX
	MOVQ   R8, DI
	SHRQ   $0x33, DI
	MOVQ   R10, R9
	SHRQ   $0x33, R9
	MOVQ   R12, R11
	SHRQ   $0x33, R11
	MOVQ   R14, R13
	SHRQ   $0x33, R13
	ANDQ   AX, SI
	IMUL3Q $0x13, R13, R13
	ADDQ   R13, SI
	ANDQ   AX, R8
	ADDQ   BX, R8
	ANDQ   AX, R10
	ADDQ   DI, R10
	ANDQ   AX, R12
	ADDQ   R9, R12
	ANDQ   AX, R14
	ADDQ   R11, R14

	// Store output
	MOVQ out+0(FP), AX
	MOVQ SI, (AX)
	MOVQ R8, 8(AX)
	MOVQ R10, 16(AX)
	MOVQ R12, 24(AX)
	MOVQ R14, 32(AX)
	RET