tridoraemu: fix crash on invalid vmem addresses examples: removed 5cubes due to unclear licensing add stuff to gitignore, licenses, README import Vivado project added missing assembly files, extended .gitignore stdlib: use DEL instead of BS
1053 lines
22 KiB
ArmAsm
1053 lines
22 KiB
ArmAsm
; Copyright 2021-2024 Sebastian Lederer. See the file LICENSE.md for details
|
|
|
|
; FLOAT32 format:
|
|
;| 31 | 30-8 | 7-0 |
|
|
;|sign| fraction | exp |
|
|
|
|
; exp has a bias of 128
|
|
; sign = 1: negative number
|
|
|
|
.EQU FLOAT32_EXPMASK %00000000000000000000000011111111
|
|
.EQU FLOAT32_FRCMASK %01111111111111111111111100000000
|
|
.EQU FLOAT32_SGNMASK %10000000000000000000000000000000
|
|
.EQU FLOAT32_FRCMSBM %01000000000000000000000000000000
|
|
.EQU FLOAT32_OVRFLOW %10000000000000000000000000000000
|
|
.EQU FLOAT32_RNDMASK %00000000000000000000000010000000
|
|
.EQU FLOAT32_RNDINCR %00000000000000000000000100000000
|
|
.EQU FLOAT32_FRCLSB %00000000000000000000000100000000
|
|
|
|
.EQU FLOAT32_MAX_EXP 255
|
|
.EQU FLOAT32_BIAS 127
|
|
.EQU FLOAT32_FRCBITS 23
|
|
.EQU FLOAT32_EXP_BITS 8
|
|
|
|
; unpack a float32 value into
|
|
; three fields: exponent, fraction, sign
|
|
; ptr is a pointer to an array with the three fields
|
|
; the sign flag is 0 or 1.
|
|
|
|
; parameters: [ floatval, ptr ]
|
|
_UNPACKFLOAT32:
|
|
OVER ; [ float, ptr, float ]
|
|
LOADC FLOAT32_EXPMASK ; [ float, ptr, float, mask ]
|
|
AND ; [ float, ptr, exp ]
|
|
STOREI 4 ; [ float, ptr + 4 ]
|
|
OVER ; [ float, ptr + 4, float ]
|
|
LOADCP FLOAT32_FRCMASK ; [ float, ptr + 4 , float, mask ]
|
|
AND ; [ float, ptr + 4 , frac ]
|
|
STOREI 4 ; [ float, ptr + 8 ]
|
|
SWAP ; [ ptr + 8, float ]
|
|
LOADCP FLOAT32_SGNMASK ; [ ptr + 8, float, mask ]
|
|
AND ; [ ptr + 8, signbit ]
|
|
LOADC 0 ; [ ptr + 8, signbit, 0 ]
|
|
CMPU NE ; [ ptr + 8, sign ]
|
|
STOREI ; [ ptr + 8 ]
|
|
DROP ; []
|
|
RET
|
|
|
|
; pack exponent, fraction, sign into a single 32-bit value
|
|
; ptr is a pointer to an array with the three fields
|
|
; parameters: [ ptr ]
|
|
; returns: float value
|
|
_PACKFLOAT32:
|
|
LOADI.S1.X2Y ; [ ptr, exp ]
|
|
OVER ; [ ptr, exp, ptr ]
|
|
INC 4 ; [ ptr, exp, ptr + 4 ]
|
|
LOADI ; [ ptr, exp, frac ]
|
|
LOADCP FLOAT32_FRCMASK ; [ ptr, exp, frac, mask ]
|
|
AND ; [ ptr, exp, frac ]
|
|
OR ; [ ptr, exp|frac ]
|
|
OVER ; [ ptr, exp|frac, ptr ]
|
|
INC 8 ; [ ptr, exp|frac, ptr + 8]
|
|
LOADI ; [ ptr, exp|frac, signflag ]
|
|
CBRANCH.Z PACKFL_1 ; [ ptr, exp | frac ]
|
|
LOADCP FLOAT32_SGNMASK ; [ ptr, exp|frac, signmask ]
|
|
ADD ; [ ptr, floatval ]
|
|
PACKFL_1:
|
|
SWAP ; [ floatval, ptr ]
|
|
DROP ; [ floatval ]
|
|
RET
|
|
|
|
; adds to floating point values a and b
|
|
; parameters: [ a, b ]
|
|
; returns: sum of a and b
|
|
.EQU FL_E_A 0
|
|
.EQU FL_F_A 4
|
|
.EQU FL_S_A 8
|
|
.EQU FL_E_B 12
|
|
.EQU FL_F_B 16
|
|
.EQU FL_S_B 20
|
|
.EQU FL_E_R 24
|
|
.EQU FL_F_R 28
|
|
.EQU FL_S_R 32
|
|
.EQU ADDFL_SUB 36
|
|
.EQU ADDFL_FS 40
|
|
|
|
_ADDFLOAT32:
|
|
FPADJ -ADDFL_FS
|
|
; unpack b into e_b and f_b
|
|
LOADREG FP
|
|
LOADC FL_E_B
|
|
ADD ; addr of FL_E_B
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
; unpack a into e_a and f_a
|
|
LOADREG FP ; addr of FL_E_A
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
|
|
; if abs(a) < abs(b), swap a and b (this includes e_a < e_b)
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
CMPU GT
|
|
CBRANCH ADDFL_1 ; don't swap if e_a > e_b
|
|
; e_a <= e_b
|
|
; check if e_a < e_b
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
CMPU LT
|
|
CBRANCH ADDFL_1_1 ; swap if e_a < e_b
|
|
; e_a = e_b
|
|
; check fractions
|
|
LOAD FL_F_A
|
|
LOAD FL_F_B
|
|
CMPU GE
|
|
CBRANCH ADDFL_1 ; dont't swap if f_a >= f_b
|
|
|
|
ADDFL_1_1:
|
|
;LOADC '"'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
STORE FL_E_A
|
|
STORE FL_E_B
|
|
LOAD FL_F_A
|
|
LOAD FL_F_B
|
|
STORE FL_F_A
|
|
STORE FL_F_B
|
|
LOAD FL_S_A
|
|
LOAD FL_S_B
|
|
STORE FL_S_A
|
|
STORE FL_S_B
|
|
ADDFL_1:
|
|
; e_r := e_a, result exp is exp from a
|
|
LOAD FL_E_A
|
|
STORE FL_E_R
|
|
|
|
LOAD FL_S_A ; take result sign from a (which might be swapped with b here)
|
|
STORE FL_S_R
|
|
|
|
LOAD FL_S_A ; check if the signs of a and b differ
|
|
LOAD FL_S_B
|
|
XOR ; if the xor of both sign flags is 1, signs are not the same
|
|
STORE ADDFL_SUB ; then we do a subtraction later
|
|
|
|
; ? if (e_a - e_b) > precision +1:
|
|
; ? set f_r to f_a
|
|
|
|
; shift right f_b by (e_a - e_b) bits
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
SUB
|
|
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
;LOADC 'e'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
;DUP
|
|
;LOADCP PRINTDEC
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
LOAD FL_F_B ; on stack now: [ counter, f_b ]
|
|
ADDFL_L1:
|
|
OVER ; check counter for zero
|
|
CBRANCH.Z ADDFL_2 ; if yes, loop is done
|
|
SHR ; shift right
|
|
SWAP ; swap counter and value
|
|
DEC 1 ; decrement counter
|
|
SWAP ; swap back
|
|
BRANCH ADDFL_L1 ; next
|
|
ADDFL_2:
|
|
STORE FL_F_B ; store result
|
|
DROP ; remove counter
|
|
|
|
;LOADC 'a'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
;LOAD FL_F_A
|
|
;LOADCP PRINTHEXW
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
;LOADC 'b'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
;LOAD FL_F_B
|
|
;LOADCP PRINTHEXW
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
; set f_r to f_a + f_b
|
|
; (or f_r = f_a - f_b if we have a negative sign somewhere)
|
|
LOAD FL_F_A
|
|
LOAD FL_F_B
|
|
LOAD ADDFL_SUB ; do we need a subtract?
|
|
CBRANCH ADDFL_2_1 ; yes, skip add instruction
|
|
ADD ; no, do add and skip sub instruction
|
|
BRANCH ADDFL_2_2
|
|
ADDFL_2_1:
|
|
;LOADC '-'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
SUB
|
|
ADDFL_2_2:
|
|
;DUP
|
|
;LOADCP PRINTHEXW
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
; normalize:
|
|
; if overflow bit of f_r is set, scale right
|
|
DUP
|
|
LOADCP FLOAT32_OVRFLOW ; isolate overflow bit
|
|
AND
|
|
|
|
CBRANCH.Z ADDFL_3
|
|
|
|
;LOADC 'V'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
|
|
; scale right: shift right f_r by one, increment e_r by one
|
|
SHR
|
|
LOAD FL_E_R
|
|
INC 1
|
|
STORE FL_E_R
|
|
ADDFL_3:
|
|
; check if result is zero
|
|
DUP
|
|
CBRANCH.NZ ADDFL_3_1
|
|
; set exponent to something useful
|
|
LOADC 0
|
|
STORE FL_E_R
|
|
; clear sign, we don't want -0
|
|
LOADC 0
|
|
STORE FL_S_R
|
|
|
|
BRANCH ADDFL_4
|
|
|
|
ADDFL_3_1:
|
|
; if bit 30 of f_r is zero, scale left
|
|
DUP
|
|
LOADCP FLOAT32_FRCMSBM ; isolate msb of fraction
|
|
AND
|
|
CBRANCH.NZ ADDFL_4
|
|
|
|
;LOADC '<'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
; scale left and repeat previous step
|
|
SHL ; shift fraction
|
|
LOAD FL_E_R ; decrement exponent
|
|
DEC 1
|
|
STORE FL_E_R
|
|
BRANCH ADDFL_3 ; repeat
|
|
ADDFL_4:
|
|
; round:
|
|
; simple round algorithm: if the bit below
|
|
; the least significat bit is 1, round up
|
|
; and check the overflow bit again
|
|
|
|
DUP
|
|
LOADC FLOAT32_RNDMASK ; test rounding bit
|
|
AND
|
|
CBRANCH.Z ADDFL_5 ; if not set, skip rounding
|
|
|
|
;LOADC '~'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
LOADC FLOAT32_RNDINCR
|
|
ADD
|
|
|
|
LOADCP FLOAT32_FRCMASK ; remove rounded off bits
|
|
AND
|
|
|
|
BRANCH ADDFL_3_1 ; check for overflow again
|
|
ADDFL_5:
|
|
|
|
STORE FL_F_R
|
|
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
;LOADC 'E'
|
|
;LOADCP CONOUT
|
|
;CALL
|
|
|
|
;LOAD FL_E_R
|
|
;LOADCP PRINTHEXW
|
|
;CALL
|
|
;LOADCP NEWLINE
|
|
;CALL
|
|
|
|
; check exponent range overflow
|
|
LOADCP FLOAT32_EXP_CHECK
|
|
CALL
|
|
ADDFL_6:
|
|
; pack e_r,f_r,s_r into r
|
|
LOADREG FP
|
|
LOADC FL_E_R
|
|
ADD
|
|
LOADCP _PACKFLOAT32
|
|
CALL
|
|
|
|
FPADJ ADDFL_FS
|
|
RET
|
|
|
|
; check exponent value of local var FL_E_R
|
|
; for overflow and issue runtime error on failure.
|
|
; uses stack frame of caller.
|
|
FLOAT32_EXP_CHECK:
|
|
; check exponent range overflow
|
|
LOAD FL_E_R
|
|
LOADC FLOAT32_MAX_EXP
|
|
CMPU LE
|
|
CBRANCH FLOAT32_EXP_XT
|
|
|
|
LOADCP FLOAT32_ERR_OVR
|
|
LOADCP _RUNTIME_ERR
|
|
JUMP
|
|
FLOAT32_EXP_XT:
|
|
RET
|
|
|
|
; subtract floating point value b from a
|
|
; parameters: [ a, b ]
|
|
; returns: a - b
|
|
_SUBFLOAT32:
|
|
; just flip sign bit of b and continue with _ADDFLOAT32
|
|
LOADCP $80000000
|
|
XOR
|
|
BRANCH _ADDFLOAT32
|
|
|
|
; multiply two floating point values a and b
|
|
; parameters: [ a, b ]
|
|
; returns: a * b
|
|
.EQU MULFL_FS 36
|
|
_MULFLOAT32:
|
|
FPADJ -MULFL_FS ; same stack frame layout as _ADDFLOAT32
|
|
|
|
DUP
|
|
CBRANCH.NZ _MULFLOAT32_1 ; check if b is zero
|
|
; if yes, just return zero
|
|
NIP ; remove a, take b (which is zero) as return value
|
|
BRANCH _MULFLOAT32_X
|
|
_MULFLOAT32_1:
|
|
; unpack b into e_b and f_b
|
|
LOADREG FP
|
|
LOADC FL_E_B
|
|
ADD ; addr of FL_E_B
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
DUP
|
|
CBRANCH.NZ _MULFLOAT32_2 ; check if a is zero
|
|
; if yes, return zero
|
|
; take a (which is zero) as return value
|
|
BRANCH _MULFLOAT32_X
|
|
_MULFLOAT32_2:
|
|
; unpack a into e_a and f_a
|
|
LOADREG FP ; addr of FL_E_A
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
; s_r = s_a xor s_b
|
|
LOAD FL_S_A
|
|
LOAD FL_S_B
|
|
XOR
|
|
STORE FL_S_R
|
|
|
|
; e_r = e_a + e_b - bias
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
ADD
|
|
LOADC FLOAT32_BIAS
|
|
SUB
|
|
STORE FL_E_R
|
|
|
|
; check exponent range overflow
|
|
LOADCP FLOAT32_EXP_CHECK
|
|
CALL
|
|
; f_r = fractmult(f_a, f_b)
|
|
LOADCP _FRACTMULT
|
|
CALL
|
|
|
|
LOADCP NORMROUND_FL ; normalize and round
|
|
CALL
|
|
|
|
; pack result
|
|
LOADREG FP
|
|
LOADC FL_E_R
|
|
ADD
|
|
LOADCP _PACKFLOAT32
|
|
CALL
|
|
_MULFLOAT32_X:
|
|
; 32 bit float result is now on stack
|
|
FPADJ MULFL_FS
|
|
RET
|
|
|
|
; multiply fractions.
|
|
; uses stack frame/local variables from caller.
|
|
; FL_F_B is destroyed.
|
|
; result is written to FL_R
|
|
_FRACTMULT:
|
|
LOADC FLOAT32_FRCBITS ; this is the counter which we keep on stack
|
|
|
|
LOADC 0 ; start with result zero
|
|
STORE FL_F_R
|
|
|
|
; for each digit of the fraction of b, from right to left,
|
|
; the fraction of a is added to the result, if that digit is 1.
|
|
; on each iteration, the result is shifted right.
|
|
_FRACTMULT_1:
|
|
LOAD FL_F_R
|
|
SHR
|
|
STORE FL_F_R ; shift result frac for next iteration
|
|
|
|
LOAD FL_F_B
|
|
LOADC FLOAT32_FRCLSB
|
|
AND ; isolate lowest bit
|
|
|
|
CBRANCH.Z _FRACTMULT_2 ; if not set, don't add in this iteration
|
|
|
|
LOAD FL_F_R ; otherwise, add fraction of a to result
|
|
LOAD FL_F_A
|
|
ADD
|
|
STORE FL_F_R
|
|
|
|
_FRACTMULT_2:
|
|
LOAD FL_F_B
|
|
SHR
|
|
STORE FL_F_B ; shift fraction b for next digit
|
|
|
|
DEC 1 ; decrease counter
|
|
DUP
|
|
CBRANCH _FRACTMULT_1 ; if not zero, next loop iteration
|
|
|
|
DROP ; drop counter
|
|
RET
|
|
|
|
; divide floating point value a by b
|
|
; parameters: [ a, b ]
|
|
; returns: a / b
|
|
.EQU DIVFL_FS 36
|
|
_DIVFLOAT32:
|
|
FPADJ -DIVFL_FS ; same stack frame layout as _ADDFLOAT32
|
|
DUP
|
|
CBRANCH.NZ _DIVFLOAT32_0 ; check b for zero
|
|
LOADCP FLOAT32_ERR_DIVZ ; if it is, we have a division by zero
|
|
LOADCP _RUNTIME_ERR ; and we issue a runtime error
|
|
JUMP
|
|
_DIVFLOAT32_0:
|
|
OVER ; check a for zero
|
|
CBRANCH.NZ _DIVFLOAT32_0_1
|
|
; if it is we just return zero
|
|
DROP ; remove args
|
|
DROP
|
|
LOADC 0
|
|
BRANCH _DIVFLOAT32_XT
|
|
|
|
_DIVFLOAT32_0_1:
|
|
; unpack b into e_b and f_b
|
|
LOADREG FP
|
|
LOADC FL_E_B
|
|
ADD ; addr of FL_E_B
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
; unpack a into e_a and f_a
|
|
LOADREG FP ; addr of FL_E_A
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
; s_r = s_a xor s_b
|
|
LOAD FL_S_A
|
|
LOAD FL_S_B
|
|
XOR
|
|
STORE FL_S_R
|
|
|
|
; e_r = e_a - e_b + bias
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
SUB
|
|
LOADC FLOAT32_BIAS
|
|
ADD
|
|
STORE FL_E_R
|
|
|
|
; check exponent range overflow
|
|
LOADCP FLOAT32_EXP_CHECK
|
|
CALL
|
|
; f_r = fractdiv(f_a, f_b)
|
|
LOADCP _FRACTDIV
|
|
CALL
|
|
|
|
LOADCP NORMROUND_FL ; normalize and round
|
|
CALL
|
|
|
|
; pack result
|
|
LOADREG FP
|
|
LOADC FL_E_R
|
|
ADD
|
|
LOADCP _PACKFLOAT32
|
|
CALL
|
|
_DIVFLOAT32_XT:
|
|
; 32 bit float result is now on stack
|
|
FPADJ DIVFL_FS
|
|
RET
|
|
|
|
; divide fraction of a by b.
|
|
; uses stackframe of caller.
|
|
; FL_F_A is destroyed.
|
|
; places result into FL_F_R.
|
|
_FRACTDIV:
|
|
LOADC 0
|
|
STORE FL_F_R
|
|
|
|
LOADC FLOAT32_FRCBITS ; load counter
|
|
_FRACTDIV_1:
|
|
LOAD FL_F_R ; load result for later
|
|
LOAD FL_F_B
|
|
LOAD FL_F_A
|
|
CMPU GT
|
|
CBRANCH _FRACTDIV_2 ; if b > a, skip next section
|
|
INC 1 ; a fits into b, so add 1 to result
|
|
LOAD FL_F_A ; subtract divisor from dividend
|
|
LOAD FL_F_B
|
|
SUB
|
|
STORE FL_F_A
|
|
|
|
_FRACTDIV_2:
|
|
SHL ; shift result left for next digit
|
|
STORE FL_F_R ; and store it again
|
|
LOAD FL_F_A ; shift the dividend left for next digit
|
|
SHL
|
|
STORE FL_F_A
|
|
|
|
DEC 1 ; decrement counter
|
|
DUP
|
|
CBRANCH.NZ _FRACTDIV_1 ; loop if it is not zero
|
|
|
|
DROP ; remove counter
|
|
|
|
LOAD FL_F_R ; undo the last shift of the result
|
|
SHR
|
|
BROT ; and shift left again to make room for the exponent
|
|
STORE FL_F_R
|
|
RET
|
|
|
|
|
|
; normalize and round f_r.
|
|
; uses stack frame of caller.
|
|
NORMROUND_FL:
|
|
LOAD FL_F_R
|
|
NORMFL_0:
|
|
; if overflow bit of f_r is set, scale right
|
|
DUP
|
|
LOADCP FLOAT32_OVRFLOW ; isolate overflow bit
|
|
AND
|
|
CBRANCH.Z NORMFL_1 ; if bit is zero, skip the scaling
|
|
|
|
; scale right: shift right f_r by one, increment e_r by one
|
|
SHR
|
|
LOAD FL_E_R
|
|
INC 1
|
|
STORE FL_E_R
|
|
|
|
NORMFL_1:
|
|
; check if result is zero
|
|
DUP
|
|
CBRANCH.NZ NORMFL_1_1
|
|
; set exponent to something useful
|
|
LOADC 0
|
|
STORE FL_E_R
|
|
; clear sign flag, we don't want -0
|
|
LOADC 0
|
|
STORE FL_S_R
|
|
|
|
BRANCH NORMFL_2
|
|
|
|
; normalize:
|
|
; if bit 30 of f_r is zero, scale left
|
|
NORMFL_1_1:
|
|
DUP
|
|
LOADCP FLOAT32_FRCMSBM ; isolate msb of fraction
|
|
AND
|
|
CBRANCH.NZ NORMFL_2 ; skip if bit is set
|
|
; scale left and repeat previous step
|
|
SHL ; shift fraction
|
|
LOAD FL_E_R ; decrement exponent
|
|
DEC 1
|
|
STORE FL_E_R
|
|
BRANCH NORMFL_1_1 ; repeat
|
|
|
|
NORMFL_2:
|
|
; round:
|
|
; simple round algorithm: if the bit below
|
|
; the least significat bit is 1, round up
|
|
; and check the overflow bit again
|
|
DUP
|
|
LOADC FLOAT32_RNDMASK ; test rounding bit
|
|
AND
|
|
CBRANCH.Z NORMFL_3 ; if not set, skip rounding
|
|
|
|
LOADC FLOAT32_RNDINCR
|
|
ADD
|
|
|
|
LOADCP FLOAT32_FRCMASK ; remove rounded off bits
|
|
AND
|
|
|
|
BRANCH NORMFL_0 ; check for overflow again
|
|
NORMFL_3:
|
|
STORE FL_F_R
|
|
RET
|
|
|
|
; return the fractional part of a floating point number
|
|
; parameters: float32 value
|
|
; returns: float32 value
|
|
.EQU FRACTFL_EXP 12
|
|
.EQU FRACTFL_FS 20
|
|
_FRACTFLOAT32:
|
|
FPADJ -FRACTFL_FS
|
|
DUP ; check for zero
|
|
CBRANCH.Z FRACTFL_XT ; if yes, just return zero
|
|
LOADREG FP
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
LOAD FL_E_A ; remove bias from exponent
|
|
LOADC FLOAT32_BIAS
|
|
SUB
|
|
DUP
|
|
STORE FRACTFL_EXP
|
|
|
|
LOADC 0 ; if the exponent is negative,
|
|
CMP LT ; there are no digits before the point
|
|
CBRANCH FRACTFL_PK ; and we just return the same value
|
|
|
|
LOAD FRACTFL_EXP ; the exponent indicates how far
|
|
; we need to shift the fraction to the left
|
|
INC 1 ; at exp 0 we need to shift by 1
|
|
|
|
LOADC FLOAT32_BIAS - 1
|
|
STORE FL_E_A ; the new exponent is -1 because we
|
|
; shifted by (old exp + 1) bits
|
|
|
|
LOAD FL_F_A ; load fraction, shift count is already on stack
|
|
FRACTFL_L:
|
|
SHL ; shift fraction
|
|
SWAP ; swap fraction and shift count
|
|
DEC 1 ; decrement count
|
|
SWAP ; swap back shift count and fraction
|
|
OVER ; check count for zero
|
|
CBRANCH FRACTFL_L ; if not, loop
|
|
|
|
NIP ; remove count
|
|
LOADCP FLOAT32_FRCMASK
|
|
AND ; remove superfluous bits
|
|
|
|
DUP ; check if fraction is zero
|
|
CBRANCH.NZ FRACTFL_1
|
|
; if yes, directly return
|
|
BRANCH FRACTFL_XT
|
|
|
|
; normalize
|
|
FRACTFL_1:
|
|
DUP
|
|
LOADCP FLOAT32_FRCMSBM ; isolate msb of fraction
|
|
AND
|
|
CBRANCH.NZ FRACTFL_2 ; skip if bit is set
|
|
; scale left and repeat previous step
|
|
SHL ; shift fraction
|
|
LOAD FL_E_A ; decrement exponent
|
|
DEC 1
|
|
STORE FL_E_A
|
|
BRANCH FRACTFL_1 ; repeat
|
|
FRACTFL_2:
|
|
STORE FL_F_A
|
|
FRACTFL_PK:
|
|
LOADREG FP
|
|
LOADCP _PACKFLOAT32
|
|
CALL
|
|
FRACTFL_XT:
|
|
FPADJ FRACTFL_FS
|
|
RET
|
|
|
|
; truncate a floating point number - return
|
|
; the integer value before the decimal point.
|
|
; issues a runtime error if the integer value
|
|
; does not fit into a signed 32-bit integer.
|
|
; parameters: float32 value
|
|
; returns: integer
|
|
.EQU TRUNCFL_SHIFT 12
|
|
.EQU TRUNCFL_FS 16
|
|
_TRUNCFLOAT32:
|
|
FPADJ -TRUNCFL_FS
|
|
LOADREG FP
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
LOAD FL_E_A ; remove bias from exponent
|
|
LOADC FLOAT32_BIAS
|
|
SUB
|
|
DUP
|
|
STORE FL_E_A
|
|
|
|
; if the exponent
|
|
LOADC 0 ; is negative,
|
|
CMP LT ; our result is zero
|
|
CBRANCH TRUNCFL_ZERO
|
|
|
|
; shift fraction all the way to the right
|
|
LOAD FL_F_A ; shift fraction right by 8 bits
|
|
BROT ; by rotating 3*8 bits left
|
|
BROT ; we know that the original bits 7-0
|
|
BROT ; are zero
|
|
|
|
; calculate number of bits to shift:
|
|
; fraction bits - 1 - exponent
|
|
LOADC FLOAT32_FRCBITS - 1
|
|
LOAD FL_E_A
|
|
SUB
|
|
DUP
|
|
STORE TRUNCFL_SHIFT
|
|
LOADC 0 ; is shift count >= 0?
|
|
CMP GE
|
|
CBRANCH TRUNCFL_2
|
|
|
|
; shift count is negative, need to shift left
|
|
; check for integer overflow first
|
|
LOAD TRUNCFL_SHIFT
|
|
NOT ; negate shift count
|
|
INC 1
|
|
|
|
DUP ; shift count >= bits in a word - bits in the fraction?
|
|
LOADC 32 - FLOAT32_FRCBITS
|
|
CMP GE
|
|
CBRANCH TRUNCFL_ERR ; then we have an overflow
|
|
|
|
; otherwise, shift n bits left
|
|
TRUNCFL_1:
|
|
DUP
|
|
CBRANCH.Z TRUNCFL_3 ; if zero, we are done
|
|
|
|
DEC 1 ; decrement shift count
|
|
SWAP ; swap shift count and fraction
|
|
SHL ; shift fraction left
|
|
SWAP ; swap back
|
|
BRANCH TRUNCFL_1 ; and loop
|
|
|
|
TRUNCFL_2: ; shift right, fraction is on ToS
|
|
LOAD TRUNCFL_SHIFT ; bits to shift
|
|
TRUNCFL_L1:
|
|
DUP
|
|
CBRANCH.Z TRUNCFL_3 ; if zero, we are done
|
|
|
|
DEC 1 ; decrement shift count
|
|
SWAP ; swap shift count and fraction
|
|
SHR ; shift fraction right
|
|
SWAP ; swap back
|
|
BRANCH TRUNCFL_L1 ; and loop
|
|
TRUNCFL_3:
|
|
DROP ; drop shift count
|
|
BRANCH TRUNCFL_EXIT
|
|
TRUNCFL_ZERO:
|
|
LOADC 0
|
|
TRUNCFL_EXIT:
|
|
LOAD FL_S_A ; check sign
|
|
CBRANCH.Z TRUNCFL_EXIT1
|
|
; if not zero, negate result
|
|
NOT
|
|
INC 1
|
|
TRUNCFL_EXIT1:
|
|
FPADJ TRUNCFL_FS
|
|
RET
|
|
|
|
TRUNCFL_ERR:
|
|
DROP
|
|
LOADCP FLOAT32_ERR_TRUNC
|
|
LOADCP _RUNTIME_ERR
|
|
JUMP
|
|
|
|
; like _TRUNCFLOAT32, but return a float32 value
|
|
; parameters [ float value ]
|
|
; returns: float value
|
|
; TODO: write a real routine which might be faster
|
|
; than just calling _TRUNCFLOAT32 and INTTOFLOAT32.
|
|
; Also, _TRUNCFLOAT32 can cause an integer overflow
|
|
; where INT should not.
|
|
_INTFLOAT32:
|
|
LOADCP _TRUNCFLOAT32
|
|
CALL
|
|
LOADCP _INTTOFLOAT32
|
|
CALL
|
|
RET
|
|
|
|
.CPOOL
|
|
|
|
; multiply/divide a float value by a multitude of 2
|
|
; shift count < 1: shift right (divide)
|
|
; shuft count > 0: shift left (multiply)
|
|
; parameters: [ float value, shift count ]
|
|
; returns: float value
|
|
.EQU SHF32_FS 36
|
|
.EQU SHF32_COUNT 12
|
|
SHIFTFLOAT32:
|
|
FPADJ -SHF32_FS
|
|
|
|
; store shift count parameter
|
|
STORE SHF32_COUNT
|
|
; Unpack float value.
|
|
; We use the result variable slot
|
|
; because EXP_CHECK uses that.
|
|
LOADREG FP
|
|
LOADC FL_E_R
|
|
ADD
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
; we just add the shift count to
|
|
; the exponent
|
|
LOAD FL_E_R
|
|
LOAD SHF32_COUNT
|
|
ADD
|
|
STORE FL_E_R
|
|
|
|
; and check for overflow
|
|
LOADCP FLOAT32_EXP_CHECK
|
|
CALL
|
|
|
|
; pack up the result
|
|
LOADREG FP
|
|
LOADC FL_E_R
|
|
ADD
|
|
LOADCP _PACKFLOAT32
|
|
CALL
|
|
|
|
FPADJ SHF32_FS
|
|
RET
|
|
|
|
; convert an integer to a float32
|
|
; parameters: an integer
|
|
; returns: a float32 value
|
|
.EQU INTF32_FS 12
|
|
_INTTOFLOAT32:
|
|
FPADJ -INTF32_FS
|
|
|
|
; is intval zero?
|
|
DUP
|
|
CBRANCH.Z INTTOFLOAT32_XT ; if yes, we are done, float zero has the same
|
|
; representation as integer zero
|
|
|
|
; is intval < 0?
|
|
LOADC 0
|
|
CMP.S0 LT
|
|
DUP
|
|
STORE FL_S_A ; store cmp result as sign flag
|
|
CBRANCH.Z INTF32_1 ; skip negate if cmp result is 0
|
|
; negate
|
|
NOT
|
|
INC 1
|
|
INTF32_1:
|
|
LOADC 30 ; set counter
|
|
INTF32_L:
|
|
; loop if bit 31 is not set:
|
|
SWAP ; swap counter and value (value is now ToS)
|
|
|
|
LOADCP $40000000
|
|
AND.S0
|
|
CBRANCH INTF32_2
|
|
; shift left
|
|
SHL
|
|
; swap counter and value again (counter is now ToS)
|
|
SWAP
|
|
; decrease counter
|
|
DEC 1
|
|
BRANCH INTF32_L
|
|
INTF32_2:
|
|
; store shifted value as fraction
|
|
STORE FL_F_A
|
|
; set exponent to counter + bias
|
|
LOADC FLOAT32_BIAS
|
|
ADD
|
|
STORE FL_E_A
|
|
; pack components
|
|
LOADREG FP
|
|
LOADCP _PACKFLOAT32
|
|
CALL
|
|
BRANCH INTTOFLOAT32_XT
|
|
INTTOFLOAT32_XT:
|
|
FPADJ INTF32_FS
|
|
RET
|
|
|
|
; get the binary exponent of a floating point value
|
|
; (without the bias)
|
|
; parameters: [ float value ]
|
|
; returns: exponent (-128..128)
|
|
_GETFLOAT32EXP:
|
|
LOADC FLOAT32_EXPMASK
|
|
AND
|
|
LOADC FLOAT32_BIAS
|
|
SUB
|
|
RET
|
|
|
|
; compare an integer to a floating point value
|
|
; parameters: a:integer, b: float
|
|
; calls INTTOFLOAT32 on a, then calls CMPFLOAT32
|
|
; returns: -1 if a < b
|
|
; 0 if a = b
|
|
_CMPINTFLOAT32:
|
|
SWAP ; swap a and b, a is now on top of stack
|
|
LOADCP _INTTOFLOAT32
|
|
CALL ; a is now a float
|
|
SWAP ; swap a and b back, b is on top of stack again
|
|
; fallthrough to CMPFLOAT32
|
|
|
|
; compare two floating point values
|
|
; parameters: two floating point values a and b
|
|
; returns: -1 if a < b
|
|
; 0 if a = b
|
|
; 1 if a > b
|
|
.EQU CMPFL_FS 24
|
|
_CMPFLOAT32:
|
|
FPADJ -CMPFL_FS
|
|
; unpack b
|
|
LOADREG FP
|
|
LOADC FL_E_B
|
|
ADD
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
;unpack a
|
|
LOADREG FP
|
|
LOADCP _UNPACKFLOAT32
|
|
CALL
|
|
|
|
;if sign(a) and not sign(b):
|
|
; return -1
|
|
;if not sign(a) and sign(b):
|
|
; return 1
|
|
LOAD FL_S_A
|
|
LOAD FL_S_B
|
|
XOR
|
|
CBRANCH.Z CMPFL_2 ; jump if signs are the same
|
|
; signs are not the same
|
|
LOAD FL_S_A
|
|
CBRANCH.Z CMPFL_1 ; jump if a is positive
|
|
; a is negative, b must be positive, so a < b
|
|
CMPFL_M1:
|
|
LOADC -1
|
|
BRANCH CMPFL_XT
|
|
CMPFL_1:
|
|
; a is positive, b must be negative, so a > b
|
|
LOADC 1
|
|
BRANCH CMPFL_XT
|
|
CMPFL_2:
|
|
; signs are the same, compare exponents now.
|
|
; we can compare the exponent values directly without
|
|
; subtracting the bias because negative exponents
|
|
; are smaller than positive exponents due to the added
|
|
; bias.
|
|
; if we determine a result, from now on the result
|
|
; must be inverted if the sign of a and b is negative.
|
|
|
|
;if exp(a) < exp(b):
|
|
; return -1 * sign(a)
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
CMPU GE
|
|
CBRANCH CMPFL_3 ; jump if exp(a) >= exp(b)
|
|
; exp(a) < exp(b), so abs(a) < abs(b)
|
|
LOADC -1
|
|
CMPFL_2_1:
|
|
; invert result if signs of a and b are negative
|
|
LOAD FL_S_A
|
|
CBRANCH.Z CMPFL_XT ; sign is clear, no need to invert
|
|
|
|
NOT ; negate result and exit
|
|
INC 1
|
|
BRANCH CMPFL_XT
|
|
CMPFL_3:
|
|
; exp(a) >= exp(b)
|
|
; check if exp(a) > exp(b)
|
|
LOAD FL_E_A
|
|
LOAD FL_E_B
|
|
CMPU GT
|
|
CBRANCH CMPFL_5 ; jump if yes
|
|
; exp(a) = exp(b)
|
|
; exponents are the same, compare fractions now
|
|
|
|
;if frac(a) < frac(b):
|
|
; return -1 * sign(a)
|
|
LOAD FL_F_A
|
|
LOAD FL_F_B
|
|
CMPU GE
|
|
CBRANCH CMPFL_4 ; jump if frac(a) >= frac(b)
|
|
|
|
; frac(a) < frac(b)
|
|
LOADC -1
|
|
BRANCH CMPFL_2_1 ; check for inversion of result
|
|
;if frac(a) > frac(b):
|
|
; return 1 * neg_result
|
|
CMPFL_4:
|
|
; frac(a) >= frac(b)
|
|
; check if frac(a) > frac(b)
|
|
LOAD FL_F_A
|
|
LOAD FL_F_B
|
|
CMPU GT
|
|
CBRANCH CMPFL_5 ; jump if frac(a) > frac(b)
|
|
; frac(a) = frac(b)
|
|
LOADC 0
|
|
BRANCH CMPFL_XT
|
|
CMPFL_5:
|
|
; exp(a) > exp(b):
|
|
; return 1 * sign(a)
|
|
LOADC 1
|
|
BRANCH CMPFL_2_1 ; check for inversion of result
|
|
CMPFL_XT:
|
|
FPADJ CMPFL_FS
|
|
RET
|
|
|
|
; Negate a float32 number.
|
|
; just flip the highest bit, no
|
|
; need to unpack.
|
|
; if it is zero, do nothing, we
|
|
; do not want a -0.0 value
|
|
_NEGFLOAT32:
|
|
DUP
|
|
CBRANCH.Z _NEGFL_XT
|
|
LOADCP $80000000
|
|
XOR
|
|
_NEGFL_XT:
|
|
RET
|
|
|
|
.CPOOL
|
|
|
|
FLOAT32_ERR_OVR:
|
|
.BYTE "floating point overflow",0
|
|
FLOAT32_ERR_TRUNC:
|
|
.BYTE "integer overflow",0
|
|
FLOAT32_ERR_DIVZ:
|
|
.BYTE "float division by zero",0
|