; Source listing: 1545 lines, 24 KiB (labelled "NASM" by the hosting page; the code below is Z80 assembly in vasm syntax)
; ============================================================
|
|
; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax)
|
|
; ============================================================
|
|
; Float format in memory (big-endian, 4 bytes):
|
|
; byte0: EXP (8-bit biased exponent, 0 = zero)
|
|
; byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits)
|
|
; byte2: F15..F8
|
|
; byte3: F7..F0
|
|
;
|
|
; For EXP != 0:
|
|
; value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS)
|
|
; FP_BIAS = 127
|
|
;
|
|
; Calling convention (in-place ops):
|
|
; HL -> A (4 bytes)
|
|
; DE -> B (4 bytes)
|
|
; fp_add: A = A + B (stored back at HL)
|
|
; fp_sub: A = A - B
|
|
; fp_mul: A = A * B
|
|
; fp_div: A = A / B
|
|
;
|
|
; Extra:
|
|
; fp_print: print float at (HL) using external printChar (A=ASCII)
|
|
; fp_parse: parse null-terminated string at (DE) into float at (HL)
|
|
;
|
|
; Limitations:
|
|
; - No NaN/Inf/denormals
|
|
; - Truncation (no rounding)
|
|
; - fp_print prints fixed decimals with a lightweight fraction path
|
|
; - fp_parse supports optional +/- and '.' up to MAX_FRAC digits, no exponent notation
|
|
; ============================================================
|
|
|
|
.equ FP_BIAS,127                        ; exponent bias: value = 1.F * 2^(EXP - FP_BIAS)
.equ FRAC_DIGITS,6                      ; fraction digits emitted by fp_print
.equ MAX_FRAC,6                         ; fraction digits accepted by fp_parse;
                                        ; pow10_table only holds 10^0..10^6, so keep <= 6

; External routine supplied by the user of this library.
.global printChar

; Public entry points. The workspace is declared with .comm, which only
; makes sense when this file is assembled to an object and linked, so the
; API labels have to be exported for other modules to reach them.
.global fp_add
.global fp_sub
.global fp_mul
.global fp_div
.global fp_print
.global fp_parse
|
|
; ============================================================
|
|
; CODE
|
|
; ============================================================
|
|
.text
|
|
|
|
; ------------------------------------------------------------
|
|
; External routine you provide:
|
|
; printChar: prints ASCII character in A
|
|
; ------------------------------------------------------------
|
|
; printChar is external, not defined here.
|
|
|
|
; ============================================================
|
|
; Public API: fp_add / fp_sub / fp_mul / fp_div
|
|
; ============================================================
|
|
|
|
; ------------------------------------------------------------
; fp_add: A = A + B
;   In:   HL -> A (4-byte float, overwritten with the result)
;         DE -> B (4-byte float, only read)
;   Out:  result stored at the original HL
;   Note: HL and DE are NOT preserved (the unpack/pack helpers advance
;         them); A, B, C and flags are scratch.
; ------------------------------------------------------------
fp_add:
        push    hl
        push    de
        call    fp_unpackA              ; A_exp/A_sign/A_m2..0 <- (HL)
        pop     de
        call    fp_unpackB              ; B_exp/B_sign/B_m2..0 <- (DE)
        pop     hl                      ; HL -> destination again

        ; Zero short-cuts (EXP == 0 encodes zero).
        ld      a,(A_exp)
        or      a
        jp      z,fp_pack_from_B_into_A ; A == 0 -> result is B (tail call)

        ld      a,(B_exp)
        or      a
        ret     z                       ; B == 0 -> A is already the result

        ; Equal signs add magnitudes, different signs subtract them.
        ; JP (not JR): both targets are defined after fp_sub/fp_mul/fp_div,
        ; which puts them beyond the -128..+127 byte reach of JR.
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        jp      z,fp_add_same_sign
        jp      fp_add_diff_sign
|
|
|
|
|
|
; ------------------------------------------------------------
; fp_sub: A = A - B
;   In:   HL -> A (overwritten with the result), DE -> B
;   Out:  result at (HL); HL and DE are preserved
;   How:  the sign bit of B is flipped in memory, fp_add is called, and
;         the sign bit is flipped back, so B must live in writable memory.
;
;   fp_add does not preserve HL/DE, so both pointers are kept on the
;   stack across the call; otherwise the "flip back" would hit a byte
;   past B and leave B negated.
; ------------------------------------------------------------
fp_sub:
        ; A - A with both pointers on the same float: flipping "B" would
        ; also flip A, so produce the zero result directly.
        ld      a,l
        cp      e
        jr      nz,.distinct
        ld      a,h
        cp      d
        jr      nz,.distinct
        push    hl
        call    fp_store_zero_A         ; writes 0.0 to (HL)
        pop     hl
        ret

.distinct:
        push    hl
        push    de

        inc     de                      ; DE -> byte1 of B (sign | top fraction)
        ld      a,(de)
        xor     080h
        ld      (de),a
        dec     de                      ; DE -> B again for fp_add

        call    fp_add                  ; A = A + (-B)

        pop     de
        pop     hl

        inc     de                      ; undo the temporary sign flip of B
        ld      a,(de)
        xor     080h
        ld      (de),a
        dec     de
        ret
|
|
|
|
|
|
; ------------------------------------------------------------
; fp_mul: A = A * B
;   In:   HL -> A (overwritten with the result), DE -> B (only read)
;   Out:  result at the original HL; HL/DE are not preserved
;   Note: the exponent is computed in 8 bits with no overflow/underflow
;         detection.
; ------------------------------------------------------------
fp_mul:
        push    hl
        push    de
        call    fp_unpackA
        pop     de
        call    fp_unpackB
        pop     hl

        ; Either operand zero -> result zero.
        ; JP: fp_store_zero_A is out of JR range from here.
        ld      a,(A_exp)
        or      a
        jp      z,fp_store_zero_A
        ld      a,(B_exp)
        or      a
        jp      z,fp_store_zero_A

        ; sign = A_sign XOR B_sign
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        ld      (A_sign),a

        ; exponent = A_exp + B_exp - FP_BIAS
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        add     a,b
        sub     FP_BIAS
        ld      (A_exp),a

        ; The multiply helpers return their 8x8 products in HL, so the
        ; destination pointer has to be parked on the stack meanwhile.
        push    hl
        call    mul24x24_schoolbook     ; P5..P0 = A_m * B_m (48 bit)
        call    norm_product_to_A       ; A_m2..0 = top 24 bits, exponent fixed up
        pop     hl

        jp      fp_packA                ; store result at (HL)
|
|
|
|
|
|
; ------------------------------------------------------------
; fp_div: A = A / B
;   In:   HL -> A (overwritten with the result), DE -> B (only read)
;   Out:  result at the original HL; HL/DE are not preserved
;   Note: division by zero stores 0.0 (there is no Inf/NaN encoding).
;         The exponent is computed in 8 bits without range checks.
; ------------------------------------------------------------
fp_div:
        push    hl
        push    de
        call    fp_unpackA
        pop     de
        call    fp_unpackB
        pop     hl

        ; 0 / x = 0, and x / 0 is reported as 0 as well.
        ; JP: fp_store_zero_A is out of JR range from here.
        ld      a,(A_exp)
        or      a
        jp      z,fp_store_zero_A
        ld      a,(B_exp)
        or      a
        jp      z,fp_store_zero_A

        ; sign = A_sign XOR B_sign
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        ld      (A_sign),a

        ; exponent = A_exp - B_exp + FP_BIAS
        ld      a,(B_exp)
        ld      c,a
        ld      a,(A_exp)
        sub     c
        add     a,FP_BIAS
        ld      (A_exp),a

        call    div_mantissas_to_A      ; A_m = (A_m << 23) / B_m
        call    normalize_A_mant        ; quotient may lack its top bit

        jp      fp_packA
|
|
|
|
|
|
; ============================================================
|
|
; Add/Sub core (unpacked)
|
|
; ============================================================
|
|
|
|
; fp_add_same_sign: magnitude addition for operands with equal signs.
;   In:  unpacked A and B, HL -> destination float
;   Out: result packed at (HL) via fp_packA
fp_add_same_sign:
        call    align_exponents_A_B     ; common exponent in A_exp, B_m shifted
        call    add24_A_plus_B          ; A_m += B_m, carry = bit 24 of the sum
        jr      nc,.noCarry

        ; The sum needs 25 bits: shift right once, bump the exponent and
        ; re-insert the carried-out bit as the new top mantissa bit.
        ; shr24_A_1 shifts a 0 into bit 23, so without the OR the leading
        ; one is lost (1.0 + 1.0 would come out as 0).
        call    shr24_A_1
        ld      a,(A_m2)
        or      080h
        ld      (A_m2),a
        ld      a,(A_exp)
        inc     a
        ld      (A_exp),a

.noCarry:
        call    normalize_A_mant
        jp      fp_packA
|
|
|
|
|
|
; fp_add_diff_sign: magnitude subtraction for operands with opposite signs.
;   Computes larger - smaller; the result takes the sign of the operand
;   with the larger magnitude (which ends up in the A slot).
;   In:  unpacked A and B, HL -> destination float
;   Out: result packed at (HL)
fp_add_diff_sign:
        call    compare_mag_A_B         ; carry set when |A| >= |B|
        jr      c,.A_ge_B
        call    swap_A_B_unpacked       ; make A the larger magnitude
.A_ge_B:
        call    align_exponents_A_B
        call    sub24_A_minus_B
        call    is_A_mant_zero
        jp      z,fp_store_zero_A       ; exact cancellation; JP: target is
                                        ; out of JR range from here
        call    normalize_A_mant
        jp      fp_packA
|
|
|
|
|
|
; ============================================================
|
|
; Unpack / Pack helpers
|
|
; ============================================================
|
|
|
|
; fp_unpackA: split the packed float at (HL) into the A_* workspace.
;   In:   HL -> 4-byte float
;   Out:  A_exp  = biased exponent
;         A_sign = 0 or 1
;         A_m2..A_m0 = 24-bit mantissa with the hidden 1 in bit 7 of A_m2
;         (EXP == 0: sign and mantissa are all cleared)
;   Uses: A, B, flags; HL is left on the last byte read
fp_unpackA:
        ld      a,(hl)
        ld      (A_exp),a
        or      a
        jr      z,.is_zero

        inc     hl
        ld      a,(hl)
        ld      b,a                     ; B = sign | F22..F16
        rlca                            ; sign bit -> bit 0
        and     001h
        ld      (A_sign),a

        ld      a,b
        or      080h                    ; overwrite the sign bit with the hidden 1
        ld      (A_m2),a
        inc     hl
        ld      a,(hl)
        ld      (A_m1),a
        inc     hl
        ld      a,(hl)
        ld      (A_m0),a
        ret

.is_zero:
        ; A is 0 here (it was just tested)
        ld      (A_sign),a
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a
        ret
|
|
|
|
|
|
; fp_unpackB: split the packed float at (DE) into the B_* workspace.
;   In:   DE -> 4-byte float
;   Out:  B_exp  = biased exponent
;         B_sign = 0 or 1
;         B_m2..B_m0 = 24-bit mantissa with the hidden 1 in bit 7 of B_m2
;         (EXP == 0: sign and mantissa are all cleared)
;   Uses: A, B, flags; DE is left on the last byte read
fp_unpackB:
        ld      a,(de)
        ld      (B_exp),a
        or      a
        jr      z,.is_zero

        inc     de
        ld      a,(de)
        ld      b,a                     ; B = sign | F22..F16
        rlca                            ; sign bit -> bit 0
        and     001h
        ld      (B_sign),a

        ld      a,b
        or      080h                    ; overwrite the sign bit with the hidden 1
        ld      (B_m2),a
        inc     de
        ld      a,(de)
        ld      (B_m1),a
        inc     de
        ld      a,(de)
        ld      (B_m0),a
        ret

.is_zero:
        ; A is 0 here (it was just tested)
        ld      (B_sign),a
        ld      (B_m2),a
        ld      (B_m1),a
        ld      (B_m0),a
        ret
|
|
|
|
|
|
; fp_packA: store the unpacked A workspace as a packed float at (HL).
;   In:   HL -> 4-byte destination; A_exp, A_sign, A_m2..A_m0
;   Out:  (HL..HL+3) = EXP, S|F22..F16, F15..F8, F7..F0
;         A_exp == 0 stores four zero bytes regardless of sign/mantissa
;   Uses: A, flags; HL ends on the last byte written
fp_packA:
        ld      a,(A_exp)
        ld      (hl),a                  ; byte0 = exponent (0 for zero)
        inc     hl
        or      a
        jr      z,.zero_rest

        ld      a,(A_sign)
        or      a                       ; Z <- sign is zero
        ld      a,(A_m2)                ; loads keep the flags
        res     7,a                     ; drop the hidden 1
        jr      z,.put_hi
        set     7,a                     ; negative: bit 7 carries the sign
.put_hi:
        ld      (hl),a
        inc     hl
        ld      a,(A_m1)
        ld      (hl),a
        inc     hl
        ld      a,(A_m0)
        ld      (hl),a
        ret

.zero_rest:
        ; A is 0 here
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        ret
|
|
|
|
|
|
; fp_pack_from_B_into_A: store the unpacked B workspace at (HL).
;   Used by fp_add when A is zero, so the sum is simply B.
;   In:   HL -> 4-byte destination; B_exp, B_sign, B_m2..B_m0
;   Out:  (HL..HL+3) = packed B
;   Uses: A, flags; HL ends on the last byte written
fp_pack_from_B_into_A:
        ld      a,(B_exp)
        ld      (hl),a
        inc     hl

        ld      a,(B_sign)
        or      a                       ; Z <- sign is zero
        ld      a,(B_m2)                ; loads keep the flags
        res     7,a                     ; drop the hidden 1
        jr      z,.put_hi
        set     7,a                     ; negative: bit 7 carries the sign
.put_hi:
        ld      (hl),a
        inc     hl
        ld      a,(B_m1)
        ld      (hl),a
        inc     hl
        ld      a,(B_m0)
        ld      (hl),a
        ret
|
|
|
|
|
|
; fp_store_zero_A: clear the unpacked A workspace and store 0.0 at (HL).
;   In:   HL -> 4-byte destination
;   Out:  A_* all zero, (HL..HL+3) = 0 (written by fp_packA)
fp_store_zero_A:
        xor     a
        ld      (A_m0),a
        ld      (A_m1),a
        ld      (A_m2),a
        ld      (A_sign),a
        ld      (A_exp),a
        jp      fp_packA                ; tail call: packs the zero
|
|
|
|
|
|
; ============================================================
|
|
; Exponent alignment / compare / swap
|
|
; ============================================================
|
|
|
|
; align_exponents_A_B: bring both unpacked operands to a common exponent.
;   If B has the larger exponent the operands are swapped first, so on
;   return A_exp >= the old B_exp, B_m has been shifted right by the
;   exponent difference and B_exp == A_exp.
;   Uses: A, B, C, flags, SHCNT
align_exponents_A_B:
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        cp      b
        ret     z                       ; already aligned
        call    nc,swap_A_B_unpacked    ; B_exp > A_exp: A gets the larger one

        ld      a,(B_exp)
        ld      c,a
        ld      a,(A_exp)
        ld      b,a
        sub     c                       ; A = exponent difference (> 0)
        call    shr24_B_by_A            ; de-normalize the smaller operand

        ld      a,(A_exp)
        ld      (B_exp),a
        ret
|
|
|
|
|
|
; compare_mag_A_B: compare |A| with |B| on the unpacked operands.
;   Out:  carry set   if |A| >= |B|
;         carry clear if |A| <  |B|
;   Uses: A, B, flags
;
;   Fields are compared from most to least significant (exponent, then
;   mantissa bytes); the first difference decides. Each CP computes
;   A-field - B-field, which sets carry when A-field < B-field, so the
;   final CCF turns that into the documented "carry = A >= B".
;   The previous version set carry on both outcomes of the last byte,
;   reporting |A| >= |B| even when only B_m0 was larger.
compare_mag_A_B:
        ld      a,(B_exp)
        ld      b,a
        ld      a,(A_exp)
        cp      b
        jr      nz,.decided

        ld      a,(B_m2)
        ld      b,a
        ld      a,(A_m2)
        cp      b
        jr      nz,.decided

        ld      a,(B_m1)
        ld      b,a
        ld      a,(A_m1)
        cp      b
        jr      nz,.decided

        ld      a,(B_m0)
        ld      b,a
        ld      a,(A_m0)
        cp      b                       ; equal -> carry clear -> CCF sets it
.decided:
        ccf
        ret
|
|
|
|
|
|
; swap_A_B_unpacked: exchange the complete unpacked operands A and B
;   (sign, exponent and all three mantissa bytes).
;   Uses: A, B
swap_A_B_unpacked:
        ; --- sign ---
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        ld      (A_sign),a
        ld      a,b
        ld      (B_sign),a

        ; --- exponent ---
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        ld      (A_exp),a
        ld      a,b
        ld      (B_exp),a

        ; --- mantissa, high to low ---
        ld      a,(A_m2)
        ld      b,a
        ld      a,(B_m2)
        ld      (A_m2),a
        ld      a,b
        ld      (B_m2),a

        ld      a,(A_m1)
        ld      b,a
        ld      a,(B_m1)
        ld      (A_m1),a
        ld      a,b
        ld      (B_m1),a

        ld      a,(A_m0)
        ld      b,a
        ld      a,(B_m0)
        ld      (A_m0),a
        ld      a,b
        ld      (B_m0),a
        ret
|
|
|
|
|
|
; ============================================================
|
|
; 24-bit mantissa ops
|
|
; ============================================================
|
|
|
|
; add24_A_plus_B: A_m2..A_m0 += B_m2..B_m0 (24-bit add).
;   Out:  carry = carry out of bit 23
;   Uses: A, flags (HL is saved and restored)
;
;   The Z80 has no ADD/ADC A,(nn); memory operands must go through (HL).
;   LD and POP leave the flags alone, so the carry chain and the final
;   carry survive the pointer reloads.
add24_A_plus_B:
        push    hl
        ld      hl,B_m0
        ld      a,(A_m0)
        add     a,(hl)
        ld      (A_m0),a
        ld      hl,B_m1
        ld      a,(A_m1)
        adc     a,(hl)
        ld      (A_m1),a
        ld      hl,B_m2
        ld      a,(A_m2)
        adc     a,(hl)
        ld      (A_m2),a
        pop     hl
        ret                             ; carry is meaningful to the caller
|
|
|
|
|
|
; sub24_A_minus_B: A_m2..A_m0 -= B_m2..B_m0 (24-bit subtract).
;   Out:  carry = borrow out of bit 23
;   Uses: A, flags (HL is saved and restored)
;
;   The Z80 has no SUB/SBC with an (nn) operand; memory operands must go
;   through (HL). LD and POP do not touch the flags, so the borrow chain
;   is preserved across the pointer reloads.
sub24_A_minus_B:
        push    hl
        ld      hl,B_m0
        ld      a,(A_m0)
        sub     (hl)
        ld      (A_m0),a
        ld      hl,B_m1
        ld      a,(A_m1)
        sbc     a,(hl)
        ld      (A_m1),a
        ld      hl,B_m2
        ld      a,(A_m2)
        sbc     a,(hl)
        ld      (A_m2),a
        pop     hl
        ret
|
|
|
|
|
|
; is_A_mant_zero: test whether the 24-bit mantissa of A is zero.
;   Out:  Z set if A_m2 | A_m1 | A_m0 == 0
;   Uses: A, flags (HL is saved and restored)
;
;   OR cannot take an (nn) operand on the Z80, so the bytes are reached
;   through (HL). POP does not change the flags, so Z survives the restore.
is_A_mant_zero:
        push    hl
        ld      a,(A_m2)
        ld      hl,A_m1
        or      (hl)
        ld      hl,A_m0
        or      (hl)
        pop     hl
        ret
|
|
|
|
|
|
; shr24_A_1: logical shift right of the 24-bit mantissa A_m2..A_m0 by one.
;   A zero enters bit 23; the bit leaving A_m0 ends up in carry.
;   Uses: flags (HL is saved and restored)
shr24_A_1:
        push    hl
        ld      hl,A_m2
        srl     (hl)                    ; 0 -> bit 7, bit 0 -> carry
        ld      hl,A_m1
        rr      (hl)                    ; carry ripples down
        ld      hl,A_m0
        rr      (hl)
        pop     hl
        ret
|
|
|
|
|
|
; shr24_B_by_A: logical shift right of B_m2..B_m0 by A bits.
;   In:   A = shift count (0..255)
;   Out:  B_m shifted; a count of 24 or more clears the mantissa outright
;   Uses: A, flags, SHCNT (HL is saved and restored)
shr24_B_by_A:
        ld      (SHCNT),a
        cp      24
        jr      nc,.flush               ; everything would be shifted out
        or      a
        ret     z                       ; nothing to do

        push    hl
.bit:
        ld      hl,B_m2
        srl     (hl)
        ld      hl,B_m1
        rr      (hl)
        ld      hl,B_m0
        rr      (hl)
        ld      hl,SHCNT
        dec     (hl)
        jr      nz,.bit
        pop     hl
        ret

.flush:
        xor     a
        ld      (B_m2),a
        ld      (B_m1),a
        ld      (B_m0),a
        ret
|
|
|
|
|
|
; normalize_A_mant: shift A_m left until bit 23 (bit 7 of A_m2) is set,
;   decrementing A_exp once per shift.
;   Out:  normalized mantissa and adjusted exponent; a zero mantissa
;         sets A_exp = 0 (the encoding of 0.0)
;   Uses: A, flags
;
;   Underflow: if the exponent reaches 0 before the mantissa is
;   normalized, the routine stops with A_exp == 0. fp_packA stores 0.0
;   for that, so tiny results flush to zero instead of wrapping round to
;   exponent 255.
normalize_A_mant:
        call    is_A_mant_zero
        jr      nz,.check
        xor     a
        ld      (A_exp),a
        ret

.check:
        ld      a,(A_m2)
        bit     7,a
        ret     nz                      ; hidden-1 position is set: done

        ; mantissa <<= 1 (loads and stores keep the carry chain intact)
        ld      a,(A_m0)
        add     a,a
        ld      (A_m0),a
        ld      a,(A_m1)
        adc     a,a
        ld      (A_m1),a
        ld      a,(A_m2)
        adc     a,a
        ld      (A_m2),a

        ld      a,(A_exp)
        or      a
        ret     z                       ; already at the bottom: stays zero
        dec     a
        ld      (A_exp),a
        ret     z                       ; underflow -> flush to zero
        jr      .check
|
|
|
|
|
|
; ============================================================
; mul8u: 8x8 -> 16 unsigned multiply (shift-and-add)
;   In:   A = multiplicand, C = multiplier
;   Out:  HL = 16-bit product
;   Uses: B, C, D, E, H, L, flags
;
;   The multiplicand is widened into DE and shifted as a 16-bit value.
;   Shifting it inside the 8-bit accumulator (as before) dropped every
;   bit that moved past bit 7, e.g. 0x80 * 2 returned 0.
; ============================================================
mul8u:
        ld      e,a
        ld      d,0                     ; DE = multiplicand (16 bit)
        ld      h,d
        ld      l,d                     ; HL = 0 (running sum)
        ld      b,8
.bit:
        srl     c                       ; next multiplier bit -> carry
        jr      nc,.skip
        add     hl,de
.skip:
        sla     e                       ; DE <<= 1
        rl      d
        djnz    .bit
        ret
|
|
|
|
|
|
; ============================================================
; mul24x24_schoolbook: P5..P0 = A_m * B_m (24 x 24 -> 48 bit)
;   P0 is the least significant byte. Nine 8x8 partial products
;   A_m[i] * B_m[j] are added in at byte offset i + j.
;   Uses: A, B, C, D, E, flags (HL is saved and restored)
;
;   The Z80 can only load A from an absolute address, so each
;   multiplier byte goes through A into C before the multiplicand
;   is loaded.
; ============================================================
mul24x24_schoolbook:
        push    hl                      ; mul8u returns its product in HL

        xor     a
        ld      (P0),a
        ld      (P1),a
        ld      (P2),a
        ld      (P3),a
        ld      (P4),a
        ld      (P5),a

        ; A_m0 * B_m0 -> offset 0
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at0

        ; A_m0 * B_m1 -> offset 1
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at1

        ; A_m0 * B_m2 -> offset 2
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at2

        ; A_m1 * B_m0 -> offset 1
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at1

        ; A_m1 * B_m1 -> offset 2
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at2

        ; A_m1 * B_m2 -> offset 3
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at3

        ; A_m2 * B_m0 -> offset 2
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at2

        ; A_m2 * B_m1 -> offset 3
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at3

        ; A_m2 * B_m2 -> offset 4
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at4

        pop     hl
        ret
|
|
|
|
|
|
; add16_to_P_at0: P += HL (HL added at byte offset 0 of the 48-bit P).
;   In:   HL = 16-bit addend
;   Uses: A, flags
;   The carry out of P1 is rippled through P2..P5 so the helper stays
;   correct when P is not empty.
add16_to_P_at0:
        ld      a,(P0)
        add     a,l
        ld      (P0),a
        ld      a,(P1)
        adc     a,h
        ld      (P1),a
        ld      a,(P2)
        adc     a,0
        ld      (P2),a
        ld      a,(P3)
        adc     a,0
        ld      (P3),a
        ld      a,(P4)
        adc     a,0
        ld      (P4),a
        ld      a,(P5)
        adc     a,0
        ld      (P5),a
        ret
|
|
; add16_to_P_at1: P += HL << 8 (HL added at byte offset 1 of the 48-bit P).
;   In:   HL = 16-bit addend
;   Uses: A, flags
;   P2 already holds earlier partial products when this is called a
;   second time, so the carry out of P2 must ripple into P3..P5;
;   dropping it corrupted the product.
add16_to_P_at1:
        ld      a,(P1)
        add     a,l
        ld      (P1),a
        ld      a,(P2)
        adc     a,h
        ld      (P2),a
        ld      a,(P3)
        adc     a,0
        ld      (P3),a
        ld      a,(P4)
        adc     a,0
        ld      (P4),a
        ld      a,(P5)
        adc     a,0
        ld      (P5),a
        ret
|
|
; add16_to_P_at2: P += HL << 16 (HL added at byte offset 2 of the 48-bit P).
;   In:   HL = 16-bit addend
;   Uses: A, flags
;   The carry out of P3 ripples into P4 and P5; dropping it corrupted
;   the product whenever the partial sums overflowed P3.
add16_to_P_at2:
        ld      a,(P2)
        add     a,l
        ld      (P2),a
        ld      a,(P3)
        adc     a,h
        ld      (P3),a
        ld      a,(P4)
        adc     a,0
        ld      (P4),a
        ld      a,(P5)
        adc     a,0
        ld      (P5),a
        ret
|
|
; add16_to_P_at3: P += HL << 24 (HL added at byte offset 3 of the 48-bit P).
;   In:   HL = 16-bit addend
;   Uses: A, flags
;   The carry out of P4 ripples into P5; dropping it corrupted the top
;   byte of the product.
add16_to_P_at3:
        ld      a,(P3)
        add     a,l
        ld      (P3),a
        ld      a,(P4)
        adc     a,h
        ld      (P4),a
        ld      a,(P5)
        adc     a,0
        ld      (P5),a
        ret
|
|
; add16_to_P_at4: P += HL << 32 (HL added at byte offset 4 of the 48-bit P).
;   In:   HL = 16-bit addend (L goes to P4, H to P5)
;   Out:  carry = carry out of P5, the top byte of P
;   Uses: A, flags
add16_to_P_at4:
        ld      a,(P4)
        add     a,l                     ; low half into P4
        ld      (P4),a
        ld      a,(P5)
        adc     a,h                     ; high half plus carry into P5
        ld      (P5),a
        ret
|
|
|
|
|
|
; ============================================================
; norm_product_to_A: reduce the 48-bit product P to a 24-bit mantissa
;   P is P0 (LSB) .. P5 (MSB).
;   If bit 47 of P is set the product is shifted right by 24 and A_exp
;   is incremented; otherwise it is shifted right by 23. The low three
;   bytes of the shifted P become A_m2..A_m0.
;   Uses: A, flags, SHCNT
; ============================================================
norm_product_to_A:
        ld      a,(P5)
        rlca                            ; bit 47 -> carry
        ld      a,23                    ; default shift (LD keeps the carry)
        jr      nc,.shift

        ; top bit set: one extra shift, compensated in the exponent
        ld      a,(A_exp)
        inc     a
        ld      (A_exp),a
        ld      a,24
.shift:
        call    shr48_P_by_A

        ld      a,(P2)
        ld      (A_m2),a
        ld      a,(P1)
        ld      (A_m1),a
        ld      a,(P0)
        ld      (A_m0),a
        ret
|
|
|
|
|
|
; shr48_P_by_A: logical shift right of the 48-bit P (P5..P0) by A bits.
;   In:   A = shift count (0 returns immediately)
;   Uses: A, flags, SHCNT (HL is saved and restored)
shr48_P_by_A:
        ld      (SHCNT),a
        or      a
        ret     z

        push    hl
.bit:
        ld      hl,P5
        srl     (hl)                    ; 0 enters at the top
        ld      hl,P4
        rr      (hl)
        ld      hl,P3
        rr      (hl)
        ld      hl,P2
        rr      (hl)
        ld      hl,P1
        rr      (hl)
        ld      hl,P0
        rr      (hl)
        ld      hl,SHCNT
        dec     (hl)
        jr      nz,.bit
        pop     hl
        ret
|
|
|
|
|
|
; ============================================================
; div_mantissas_to_A: A_m = (A_m << 23) / B_m  (restoring division)
;   In:   A_m2..A_m0 = dividend mantissa, B_m2..B_m0 = divisor mantissa
;   Out:  A_m2..A_m0 = 24-bit quotient (truncated)
;   Uses: A, B, flags, P0..P5, SHCNT
;
;   P holds the dividend shifted left by 23. Each of the 24 rounds
;   shifts P left once and tries to subtract the divisor from the upper
;   24 bits (P5..P3).
;
;   Two fixes over the previous version:
;   - the quotient shift is written inline; it used to call shl24_A_1,
;     which is not defined anywhere in this file;
;   - when the shift of P pushes a 1 out of P5 the partial remainder is
;     at least 2^24, i.e. larger than any 24-bit divisor, so the
;     subtraction is taken unconditionally. The old code ignored that
;     bit and compared a truncated remainder.
; ============================================================
div_mantissas_to_A:
        ; P = A_m, zero-extended to 48 bits
        xor     a
        ld      (P3),a
        ld      (P4),a
        ld      (P5),a
        ld      a,(A_m0)
        ld      (P0),a
        ld      a,(A_m1)
        ld      (P1),a
        ld      a,(A_m2)
        ld      (P2),a

        ld      a,23
        call    shl48_P_by_A

        ; quotient = 0
        xor     a
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a

        ld      b,24
.div_loop:
        ; quotient <<= 1
        ld      a,(A_m0)
        add     a,a
        ld      (A_m0),a
        ld      a,(A_m1)
        adc     a,a
        ld      (A_m1),a
        ld      a,(A_m2)
        adc     a,a
        ld      (A_m2),a

        call    shl48_P_1               ; carry = bit shifted out of P5
        jr      c,.force_sub

        call    sub24_Phigh_minus_B     ; trial subtraction
        jr      nc,.set_bit             ; no borrow: it fits
        call    add24_Phigh_plus_B      ; borrow: restore the remainder
        jr      .next

.force_sub:
        call    sub24_Phigh_minus_B     ; remainder >= 2^24: always fits
.set_bit:
        ld      a,(A_m0)
        or      001h
        ld      (A_m0),a
.next:
        djnz    .div_loop
        ret
|
|
|
|
|
|
; shl48_P_by_A: shift the 48-bit P left by A bits, one bit per round.
;   In:   A = shift count (0 returns immediately)
;   Uses: A, flags, SHCNT (HL is saved and restored around the counter update)
shl48_P_by_A:
        ld      (SHCNT),a
        or      a
        ret     z
.again:
        call    shl48_P_1
        push    hl
        ld      hl,SHCNT
        dec     (hl)                    ; Z when the count is used up
        pop     hl                      ; POP leaves the flags alone
        jr      nz,.again
        ret
|
|
|
|
|
|
; shl48_P_1: shift the 48-bit P (P5..P0) left by one bit.
;   Out:  carry = bit shifted out of P5
;   Uses: flags (HL is saved and restored)
shl48_P_1:
        push    hl
        ld      hl,P0
        sla     (hl)                    ; 0 enters at the bottom
        ld      hl,P1
        rl      (hl)
        ld      hl,P2
        rl      (hl)
        ld      hl,P3
        rl      (hl)
        ld      hl,P4
        rl      (hl)
        ld      hl,P5
        rl      (hl)                    ; top bit -> carry
        pop     hl
        ret
|
|
|
|
|
|
; sub24_Phigh_minus_B: P5..P3 -= B_m2..B_m0 (upper 24 bits of P).
;   Out:  carry set = borrow (divisor did not fit)
;   Uses: A, flags (HL is saved and restored; B is untouched, so it can
;         serve as the caller's DJNZ counter)
;
;   SUB/SBC cannot address (nn) on the Z80, so the divisor bytes are
;   read through (HL). LD and POP keep the borrow chain intact.
sub24_Phigh_minus_B:
        push    hl
        ld      hl,B_m0
        ld      a,(P3)
        sub     (hl)
        ld      (P3),a
        ld      hl,B_m1
        ld      a,(P4)
        sbc     a,(hl)
        ld      (P4),a
        ld      hl,B_m2
        ld      a,(P5)
        sbc     a,(hl)
        ld      (P5),a
        pop     hl
        ret                             ; carry set indicates borrow
|
|
|
|
|
|
; add24_Phigh_plus_B: P5..P3 += B_m2..B_m0 (undo a failed trial subtract).
;   Uses: A, flags (HL is saved and restored; B is untouched)
;
;   ADD/ADC cannot address (nn) on the Z80, so the divisor bytes are
;   read through (HL). LD and POP keep the carry chain intact.
add24_Phigh_plus_B:
        push    hl
        ld      hl,B_m0
        ld      a,(P3)
        add     a,(hl)
        ld      (P3),a
        ld      hl,B_m1
        ld      a,(P4)
        adc     a,(hl)
        ld      (P4),a
        ld      hl,B_m2
        ld      a,(P5)
        adc     a,(hl)
        ld      (P5),a
        pop     hl
        ret
|
|
|
|
|
|
; ============================================================
; fp_print: print the float at (HL) in fixed notation
;   Output: [-]I.FFFFFF  (FRAC_DIGITS fraction digits, truncated)
;   In:     HL -> 4-byte float
;   Calls:  printChar (A = character), print_u32_dec
;   Uses:   A, B, C, D, E, H, L, flags, PR_INT0..3, PR_R0..2
;
;   Method: the 24-bit mantissa M (hidden 1 included) is placed in a
;   24-bit binary fraction field PR_R2..PR_R0 below the 32-bit integer
;   field PR_INT3..PR_INT0. Read that way it equals value * 2^-(E+1)
;   with E = EXP - FP_BIAS, so shifting the 56-bit register left by E+1
;   (or the fraction right by -(E+1)) lines the value up with the binary
;   point. The integer field is printed in decimal; each fraction digit
;   is the overflow of "fraction * 10".
;
;   Limits: magnitudes of 2^32 or more do not fit the integer field; the
;   shift is capped at 32, so such values print wrong digits.
;
;   BC is saved around printChar inside loops because this file does
;   not show which registers printChar preserves.
; ============================================================
fp_print:
        ld      a,(hl)
        or      a
        jr      nz,.nonzero

        ; EXP == 0 encodes zero: "0." followed by FRAC_DIGITS zeros
        ld      a,'0'
        call    printChar
        ld      a,'.'
        call    printChar
        ld      b,FRAC_DIGITS
.zf:
        ld      a,'0'
        push    bc
        call    printChar
        pop     bc
        djnz    .zf
        ret

.nonzero:
        ld      c,a                     ; C = biased exponent (1..255)
        inc     hl
        ld      a,(hl)
        ld      b,a                     ; B = sign | F22..F16
        or      080h                    ; hidden 1 replaces the sign bit
        ld      (PR_R2),a
        inc     hl
        ld      a,(hl)
        ld      (PR_R1),a
        inc     hl
        ld      a,(hl)
        ld      (PR_R0),a

        xor     a
        ld      (PR_INT0),a
        ld      (PR_INT1),a
        ld      (PR_INT2),a
        ld      (PR_INT3),a

        bit     7,b
        jr      z,.scale
        ld      a,'-'
        push    bc
        call    printChar
        pop     bc

.scale:
        ld      a,c
        sub     FP_BIAS-1               ; A = E + 1; carry when EXP < FP_BIAS-1
        jr      c,.shift_right
        jr      z,.emit                 ; already aligned
        cp      33
        jr      c,.left_ok
        ld      a,32                    ; cap: the integer field has 32 bits
.left_ok:
        ld      b,a
.sl:
        call    .shl56
        djnz    .sl
        jr      .emit

.shift_right:
        neg                             ; A = -(E + 1), 1..125
        cp      25
        jr      c,.right_ok
        ld      a,24                    ; 24 shifts already empty the fraction
.right_ok:
        ld      b,a
.sr:
        call    .shr24
        djnz    .sr

.emit:
        call    print_u32_dec           ; integer part (consumes PR_INT)
        ld      a,'.'
        call    printChar

        ld      b,FRAC_DIGITS
.fr:
        ; fraction *= 10; the byte that overflows out of PR_R2 is the digit
        ld      c,0
        ld      a,(PR_R0)
        call    .mul10_byte
        ld      (PR_R0),a
        ld      a,(PR_R1)
        call    .mul10_byte
        ld      (PR_R1),a
        ld      a,(PR_R2)
        call    .mul10_byte
        ld      (PR_R2),a
        ld      a,c                     ; 0..9
        add     a,'0'
        push    bc
        call    printChar
        pop     bc
        djnz    .fr
        ret

; --- private helpers (local to fp_print) ---------------------

; .mul10_byte: A = low byte of (A*10 + C), C = high byte (next carry).
;   Uses: D, E, H, L
.mul10_byte:
        ld      l,a
        ld      h,0
        ld      d,h
        ld      e,l                     ; DE = byte
        add     hl,hl                   ; *2
        add     hl,hl                   ; *4
        add     hl,de                   ; *5
        add     hl,hl                   ; *10
        ld      e,c                     ; D is still 0
        add     hl,de                   ; + carry-in
        ld      a,l
        ld      c,h
        ret

; .shl56: shift [PR_INT3..PR_INT0 : PR_R2..PR_R0] left by one bit.
.shl56:
        ld      hl,PR_R0
        sla     (hl)
        ld      hl,PR_R1
        rl      (hl)
        ld      hl,PR_R2
        rl      (hl)
        ld      hl,PR_INT0
        rl      (hl)
        ld      hl,PR_INT1
        rl      (hl)
        ld      hl,PR_INT2
        rl      (hl)
        ld      hl,PR_INT3
        rl      (hl)
        ret

; .shr24: shift the fraction PR_R2..PR_R0 right by one bit (truncating).
;   Only used while the integer field is still zero.
.shr24:
        ld      hl,PR_R2
        srl     (hl)
        ld      hl,PR_R1
        rr      (hl)
        ld      hl,PR_R0
        rr      (hl)
        ret
|
|
|
|
|
|
; shr32_INT_to_INT_with_remainder: shift PR_INT3..PR_INT0 right by B bits
;   and collect the shifted-out bits in PR_R3.
;   In:   B = shift count (0 returns with PR_R3 = 0)
;   Out:  PR_INT shifted; PR_R3 = (PR_R3 << 1) | bit, once per round, so
;         only the last eight shifted-out bits are kept, the most recent
;         one in bit 0
;   Uses: A, B, flags
;
;   RLA moves the carry left by the last RR straight into bit 0. The
;   earlier "add a,a / adc a,0" overwrote that carry with the carry of
;   the doubling, so the shifted-out bit never reached PR_R3.
shr32_INT_to_INT_with_remainder:
        xor     a
        ld      (PR_R3),a
        ld      a,b
        or      a
        ret     z
.loop:
        ld      a,(PR_INT3)
        srl     a
        ld      (PR_INT3),a
        ld      a,(PR_INT2)
        rr      a
        ld      (PR_INT2),a
        ld      a,(PR_INT1)
        rr      a
        ld      (PR_INT1),a
        ld      a,(PR_INT0)
        rr      a
        ld      (PR_INT0),a
        ld      a,(PR_R3)               ; LD keeps the carry
        rla                             ; carry -> bit 0
        ld      (PR_R3),a
        djnz    .loop
        ret
|
|
|
|
|
|
; shl32_INT_by_B: shift PR_INT3..PR_INT0 left by B bits.
;   In:   B = shift count (0 returns immediately)
;   Out:  B = 0 after a non-zero shift
;   Uses: B, flags (HL is saved and restored)
shl32_INT_by_B:
        inc     b
        dec     b                       ; Z <- (B == 0), B unchanged
        ret     z

        push    hl
.bit:
        ld      hl,PR_INT0
        sla     (hl)
        ld      hl,PR_INT1
        rl      (hl)
        ld      hl,PR_INT2
        rl      (hl)
        ld      hl,PR_INT3
        rl      (hl)
        djnz    .bit
        pop     hl
        ret
|
|
|
|
|
|
; mul_remainder_by_10: PR_R3 = (PR_R3 * 10) mod 256.
;   Uses: A, flags
;   BC is saved and restored: fp_print calls this inside a DJNZ loop
;   counted in B, and the old version destroyed B by using it as scratch.
mul_remainder_by_10:
        push    bc
        ld      a,(PR_R3)
        ld      b,a
        add     a,a                     ; *2
        add     a,a                     ; *4
        add     a,b                     ; *5
        add     a,a                     ; *10
        ld      (PR_R3),a
        pop     bc
        ret
|
|
|
|
|
|
; print_u32_dec: print the unsigned 32-bit value PR_INT3..PR_INT0 in decimal.
;   In:   PR_INT0 (LSB) .. PR_INT3 (MSB)
;   Out:  digits sent to printChar, most significant first; PR_INT is
;         left at zero (it is divided down in place)
;   Uses: A, B, C, D, E, H, L, flags; up to 11 words of stack
;
;   Digits are produced least significant first by repeated division by
;   ten, so they are pushed on the stack above a zero sentinel and popped
;   in printing order. This needs no digit buffer and no state that has
;   to survive the printChar calls. A value of 0 still yields one '0'
;   because a digit is always produced before the quotient is tested.
;
;   The previous version used "or (nn)" and "ld c,(nn)", which are not
;   Z80 instructions.
print_u32_dec:
        xor     a
        push    af                      ; sentinel: A = 0 ends the digit run
.next_digit:
        call    u32_div10_inplace       ; A = PR_INT mod 10, PR_INT /= 10
        add     a,'0'
        push    af

        ; quotient still non-zero?
        ld      a,(PR_INT0)
        ld      b,a
        ld      a,(PR_INT1)
        or      b
        ld      b,a
        ld      a,(PR_INT2)
        or      b
        ld      b,a
        ld      a,(PR_INT3)
        or      b
        jr      nz,.next_digit

.emit:
        pop     af
        or      a
        ret     z                       ; sentinel reached
        call    printChar
        jr      .emit
|
|
|
|
|
|
; u32_div10_inplace: PR_INT = PR_INT / 10, remainder returned in A.
;   In:   PR_INT0 (LSB) .. PR_INT3 (MSB)
;   Out:  PR_INT = quotient, A = remainder (0..9)
;   Uses: A, B, C, D, E, H, L, flags
;
;   Long division, one byte at a time from the most significant byte
;   down. Each byte is addressed explicitly: the old code started at
;   PR_INT3 and used INC HL, which walks away from PR_INT2..PR_INT0
;   (they are declared before PR_INT3) and also assumed the .comm
;   symbols are laid out contiguously.
u32_div10_inplace:
        ld      b,0                     ; running remainder
        ld      hl,PR_INT3
        call    .step
        ld      hl,PR_INT2
        call    .step
        ld      hl,PR_INT1
        call    .step
        ld      hl,PR_INT0
        call    .step
        ld      a,b
        ret

; .step: divide (B*256 + (HL)) by 10 by repeated subtraction.
;   In:   B = remainder so far (0..9), HL -> current byte
;   Out:  (HL) = quotient byte, B = new remainder
.step:
        ld      d,b
        ld      e,(hl)                  ; DE = remainder*256 + byte
        ld      c,0                     ; quotient byte
.div:
        ld      a,d
        or      a
        jr      nz,.sub                 ; DE >= 256: certainly >= 10
        ld      a,e
        cp      10
        jr      c,.done
.sub:
        ld      a,e
        sub     10
        ld      e,a
        ld      a,d
        sbc     a,0
        ld      d,a
        inc     c
        jr      .div
.done:
        ld      (hl),c
        ld      b,e
        ret
|
|
|
|
|
|
; ============================================================
; fp_parse: parse a decimal string into a float
;   In:   DE -> "[+|-]ddd[.ddd]" terminated by any non-digit (e.g. NUL)
;         HL -> 4-byte output float
;   Out:  float stored at (HL); HL is preserved, DE is not
;   Uses: A, B, C, D, E, flags, P_* and PR_* temporaries
;
;   The digits (integer and up to MAX_FRAC fraction digits) are
;   accumulated as one unsigned 32-bit integer, converted to float, and
;   divided by 10^k where k is the number of fraction digits consumed.
;   There is no overflow check on the 32-bit accumulator.
;
;   Fixes over the previous version:
;   - the fraction-digit limit is enforced through P_FRACN; the old DJNZ
;     counter in B was destroyed by u32_mul10_scaled, so k could run
;     past the end of pow10_table;
;   - HL is saved around fp_from_u32_scaled_to_A and fp_div, which both
;     advance it; the divide and the sign flip used to hit the wrong
;     bytes;
;   - a zero result is left unsigned, so "-0" stays the canonical four
;     zero bytes.
; ============================================================
fp_parse:
        xor     a
        ld      (P_SIGN),a
        ld      (P_FRACN),a
        ld      (P_S0),a
        ld      (P_S1),a
        ld      (P_S2),a
        ld      (P_S3),a

        ; optional sign
        ld      a,(de)
        cp      '-'
        jr      nz,.chkplus
        ld      a,1
        ld      (P_SIGN),a
        inc     de
        jr      .intpart
.chkplus:
        cp      '+'
        jr      nz,.intpart
        inc     de

.intpart:
        ld      a,(de)
        call    is_digit                ; carry set for '0'..'9', A unchanged
        jr      nc,.maybe_dot
        sub     '0'
        ld      c,a
        call    u32_mul10_scaled        ; P_S *= 10
        call    u32_add8_scaled         ; P_S += digit
        inc     de
        jr      .intpart

.maybe_dot:
        cp      '.'                     ; A still holds the current character
        jr      nz,.finish_scaled
        inc     de

.fl:
        ld      a,(P_FRACN)
        cp      MAX_FRAC
        jr      nc,.finish_scaled       ; digit budget used up: ignore the rest
        ld      a,(de)
        call    is_digit
        jr      nc,.finish_scaled
        sub     '0'
        ld      c,a
        call    u32_mul10_scaled
        call    u32_add8_scaled
        ld      a,(P_FRACN)
        inc     a
        ld      (P_FRACN),a
        inc     de
        jr      .fl

.finish_scaled:
        ; (HL) = float(P_S)
        push    hl
        call    fp_from_u32_scaled_to_A
        pop     hl

        ; divide by 10^k when fraction digits were read
        ld      a,(P_FRACN)
        or      a
        jr      z,.apply_sign

        push    hl
        ld      e,a
        ld      d,0
        ld      hl,pow10_table
        add     hl,de
        add     hl,de
        add     hl,de
        add     hl,de                   ; HL = &pow10_table[k] (4 bytes per entry)
        ex      de,hl                   ; DE -> divisor
        pop     hl

        push    hl
        call    fp_div
        pop     hl

.apply_sign:
        ld      a,(P_SIGN)
        or      a
        ret     z
        ld      a,(hl)
        or      a
        ret     z                       ; zero carries no sign
        inc     hl
        ld      a,(hl)
        xor     080h
        ld      (hl),a
        dec     hl
        ret
|
|
|
|
|
|
; is_digit: test whether A holds an ASCII decimal digit.
;   In:   A = character
;   Out:  carry set if '0' <= A <= '9', carry clear otherwise
;         A is unchanged
is_digit:
        cp      '0'                     ; carry when A < '0'
        ccf                             ; ... turned into "carry clear"
        ret     nc                      ; below '0': not a digit
        cp      '9'+1                   ; carry when A <= '9'
        ret
|
|
|
|
|
|
; u32_mul10_scaled: P_S = P_S * 10 (unsigned 32-bit, overflow discarded).
;   Computed as P_S*2 + P_S*8 using PR_INT0..3 and PR_R0..3 as scratch.
;   Uses: A, B, flags (C, DE and HL are not touched here)
;
;   ADD/ADC cannot take an (nn) operand on the Z80, so each PR_R byte is
;   staged in B before it is added. LD does not change the flags, so the
;   carry chain runs through the staging loads.
u32_mul10_scaled:
        ; PR_INT = P_S * 2
        ld      a,(P_S0)
        ld      (PR_INT0),a
        ld      a,(P_S1)
        ld      (PR_INT1),a
        ld      a,(P_S2)
        ld      (PR_INT2),a
        ld      a,(P_S3)
        ld      (PR_INT3),a
        ld      b,1
        call    shl32_INT_by_B

        ; PR_R = P_S * 8
        ld      a,(P_S0)
        ld      (PR_R0),a
        ld      a,(P_S1)
        ld      (PR_R1),a
        ld      a,(P_S2)
        ld      (PR_R2),a
        ld      a,(P_S3)
        ld      (PR_R3),a
        ld      b,3
        call    shl32_R_by_B

        ; P_S = PR_INT + PR_R
        ld      a,(PR_R0)
        ld      b,a
        ld      a,(PR_INT0)
        add     a,b
        ld      (P_S0),a
        ld      a,(PR_R1)
        ld      b,a
        ld      a,(PR_INT1)
        adc     a,b
        ld      (P_S1),a
        ld      a,(PR_R2)
        ld      b,a
        ld      a,(PR_INT2)
        adc     a,b
        ld      (P_S2),a
        ld      a,(PR_R3)
        ld      b,a
        ld      a,(PR_INT3)
        adc     a,b
        ld      (P_S3),a
        ret
|
|
|
|
|
|
; shl32_R_by_B: shift PR_R3..PR_R0 left by B bits.
;   In:   B = shift count (0 returns immediately)
;   Out:  B = 0 after a non-zero shift
;   Uses: B, flags (HL is saved and restored)
shl32_R_by_B:
        inc     b
        dec     b                       ; Z <- (B == 0), B unchanged
        ret     z

        push    hl
.bit:
        ld      hl,PR_R0
        sla     (hl)
        ld      hl,PR_R1
        rl      (hl)
        ld      hl,PR_R2
        rl      (hl)
        ld      hl,PR_R3
        rl      (hl)
        djnz    .bit
        pop     hl
        ret
|
|
|
|
|
|
; u32_add8_scaled: P_S += C (unsigned 32-bit accumulator plus one byte).
;   In:   C = value to add (a digit 0..9 when called from fp_parse)
;   Uses: A, flags
;   The carry is rippled upwards only as far as it actually travels.
u32_add8_scaled:
        ld      a,(P_S0)
        add     a,c
        ld      (P_S0),a
        ret     nc

        ld      a,(P_S1)
        adc     a,0                     ; carry is set here: +1
        ld      (P_S1),a
        ret     nc

        ld      a,(P_S2)
        adc     a,0
        ld      (P_S2),a
        ret     nc

        ld      a,(P_S3)
        adc     a,0
        ld      (P_S3),a
        ret
|
|
|
|
|
|
; fp_from_u32_scaled_to_A: convert the unsigned 32-bit P_S to a float at (HL).
;   In:   P_S0 (LSB) .. P_S3 (MSB), HL -> 4-byte destination
;   Out:  (HL..HL+3) = float(P_S), always positive (the caller applies
;         the sign); low bits that do not fit the mantissa are truncated
;   Uses: A, B, C, flags, PR_INT0..3; HL ends on the last byte written
;
;   With n = index of the highest set bit (0..31):
;     EXP      = FP_BIAS + n
;     mantissa = P_S << (31 - n), top 24 bits = PR_INT3..PR_INT1
;   Shifting the leading 1 to bit 31 keeps the shift count non-negative
;   for every input and matches the bytes that are stored. The old count
;   of 23 - n went negative for values >= 2^24 and disagreed with taking
;   PR_INT3..PR_INT1.
;
;   Also replaces "or (nn)" and "add c,c", which are not Z80 instructions.
fp_from_u32_scaled_to_A:
        ld      a,(P_S0)
        ld      b,a
        ld      a,(P_S1)
        or      b
        ld      b,a
        ld      a,(P_S2)
        or      b
        ld      b,a
        ld      a,(P_S3)
        or      b
        jr      nz,.nz

        ; value is zero (A = 0 here)
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        ret

.nz:
        ; pick the highest non-zero byte; B = bit index of its bit 7
        ld      b,31
        ld      a,(P_S3)
        or      a
        jr      nz,.scan
        ld      b,23
        ld      a,(P_S2)
        or      a
        jr      nz,.scan
        ld      b,15
        ld      a,(P_S1)
        or      a
        jr      nz,.scan
        ld      b,7
        ld      a,(P_S0)
.scan:
        ld      c,a
.find:
        bit     7,c
        jr      nz,.found
        sla     c
        dec     b
        jr      .find

.found:
        ; EXP = FP_BIAS + n
        ld      a,b
        add     a,FP_BIAS
        ld      (hl),a
        inc     hl

        ; shift count that brings the leading 1 to bit 31
        ld      a,31
        sub     b
        ld      b,a

        ld      a,(P_S0)
        ld      (PR_INT0),a
        ld      a,(P_S1)
        ld      (PR_INT1),a
        ld      a,(P_S2)
        ld      (PR_INT2),a
        ld      a,(P_S3)
        ld      (PR_INT3),a
        call    shl32_INT_by_B

        ; sign = 0, hidden 1 removed
        ld      a,(PR_INT3)
        and     07Fh
        ld      (hl),a
        inc     hl
        ld      a,(PR_INT2)
        ld      (hl),a
        inc     hl
        ld      a,(PR_INT1)
        ld      (hl),a
        ret
|
|
|
|
.data
|
|
; ============================================================
|
|
; pow10_table: 10^k constants (k=0..6) in THIS float encoding
|
|
; Verified:
|
|
; 1.0 = 127 00 00 00
|
|
; 10.0 = 130 20 00 00
|
|
; 100.0 = 133 48 00 00
|
|
; 1000.0 = 136 7A 00 00
|
|
; 10000.0 = 140 1C 40 00
|
|
; 100000.0 = 143 43 50 00
|
|
; 1000000.0= 146 74 24 00
|
|
; ============================================================
|
|
; pow10_table[k] = 10^k for k = 0..6, four bytes per entry:
;   EXP, S|F22..F16, F15..F8, F7..F0   (EXP = FP_BIAS + floor(log2(10^k)))
; fp_parse indexes this table with the number of fraction digits read.
pow10_table:
        .byte   127, 0x00, 0x00, 0x00   ; 1        = 1.0         * 2^0
        .byte   130, 0x20, 0x00, 0x00   ; 10       = 1.25        * 2^3
        .byte   133, 0x48, 0x00, 0x00   ; 100      = 1.5625      * 2^6
        .byte   136, 0x7A, 0x00, 0x00   ; 1000     = 1.953125    * 2^9
        .byte   140, 0x1C, 0x40, 0x00   ; 10000    = 0x2710      (14 bits)
        .byte   143, 0x43, 0x50, 0x00   ; 100000   = 0x186A0     (17 bits)
        .byte   146, 0x74, 0x24, 0x00   ; 1000000  = 0xF4240     (20 bits)
|
|
|
|
|
|
; ============================================================
|
|
; BSS / WORKSPACE
|
|
; ============================================================
|
|
.bss

; Unpacked operand A: exponent, sign (0/1), 24-bit mantissa (m2 = MSB)
.comm A_exp,1
.comm A_sign,1
.comm A_m2,1
.comm A_m1,1
.comm A_m0,1

; Unpacked operand B
.comm B_exp,1
.comm B_sign,1
.comm B_m2,1
.comm B_m1,1
.comm B_m0,1

; 48-bit workspace for multiply/divide (P0 LSB .. P5 MSB)
.comm P0,1
.comm P1,1
.comm P2,1
.comm P3,1
.comm P4,1
.comm P5,1

; Shift counter shared by the multi-bit shift helpers
.comm SHCNT,1

; Print temps
; PR_SIGN was declared as PR_SI, but the code stores to and loads from
; PR_SIGN, which left that symbol undefined.
.comm PR_SIGN,1
.comm PR_E,1
.comm PR_M2,1
.comm PR_M1,1
.comm PR_M0,1
.comm PR_INT0,1
.comm PR_INT1,1
.comm PR_INT2,1
.comm PR_INT3,1
.comm PR_R0,1
.comm PR_R1,1
.comm PR_R2,1
.comm PR_R3,1

; Parse temps: sign flag, fraction digit count, 32-bit accumulator
.comm P_SIGN,1
.comm P_FRACN,1
.comm P_S0,1
.comm P_S1,1
.comm P_S2,1
.comm P_S3,1

; Digit buffer for decimal printing. A 32-bit value has up to 10 decimal
; digits; with the old size of 1, writes indexed from DIGBUF ran into
; whatever followed it in memory.
.comm DIGBUF,10
.comm DIGLEN,1
|