zone/float.asm
A.M. Rowsell 94d6f474cd
dev: growing this into the Zone OS project
The scope of this project is growing, mostly for fun. I doubt
I'll ever get to the point where I can actually use it on a real
Z80 system, but who knows. Mostly this is a good way to learn
about vasm and refresh my Z80 assembly knowledge.
2025-12-20 17:58:51 -05:00

1626 lines
27 KiB
NASM

; SPDX-License-Identifier: MPL-2.0
; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell
; ============================================================
; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax)
; ============================================================
; Float format in memory (big-endian, 4 bytes):
; byte0: EXP (8-bit biased exponent, 0 = zero)
; byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits)
; byte2: F15..F8
; byte3: F7..F0
;
; For EXP != 0:
; value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS)
; FP_BIAS = 127
;
; Calling convention (in-place ops):
; HL -> A (4 bytes)
; DE -> B (4 bytes)
; fp_add: A = A + B (stored back at HL)
; fp_sub: A = A - B
; fp_mul: A = A * B
; fp_div: A = A / B
;
; Extra:
; fp_print: print float at (HL) using external printChar (A=ASCII)
; fp_parse: parse null-terminated string at (DE) into float at (HL)
;
; Limitations:
; - No NaN/Inf/denormals
; - Truncation (no rounding)
; - fp_print prints fixed decimals with a lightweight fraction path
; - fp_parse supports optional +/- and '.' up to MAX_FRAC digits, no exponent notation
; ============================================================
.equ FP_BIAS,127 ; exponent bias: stored EXP = true exponent + 127
.equ FRAC_DIGITS,6 ; digits fp_print emits after the decimal point
.equ MAX_FRAC,6 ; fractional digit limit used by fp_parse (pow10_table covers k = 0..6)
.extern printChar ; supplied by the caller's project: prints the ASCII character in A
; ============================================================
; CODE
; ============================================================
.section float
; ------------------------------------------------------------
; External routine you provide:
; printChar: prints ASCII character in A
; ------------------------------------------------------------
; printChar is external, not defined here.
; ============================================================
; Public API: fp_add / fp_sub / fp_mul / fp_div
; ============================================================
; ------------------------------------------------------------
; fp_add: A = A + B
; ------------------------------------------------------------
fp_add:
    ; In:  HL -> A (4 bytes, overwritten with the sum), DE -> B (4 bytes)
    ; Out: float at the original HL = A + B
    ; The unpackers advance HL / DE, so both pointers are parked on the
    ; stack while the operands are copied into the A_* / B_* workspace.
    push hl
    push de
    call fp_unpackA
    pop de
    call fp_unpackB
    pop hl
    ; 0 + B = B: copy the unpacked B over A (tail call)
    ld a,(A_exp)
    and a
    jp z,fp_pack_from_B_into_A
    ; A + 0 = A: memory at HL already holds the answer
    ld a,(B_exp)
    and a
    ret z
    ; Equal signs add magnitudes, opposite signs subtract them
    ld a,(B_sign)
    ld b,a
    ld a,(A_sign)
    xor b
    jp nz,fp_add_diff_sign
    jp fp_add_same_sign
; ------------------------------------------------------------
; fp_sub: A = A - B (negate the unpacked copy of B, then add)
; ------------------------------------------------------------
fp_sub:
    ; In:  HL -> A (4 bytes, overwritten with the difference), DE -> B
    ; B is never written: its sign is flipped only in the B_* workspace,
    ; so B may live in ROM and may even alias A (HL == DE gives 0).
    push hl
    push de
    call fp_unpackA
    pop de
    call fp_unpackB
    pop hl
    ; -B: toggle the 0/1 sign flag, but leave a zero unsigned
    ld a,(B_exp)
    or a
    jr z,fp_sub_negated
    ld a,(B_sign)
    xor 001h
    ld (B_sign),a
fp_sub_negated:
    ; From here on this mirrors the dispatch in fp_add.
    ; 0 - B = -B
    ld a,(A_exp)
    or a
    jp z,fp_pack_from_B_into_A
    ; A - 0 = A: memory at HL already holds the answer
    ld a,(B_exp)
    or a
    ret z
    ; Equal signs add magnitudes, opposite signs subtract them
    ld a,(A_sign)
    ld b,a
    ld a,(B_sign)
    xor b
    jp z,fp_add_same_sign
    jp fp_add_diff_sign
; ------------------------------------------------------------
; fp_mul: A = A * B
; ------------------------------------------------------------
fp_mul:
    ; In:  HL -> A (4 bytes, overwritten with the product), DE -> B
    ; Note: the 8-bit exponent sum is not range-checked, so results whose
    ; exponent leaves 1..255 wrap around (no Inf / underflow handling).
    push hl
    push de
    call fp_unpackA
    pop de
    call fp_unpackB
    pop hl
    ; anything times zero is zero
    ld a,(A_exp)
    or a
    jp z,fp_store_zero_A
    ld a,(B_exp)
    or a
    jp z,fp_store_zero_A
    ; sign = A_sign XOR B_sign
    ld a,(A_sign)
    ld b,a
    ld a,(B_sign)
    xor b
    ld (A_sign),a
    ; exponent = A_exp + B_exp - BIAS
    ld a,(A_exp)
    ld b,a
    ld a,(B_exp)
    add a,b
    sub FP_BIAS
    ld (A_exp),a
    ; product = A_mant * B_mant (24x24 => 48 bits in P0..P5).
    ; The multiply returns its partial products in HL, so the
    ; destination pointer has to be saved around it.
    push hl
    call mul24x24_schoolbook
    pop hl
    ; take the top 24 bits of the product as the new mantissa
    call norm_product_to_A
    ; write the result back to (HL)
    jp fp_packA
; ------------------------------------------------------------
; fp_div: A = A / B
; ------------------------------------------------------------
fp_div:
    ; In:  HL -> A (4 bytes, overwritten with the quotient), DE -> B
    ; Division by zero is not signalled: it simply yields 0.
    push hl
    push de
    call fp_unpackA
    pop de
    call fp_unpackB
    pop hl
    ; 0 / B = 0
    ld a,(A_exp)
    and a
    jp z,fp_store_zero_A
    ; A / 0 = 0 (simple "error" behaviour)
    ld a,(B_exp)
    and a
    jp z,fp_store_zero_A
    ; result sign is the XOR of the operand signs
    ld a,(B_sign)
    ld b,a
    ld a,(A_sign)
    xor b
    ld (A_sign),a
    ; result exponent = A_exp - B_exp + BIAS (8-bit, not range-checked)
    ld a,(B_exp)
    ld b,a
    ld a,(A_exp)
    sub b
    add a,FP_BIAS
    ld (A_exp),a
    ; quotient mantissa, renormalised, then stored at (HL)
    call div_mantissas_to_A
    call normalize_A_mant
    jp fp_packA
; ============================================================
; Add/Sub core (unpacked)
; ============================================================
fp_add_same_sign:
    ; Magnitude add of the unpacked operands (signs already known equal).
    ; In:  A_* / B_* workspace filled, HL -> destination
    ; Out: sum packed at (HL)
    call align_exponents_A_B
    call add24_A_plus_B
    jr nc,fp_add_same_sign_noCarry
    ; The sum needs 25 bits. Shift right once and put the carried-out
    ; bit back as the new hidden 1; shr24_A_1 alone shifts in a 0,
    ; which would lose the most significant bit of the result.
    call shr24_A_1
    ld a,(A_m2)
    or 080h
    ld (A_m2),a
    ld a,(A_exp)
    inc a
    ld (A_exp),a
fp_add_same_sign_noCarry:
    call normalize_A_mant
    jp fp_packA
fp_add_diff_sign:
    ; Magnitude subtract for operands of opposite sign.
    ; The larger magnitude is moved into A first, so the result keeps
    ; that operand's sign and the 24-bit subtraction cannot go negative.
    call compare_mag_A_B
    call nc,swap_A_B_unpacked       ; carry clear: |B| > |A|
    call align_exponents_A_B
    call sub24_A_minus_B
    ; exact cancellation gives a clean zero
    call is_A_mant_zero
    jp z,fp_store_zero_A
    call normalize_A_mant
    jp fp_packA
; ============================================================
; Unpack / Pack helpers
; ============================================================
; Unpack A from (HL)
fp_unpackA:
    ; In:  HL -> packed float
    ; Out: A_exp, A_sign (0/1), A_m2..A_m0 (hidden 1 restored)
    ;      HL advanced by 3 for a non-zero value, unchanged for zero
    ld a,(hl)
    ld (A_exp),a
    and a
    jr z,fp_unpackA_zeroA
    inc hl
    ld a,(hl)
    ld b,a                  ; B = byte1 (sign + top fraction bits)
    rlca                    ; sign bit -> bit 0
    and 001h
    ld (A_sign),a
    ld a,b
    or 080h                 ; replace the sign bit with the hidden 1
    ld (A_m2),a
    inc hl
    ld a,(hl)
    ld (A_m1),a
    inc hl
    ld a,(hl)
    ld (A_m0),a
    ret
fp_unpackA_zeroA:
    ; EXP = 0 means zero: clear the rest of the record (A is 0 here)
    ld (A_m0),a
    ld (A_m1),a
    ld (A_m2),a
    ld (A_sign),a
    ret
; Unpack B from (DE)
fp_unpackB:
    ; In:  DE -> packed float
    ; Out: B_exp, B_sign (0/1), B_m2..B_m0 (hidden 1 restored)
    ;      DE advanced by 3 for a non-zero value, unchanged for zero
    ld a,(de)
    ld (B_exp),a
    and a
    jr z,fp_unpackB_zeroB
    inc de
    ld a,(de)
    ld b,a                  ; B = byte1 (sign + top fraction bits)
    rlca                    ; sign bit -> bit 0
    and 001h
    ld (B_sign),a
    ld a,b
    or 080h                 ; replace the sign bit with the hidden 1
    ld (B_m2),a
    inc de
    ld a,(de)
    ld (B_m1),a
    inc de
    ld a,(de)
    ld (B_m0),a
    ret
fp_unpackB_zeroB:
    ; EXP = 0 means zero: clear the rest of the record (A is 0 here)
    ld (B_m0),a
    ld (B_m1),a
    ld (B_m2),a
    ld (B_sign),a
    ret
; Pack unpacked A back into memory at (HL)
fp_packA:
    ; In:  HL -> destination, A_* workspace
    ; Out: 4 packed bytes at (HL); HL left pointing at the last byte
    ; A_exp = 0 always stores an all-zero float.
    ld a,(A_exp)
    ld (hl),a
    inc hl                  ; 16-bit inc: flags untouched
    and a
    jr z,fp_packA_zero
    ; byte1 = sign bit over the top 7 fraction bits (hidden 1 dropped).
    ; The loads and res/set below leave the flags from "and a" intact.
    ld a,(A_sign)
    and a
    ld a,(A_m2)
    res 7,a
    jr z,fp_packA_storeB1
    set 7,a
fp_packA_storeB1:
    ld (hl),a
    inc hl
    ld a,(A_m1)
    ld (hl),a
    inc hl
    ld a,(A_m0)
    ld (hl),a
    ret
fp_packA_zero:
    ; A is 0 here
    ld (hl),a
    inc hl
    ld (hl),a
    inc hl
    ld (hl),a
    ret
; Pack from unpacked B into memory A (HL points to A destination)
fp_pack_from_B_into_A:
    ; In:  HL -> destination, B_* workspace
    ; Out: unpacked B stored as 4 packed bytes at (HL); HL -> last byte
    ld a,(B_exp)
    ld (hl),a
    inc hl
    ; byte1 = sign bit over the top 7 fraction bits (hidden 1 dropped).
    ; The load and res/set below leave the flags from "and a" intact.
    ld a,(B_sign)
    and a
    ld a,(B_m2)
    res 7,a
    jr z,fp_pack_from_B_store
    set 7,a
fp_pack_from_B_store:
    ld (hl),a
    inc hl
    ld a,(B_m1)
    ld (hl),a
    inc hl
    ld a,(B_m0)
    ld (hl),a
    ret
fp_store_zero_A:
    ; Result is exactly zero: clear the A_* workspace and write four
    ; zero bytes to (HL). HL is left pointing at the last byte, the
    ; same place fp_packA leaves it.
    sub a
    ld (A_exp),a
    ld (A_sign),a
    ld (A_m2),a
    ld (A_m1),a
    ld (A_m0),a
    ld (hl),a
    inc hl
    ld (hl),a
    inc hl
    ld (hl),a
    inc hl
    ld (hl),a
    ret
; ============================================================
; Exponent alignment / compare / swap
; ============================================================
; Ensure A_exp >= B_exp; shift smaller mantissa right by diff
align_exponents_A_B:
    ; Bring both unpacked operands to the same exponent.
    ; The operand with the larger exponent ends up in A; B's mantissa
    ; is shifted right by the exponent difference and B_exp := A_exp.
    ld a,(B_exp)
    ld b,a
    ld a,(A_exp)
    sub b                       ; A_exp - B_exp
    ret z                       ; already aligned
    jr nc,align_exponents_A_B_shift
    ; B has the larger exponent: exchange the operands and redo the
    ; difference (the swap does not keep A)
    call swap_A_B_unpacked
    ld a,(B_exp)
    ld b,a
    ld a,(A_exp)
    sub b
align_exponents_A_B_shift:
    call shr24_B_by_A           ; A = number of bit positions
    ld a,(A_exp)
    ld (B_exp),a
    ret
; Carry set if |A| >= |B|, else carry clear
compare_mag_A_B:
    ; Compare |A| with |B| using the unpacked fields.
    ; Out: carry set   if |A| >= |B|
    ;      carry clear if |A| <  |B|
    ; Clobbers A, B.
    ; Fields are compared most significant first (exponent, then m2,
    ; m1, m0); the first differing field decides. Every compare is
    ; "A-field - B-field", so CP leaves carry set exactly when A is
    ; the smaller one, and a single CCF at the exit inverts that.
    ld a,(B_exp)
    ld b,a
    ld a,(A_exp)
    cp b
    jr nz,compare_mag_A_B_done
    ld a,(B_m2)
    ld b,a
    ld a,(A_m2)
    cp b
    jr nz,compare_mag_A_B_done
    ld a,(B_m1)
    ld b,a
    ld a,(A_m1)
    cp b
    jr nz,compare_mag_A_B_done
    ; Lowest byte: the original code set carry on both outcomes here,
    ; reporting |A| >= |B| even when B was larger.
    ld a,(B_m0)
    ld b,a
    ld a,(A_m0)
    cp b
compare_mag_A_B_done:
    ccf                         ; equal fields leave CF=0 -> becomes 1
    ret
swap_A_B_unpacked:
    ; Exchange the complete unpacked records A_* <-> B_*.
    ; Each field is swapped through (HL) with B as the temporary.
    ; HL is saved and restored; A and B are clobbered; flags untouched.
    push hl
    ld hl,A_exp
    ld a,(B_exp)
    ld b,(hl)
    ld (hl),a
    ld a,b
    ld (B_exp),a
    ld hl,A_sign
    ld a,(B_sign)
    ld b,(hl)
    ld (hl),a
    ld a,b
    ld (B_sign),a
    ld hl,A_m2
    ld a,(B_m2)
    ld b,(hl)
    ld (hl),a
    ld a,b
    ld (B_m2),a
    ld hl,A_m1
    ld a,(B_m1)
    ld b,(hl)
    ld (hl),a
    ld a,b
    ld (B_m1),a
    ld hl,A_m0
    ld a,(B_m0)
    ld b,(hl)
    ld (hl),a
    ld a,b
    ld (B_m0),a
    pop hl
    ret
; ============================================================
; 24-bit mantissa ops
; ============================================================
add24_A_plus_B:
    ; A_m := A_m + B_m (24-bit). Carry out = bit 24 of the sum.
    ; HL is used as the operand pointer and restored; LD and POP do
    ; not touch the flags, so the final ADC carry reaches the caller.
    push hl
    ld hl,B_m0
    ld a,(A_m0)
    add a,(hl)
    ld (A_m0),a
    ld hl,B_m1
    ld a,(A_m1)
    adc a,(hl)
    ld (A_m1),a
    ld hl,B_m2
    ld a,(A_m2)
    adc a,(hl)
    ld (A_m2),a
    ld b,(hl)               ; B = B_m2, as callers have always seen it
    pop hl
    ret                     ; carry meaningful
sub24_A_minus_B:
    ; A_m := A_m - B_m (24-bit). Carry out = borrow.
    ; HL is used as the operand pointer and restored; LD and POP do
    ; not touch the flags.
    push hl
    ld hl,B_m0
    ld a,(A_m0)
    sub (hl)
    ld (A_m0),a
    ld hl,B_m1
    ld a,(A_m1)
    sbc a,(hl)
    ld (A_m1),a
    ld hl,B_m2
    ld a,(A_m2)
    sbc a,(hl)
    ld (A_m2),a
    ld b,(hl)               ; B = B_m2, as callers have always seen it
    pop hl
    ret
is_A_mant_zero:
    ; Out: Z set (and A = 0) when A_m2, A_m1 and A_m0 are all zero.
    ; Clobbers A, B; HL is saved and restored.
    push hl
    ld hl,A_m2
    ld a,(hl)
    ld hl,A_m1
    or (hl)
    ld b,a
    ld hl,A_m0
    or (hl)
    pop hl                  ; POP leaves the flags alone
    ret
shr24_A_1:
    ; A_m := A_m >> 1 (logical: a 0 enters at bit 23).
    ; Carry out = the bit shifted out of A_m0. HL preserved.
    push hl
    ld hl,A_m2
    srl (hl)
    ld hl,A_m1
    rr (hl)
    ld hl,A_m0
    rr (hl)
    ld a,(hl)               ; A = new A_m0
    pop hl
    ret
shl24_A_1:
    ; A_m := A_m << 1 (a 0 enters at bit 0).
    ; Carry out = the bit shifted out of A_m2. HL preserved.
    push hl
    ld hl,A_m0
    sla (hl)
    ld hl,A_m1
    rl (hl)
    ld hl,A_m2
    rl (hl)
    ld a,(hl)               ; A = new A_m2
    pop hl
    ret
; Shift B mantissa right by A bits (A=0..255)
shr24_B_by_A:
    ; B_m := B_m >> A (A = 0..255). A shift of 24 or more clears B_m.
    ; SHCNT holds the running count. HL preserved.
    ld (SHCNT),a
    cp 24
    jr nc,shr24_B_by_A_flush
    and a
    ret z                   ; nothing to shift
    push hl
shr24_B_by_A_loop:
    ld hl,B_m2
    srl (hl)
    ld hl,B_m1
    rr (hl)
    ld hl,B_m0
    rr (hl)
    ld hl,SHCNT
    dec (hl)
    jr nz,shr24_B_by_A_loop
    ld a,(hl)               ; A = 0, the exhausted count
    pop hl
    ret
shr24_B_by_A_flush:
    ; every mantissa bit would be shifted out
    xor a
    ld (B_m2),a
    ld (B_m1),a
    ld (B_m0),a
    ret
normalize_A_mant:
    ; Shift A_m left until bit 23 (bit 7 of A_m2) is set, decrementing
    ; A_exp once per shift. A zero mantissa forces A_exp = 0 instead.
    ; A_exp is not checked for underflow while shifting.
    call is_A_mant_zero
    jr z,normalize_A_mant_flush
normalize_A_mant_test:
    ld a,(A_m2)
    bit 7,a
    ret nz                  ; hidden 1 is in place
    call shl24_A_1
    ld a,(A_exp)
    dec a
    ld (A_exp),a
    jr normalize_A_mant_test
normalize_A_mant_flush:
    ld (A_exp),a            ; A is 0 here: result is zero
    ret
; ============================================================
; 8x8 -> 16 multiply (unsigned), shift-add
; in: A = multiplicand, C = multiplier
; out: HL = 16-bit product
; ============================================================
mul8u:
    ; Unsigned 8 x 8 -> 16 multiply (shift-and-add).
    ; In:  A = multiplicand, C = multiplier
    ; Out: HL = A * C
    ; Clobbers B, C, DE (A is left unchanged).
    ; The multiplicand is widened into DE before the loop so that its
    ; left shifts keep the bits that move past bit 7; shifting it in
    ; the 8-bit accumulator dropped them (0x80 * 2 came out as 0).
    ld e,a
    ld d,0
    ld hl,0
    ld b,8
mul8u_m8:
    srl c                   ; next multiplier bit -> carry
    jr nc,mul8u_noadd
    add hl,de
mul8u_noadd:
    sla e                   ; DE <<= 1
    rl d
    djnz mul8u_m8
    ret
; ============================================================
; 24x24 schoolbook multiply into P0..P5 (P0 LSB)
; ============================================================
mul24x24_schoolbook:
    ; P5..P0 := A_m * B_m, built from nine 8x8 partial products.
    ; Partial product (i,j) = A_m[i] * B_m[j] is added at byte offset
    ; i + j. mul8u takes A and C and returns the product in HL, so HL
    ; (and BC, DE) are not preserved across this routine.
    xor a
    ld (P5),a
    ld (P4),a
    ld (P3),a
    ld (P2),a
    ld (P1),a
    ld (P0),a
    ; --- A_m0 row ---
    ld hl,B_m0              ; (0,0) -> offset 0
    ld c,(hl)
    ld a,(A_m0)
    call mul8u
    call add16_to_P_at0
    ld hl,B_m1              ; (0,1) -> offset 1
    ld c,(hl)
    ld a,(A_m0)
    call mul8u
    call add16_to_P_at1
    ld hl,B_m2              ; (0,2) -> offset 2
    ld c,(hl)
    ld a,(A_m0)
    call mul8u
    call add16_to_P_at2
    ; --- A_m1 row ---
    ld hl,B_m0              ; (1,0) -> offset 1
    ld c,(hl)
    ld a,(A_m1)
    call mul8u
    call add16_to_P_at1
    ld hl,B_m1              ; (1,1) -> offset 2
    ld c,(hl)
    ld a,(A_m1)
    call mul8u
    call add16_to_P_at2
    ld hl,B_m2              ; (1,2) -> offset 3
    ld c,(hl)
    ld a,(A_m1)
    call mul8u
    call add16_to_P_at3
    ; --- A_m2 row ---
    ld hl,B_m0              ; (2,0) -> offset 2
    ld c,(hl)
    ld a,(A_m2)
    call mul8u
    call add16_to_P_at2
    ld hl,B_m1              ; (2,1) -> offset 3
    ld c,(hl)
    ld a,(A_m2)
    call mul8u
    call add16_to_P_at3
    ld hl,B_m2              ; (2,2) -> offset 4
    ld c,(hl)
    ld a,(A_m2)
    call mul8u
    jp add16_to_P_at4       ; tail call
; add16_to_P_atN: add the 16-bit value in HL to the 48-bit accumulator
; P5..P0 at byte offset N (L into P[N], H into P[N+1]).
; A carry out of P[N+1] ripples into the higher bytes; without that,
; overlapping partial products lose their overflow and the 48-bit
; product comes out wrong.
; In:  HL = value to add.  Clobbers A and flags; HL is preserved.
add16_to_P_at0:
    ld a,(P0)
    add a,l
    ld (P0),a
    ld a,(P1)
    adc a,h
    ld (P1),a
    ret nc
    jp add16_to_P_carry2
add16_to_P_at1:
    ld a,(P1)
    add a,l
    ld (P1),a
    ld a,(P2)
    adc a,h
    ld (P2),a
    ret nc
    jp add16_to_P_carry3
add16_to_P_at2:
    ld a,(P2)
    add a,l
    ld (P2),a
    ld a,(P3)
    adc a,h
    ld (P3),a
    ret nc
    jp add16_to_P_carry4
add16_to_P_at3:
    ld a,(P3)
    add a,l
    ld (P3),a
    ld a,(P4)
    adc a,h
    ld (P4),a
    ret nc
    jp add16_to_P_carry5
add16_to_P_at4:
    ; P5 is the top byte: a 24x24 product cannot carry out of it
    ld a,(P4)
    add a,l
    ld (P4),a
    ld a,(P5)
    adc a,h
    ld (P5),a
    ret
; Carry ripple: increment successive bytes until one does not wrap.
; INC sets Z exactly when the byte wrapped to 0 (carry continues).
add16_to_P_carry2:
    ld a,(P2)
    inc a
    ld (P2),a
    ret nz
add16_to_P_carry3:
    ld a,(P3)
    inc a
    ld (P3),a
    ret nz
add16_to_P_carry4:
    ld a,(P4)
    inc a
    ld (P4),a
    ret nz
add16_to_P_carry5:
    ld a,(P5)
    inc a
    ld (P5),a
    ret
; ============================================================
; Normalize product P into A mantissa
; P is 48-bit, P0 LSB .. P5 MSB
; ============================================================
norm_product_to_A:
    ; Reduce the 48-bit product P to a 24-bit mantissa in A_m.
    ; If bit 47 is set, bits 47..24 are kept and A_exp is incremented;
    ; otherwise bits 46..23 are kept. The lower bits are truncated.
    ld a,(P5)
    rlca                        ; bit 47 -> carry
    ld a,23
    jr nc,norm_product_shift
    ; product has bit 47 set: one extra position, exponent + 1
    ld a,(A_exp)
    inc a
    ld (A_exp),a
    ld a,24
norm_product_shift:
    call shr48_P_by_A
    ; the wanted 24 bits now sit in P2..P0
    ld a,(P0)
    ld (A_m0),a
    ld a,(P1)
    ld (A_m1),a
    ld a,(P2)
    ld (A_m2),a
    ret
shr48_P_by_A:
    ; P := P >> A (48-bit logical shift, one bit per pass).
    ; SHCNT holds the running count. HL preserved.
    ld (SHCNT),a
    and a
    ret z
    push hl
shr48_P_by_A_loop:
    ld hl,P5
    srl (hl)
    ld hl,P4
    rr (hl)
    ld hl,P3
    rr (hl)
    ld hl,P2
    rr (hl)
    ld hl,P1
    rr (hl)
    ld hl,P0
    rr (hl)
    ld hl,SHCNT
    dec (hl)
    jr nz,shr48_P_by_A_loop
    ld a,(hl)                   ; A = 0, the exhausted count
    pop hl
    ret
; ============================================================
; Mantissa division (restoring-style)
; A_m = (A_m << 23) / B_m
; ============================================================
div_mantissas_to_A:
    ; A_m := (A_m << 23) / B_m by 24 steps of restoring division.
    ; P5..P3 hold the running remainder, A_m collects the quotient.
    ; Bit 23 of the quotient is set when A_m >= B_m; otherwise the
    ; caller's normalisation shifts the result up by one.
    ; Clobbers A, B, flags.
    ;
    ; P = A_m as 48-bit, then shift left 23
    xor a
    ld (P3),a
    ld (P4),a
    ld (P5),a
    ld a,(A_m0)
    ld (P0),a
    ld a,(A_m1)
    ld (P1),a
    ld a,(A_m2)
    ld (P2),a
    ld a,23
    call shl48_P_by_A
    ; clear quotient
    xor a
    ld (A_m2),a
    ld (A_m1),a
    ld (A_m0),a
    ld b,24
div_mantissas_loop:
    ; The subtract/restore helpers may use B as scratch, so the step
    ; counter is kept on the stack for the body of the loop.
    push bc
    call shl24_A_1
    call shl48_P_1              ; carry = bit shifted out of P5
    ; A carry here means the remainder is really 25 bits wide, so it
    ; is certainly >= the 24-bit divisor: subtract unconditionally
    ; (the borrow that results cancels the dropped bit).
    jr c,div_mantissas_force
    ; subtract divisor from high 24 bits of P (P5..P3)
    call sub24_Phigh_minus_B
    jr nc,div_mantissas_setbit
    ; borrow: divisor did not fit, put the remainder back
    call add24_Phigh_plus_B
    jr div_mantissas_next
div_mantissas_force:
    call sub24_Phigh_minus_B
div_mantissas_setbit:
    ; success => set quotient LSB = 1
    ld a,(A_m0)
    or 001h
    ld (A_m0),a
div_mantissas_next:
    pop bc
    djnz div_mantissas_loop
    ret
shl48_P_by_A:
    ; P := P << A (48-bit), one call to shl48_P_1 per bit.
    ; SHCNT holds the running count and is reloaded after every call.
    ld (SHCNT),a
shl48_P_by_A_test:
    and a
    ret z                       ; count exhausted (or was 0 on entry)
    call shl48_P_1
    ld a,(SHCNT)
    dec a
    ld (SHCNT),a
    jr shl48_P_by_A_test
shl48_P_1:
    ; P := P << 1 (48-bit, a 0 enters at bit 0).
    ; Carry out = the bit shifted out of P5. HL preserved.
    push hl
    ld hl,P0
    sla (hl)
    ld hl,P1
    rl (hl)
    ld hl,P2
    rl (hl)
    ld hl,P3
    rl (hl)
    ld hl,P4
    rl (hl)
    ld hl,P5
    rl (hl)
    ld a,(hl)                   ; A = new P5
    pop hl                      ; LD / POP keep the carry intact
    ret
sub24_Phigh_minus_B:
    ; P5..P3 := P5..P3 - B_m (24-bit). Carry set indicates borrow.
    ; Clobbers only A and flags. B, C and HL are preserved: the
    ; division loop that calls this keeps its DJNZ counter in B, and
    ; overwriting B here made that loop run forever.
    push hl
    ld hl,B_m0
    ld a,(P3)
    sub (hl)
    ld (P3),a
    ld hl,B_m1
    ld a,(P4)
    sbc a,(hl)
    ld (P4),a
    ld hl,B_m2
    ld a,(P5)
    sbc a,(hl)
    ld (P5),a
    pop hl                      ; LD / POP keep the borrow intact
    ret                         ; carry set indicates borrow
add24_Phigh_plus_B:
    ; P5..P3 := P5..P3 + B_m (24-bit): undoes sub24_Phigh_minus_B.
    ; Clobbers only A and flags. B, C and HL are preserved: the
    ; division loop that calls this keeps its DJNZ counter in B, and
    ; overwriting B here made that loop run forever.
    push hl
    ld hl,B_m0
    ld a,(P3)
    add a,(hl)
    ld (P3),a
    ld hl,B_m1
    ld a,(P4)
    adc a,(hl)
    ld (P4),a
    ld hl,B_m2
    ld a,(P5)
    adc a,(hl)
    ld (P5),a
    pop hl
    ret
; ============================================================
; fp_print: fixed format printing
; Prints: [-]I.FFFFFF (FRAC_DIGITS digits)
; Uses printChar (A=char)
; ============================================================
fp_print:
    ; Print the float at (HL) as [-]I.FFFFFF (FRAC_DIGITS decimals).
    ; In:  HL -> packed float.  Output goes through printChar (A = char).
    ; Clobbers A, BC, DE, HL and the PR_* workspace.
    ;
    ; Method: the 24-bit mantissa M is placed in a 32.32 fixed-point
    ; pair PR_INT (integer part) : PR_R (binary fraction), then moved
    ; by EXP - (FP_BIAS + 23) bit positions. The integer is printed
    ; with print_u32_dec; each fraction digit is the overflow of
    ; PR_R * 10. Digits are truncated, not rounded.
    ; Magnitudes of 2^32 and above do not fit: they print 4294967295.
    ;
    ; printChar's register usage is not visible from this file, so
    ; loop counters are saved around every call to it.
    ld a,(hl)
    or a
    jr nz,fp_print_nz
    ; EXP = 0: print "0." followed by FRAC_DIGITS zeros
    ld a,'0'
    call printChar
    ld a,'.'
    call printChar
    ld b,FRAC_DIGITS
fp_print_zf:
    push bc
    ld a,'0'
    call printChar
    pop bc
    djnz fp_print_zf
    ret
fp_print_nz:
    ; PR_E = unbiased exponent (8-bit, may wrap; see fp_print_mag)
    sub FP_BIAS
    ld (PR_E),a
    inc hl
    ; byte1: sign in bit 7, top fraction bits below it
    ld a,(hl)
    ld b,a
    rlca
    and 001h
    ld (PR_SIGN),a              ; 0 / 1
    ld a,b
    or 080h                     ; hidden 1 replaces the sign bit
    ld (PR_M2),a
    inc hl
    ld a,(hl)
    ld (PR_M1),a
    inc hl
    ld a,(hl)
    ld (PR_M0),a
    ld a,(PR_SIGN)
    or a
    jr z,fp_print_mag
    ld a,'-'
    call printChar
fp_print_mag:
    ; PR_INT = M, PR_R = 0  (represents M * 2^0)
    ld a,(PR_M0)
    ld (PR_INT0),a
    ld a,(PR_M1)
    ld (PR_INT1),a
    ld a,(PR_M2)
    ld (PR_INT2),a
    xor a
    ld (PR_INT3),a
    ld (PR_R0),a
    ld (PR_R1),a
    ld (PR_R2),a
    ld (PR_R3),a
    ; Shift count = EXP - (FP_BIAS + 23), computed from the biased
    ; exponent so that the unsigned borrow gives the direction without
    ; any signed 8-bit wrap-around.
    ld a,(PR_E)
    add a,FP_BIAS               ; back to the stored EXP (1..255)
    sub FP_BIAS+23
    jr c,fp_print_shr           ; EXP < 150: fraction bits present
    ; value = M << A. M has 24 bits, so at most 8 positions fit.
    cp 9
    jr c,fp_print_shl
    ld a,0FFh                   ; too large for 32 bits: saturate
    ld (PR_INT0),a
    ld (PR_INT1),a
    ld (PR_INT2),a
    ld (PR_INT3),a
    jr fp_print_digits
fp_print_shl:
    ld b,a
    call shl32_INT_by_B         ; returns at once when B = 0
    jr fp_print_digits
fp_print_shr:
    neg                         ; A = 150 - EXP = right-shift count (1..149)
    cp 57
    jr c,fp_print_shr_go
    ld a,56                     ; 24 + 32 positions already empty everything
fp_print_shr_go:
    ld b,a
fp_print_shr_loop:
    ; one-bit right shift of the 64-bit pair PR_INT : PR_R
    ld hl,PR_INT3
    srl (hl)
    ld hl,PR_INT2
    rr (hl)
    ld hl,PR_INT1
    rr (hl)
    ld hl,PR_INT0
    rr (hl)
    ld hl,PR_R3
    rr (hl)
    ld hl,PR_R2
    rr (hl)
    ld hl,PR_R1
    rr (hl)
    ld hl,PR_R0
    rr (hl)
    djnz fp_print_shr_loop
fp_print_digits:
    call print_u32_dec          ; integer part (consumes PR_INT)
    ld a,'.'
    call printChar
    ld b,FRAC_DIGITS
fp_print_fr:
    push bc
    call fp_print_frac_mul10    ; A = next decimal digit
    add a,'0'
    call printChar
    pop bc
    djnz fp_print_fr
    ret
; PR_R := (PR_R * 10) mod 2^32; returns the overflow (0..9) in A.
; Private to fp_print. Clobbers C, DE, HL.
fp_print_frac_mul10:
    ld c,0                      ; carry into the lowest byte
    ld de,PR_R0
    call fp_print_mul10_byte
    ld de,PR_R1
    call fp_print_mul10_byte
    ld de,PR_R2
    call fp_print_mul10_byte
    ld de,PR_R3
    call fp_print_mul10_byte
    ld a,c
    ret
; (DE) := low byte of (DE) * 10 + C;  C := high byte (0..9).
; Private to fp_print. Clobbers A, HL.
fp_print_mul10_byte:
    ld a,(de)
    ld l,a
    ld h,0
    add hl,hl                   ; 2x
    add hl,hl                   ; 4x
    add a,l                     ; + x, carried into H by hand
    ld l,a
    jr nc,fp_print_mul10_x5
    inc h
fp_print_mul10_x5:
    add hl,hl                   ; 10x (at most 2550)
    ld a,l
    add a,c                     ; + carry-in (at most 9)
    jr nc,fp_print_mul10_store
    inc h
fp_print_mul10_store:
    ld (de),a
    ld c,h
    ret
; Shift-right PR_INT by B, collect shifted-out bits into PR_R3 (simplified)
shr32_INT_to_INT_with_remainder:
    ; PR_INT := PR_INT >> B (32-bit logical shift).
    ; The bits shifted out are rotated into the top of PR_R3, so on
    ; return PR_R3 holds the 8 most recently shifted-out bits, newest
    ; in bit 7: the top 8 bits of the binary fraction that was dropped.
    ; In:  B = shift count (0 leaves PR_INT unchanged, PR_R3 = 0)
    ; Clobbers A, B, flags.
    xor a
    ld (PR_R3),a
    ld a,b
    or a
    ret z
shr32_INT_to_INT_with_remainder_loop:
    ld a,(PR_INT3)
    srl a
    ld (PR_INT3),a
    ld a,(PR_INT2)
    rr a
    ld (PR_INT2),a
    ld a,(PR_INT1)
    rr a
    ld (PR_INT1),a
    ld a,(PR_INT0)
    rr a
    ld (PR_INT0),a
    ; Carry holds the shifted-out bit. The loads do not touch it, so
    ; RR moves it straight into PR_R3; an ADD here would overwrite
    ; the carry before it could be collected.
    ld a,(PR_R3)
    rr a
    ld (PR_R3),a
    djnz shr32_INT_to_INT_with_remainder_loop
    ret
shl32_INT_by_B:
    ; PR_INT := PR_INT << B (32-bit; bits leaving bit 31 are lost).
    ; In:  B = shift count, 0 returns immediately.
    ; Clobbers A, B, flags. HL is saved and restored.
    ld a,b
    and a
    ret z
    push hl
shl32_INT_by_B_loop:
    ld hl,PR_INT0
    sla (hl)
    ld hl,PR_INT1
    rl (hl)
    ld hl,PR_INT2
    rl (hl)
    ld hl,PR_INT3
    rl (hl)
    djnz shl32_INT_by_B_loop
    ld a,(hl)                   ; A = new PR_INT3
    pop hl
    ret
mul_remainder_by_10:
    ; PR_R3 := (PR_R3 * 10) mod 256. Only the low 8 bits are kept;
    ; anything that overflows the byte is discarded.
    ; Clobbers A, B (B = the original PR_R3).
    ld a,(PR_R3)
    ld b,a
    add a,a                     ; 2x
    add a,a                     ; 4x
    add a,b                     ; 5x
    add a,a                     ; 10x
    ld (PR_R3),a
    ret
; Print PR_INT (u32) as decimal
print_u32_dec:
    ; Print PR_INT (unsigned 32-bit) in decimal through printChar.
    ; PR_INT is divided down to 0 in the process.
    ; Clobbers A, BC, DE, HL.
    ;
    ; Digits come out of the division least significant first, so they
    ; are stacked and then popped for printing. A zero byte marks the
    ; bottom of the digit stack (ASCII digits are never 0). At most 10
    ; digits are stacked, and nothing has to survive in registers
    ; across the printChar calls.
    xor a
    push af                     ; end marker
print_u32_dec_dloop:
    call u32_div10_inplace      ; A = PR_INT mod 10, PR_INT /= 10
    add a,'0'
    push af
    ; loop while the quotient is non-zero; a value of 0 therefore
    ; still produces the single digit "0"
    ld hl,PR_INT0
    ld a,(hl)
    ld hl,PR_INT1
    or (hl)
    ld hl,PR_INT2
    or (hl)
    ld hl,PR_INT3
    or (hl)
    jr nz,print_u32_dec_dloop
print_u32_dec_pr:
    pop af
    or a
    ret z                       ; end marker reached
    call printChar
    jr print_u32_dec_pr
; Divide PR_INT (u32) by 10, return remainder in A (0..9)
u32_div10_inplace:
    ; PR_INT := PR_INT / 10 (unsigned 32-bit); A = remainder (0..9).
    ; Long division by bytes, most significant first: PR_INT3, then
    ; PR_INT2, PR_INT1, PR_INT0. Each byte is addressed by name, so
    ; the walk cannot stray out of PR_INT.
    ; B carries the remainder between steps. Clobbers whatever
    ; u32_div10_step clobbers, plus HL.
    ld b,0                      ; remainder
    ld hl,PR_INT3
    call u32_div10_step
    ld hl,PR_INT2
    call u32_div10_step
    ld hl,PR_INT1
    call u32_div10_step
    ld hl,PR_INT0
    call u32_div10_step
    ld a,b
    ret
u32_div10_step:
    ; One byte of a long division by 10.
    ; In:  B  = remainder carried in from the more significant byte;
    ;           must be 0..9, which u32_div10_inplace guarantees
    ;      HL -> dividend byte
    ; Out: (HL) = quotient byte, B = new remainder (0..9), A = B
    ; Clobbers C. HL is preserved.
    ;
    ; Eight shift-and-subtract steps replace subtracting 10 up to 255
    ; times. C feeds dividend bits out of its top while quotient bits
    ; enter at the bottom; the remainder stays below 20, so it always
    ; fits in A.
    ld c,(hl)
    ld a,b
    ld b,8
u32_div10_bit:
    sla c                       ; next dividend bit -> carry, bit 0 := 0
    rla                         ; remainder = remainder * 2 + that bit
    cp 10
    jr c,u32_div10_next
    sub 10
    inc c                       ; quotient bit = 1
u32_div10_next:
    djnz u32_div10_bit
    ld (hl),c
    ld b,a
    ret
; ============================================================
; fp_parse: parse decimal string -> float
; DE -> "[-]ddd[.ddd]\0"
; HL -> output float
; ============================================================
fp_parse:
    ; Parse "[+|-]ddd[.ddd]" at (DE) into the float at (HL).
    ; Parsing stops at the first character that does not fit the
    ; pattern (normally the terminating 0). At most MAX_FRAC fraction
    ; digits are used; further digits are ignored.
    ; In:  DE -> string, HL -> 4-byte destination
    ; Clobbers A, BC, DE and the P_* / PR_* workspace.
    ;
    ; All digits are accumulated as one unsigned 32-bit integer P_S
    ; (not checked for overflow), converted to a float, and divided by
    ; 10^k, where k is the number of fraction digits taken.
    xor a
    ld (P_SIGN),a
    ld (P_FRACN),a
    ld (P_S0),a
    ld (P_S1),a
    ld (P_S2),a
    ld (P_S3),a
    ; optional sign
    ld a,(de)
    cp '-'
    jr nz,fp_parse_chkplus
    ld a,1
    ld (P_SIGN),a
    inc de
    jr fp_parse_il
fp_parse_chkplus:
    cp '+'
    jr nz,fp_parse_il
    inc de
fp_parse_il:
    ; integer digits: P_S = P_S * 10 + digit
    ld a,(de)
    call is_digit
    jr nc,fp_parse_maybe_dot
    ld a,(de)
    sub '0'
    ld c,a
    call u32_mul10_scaled
    call u32_add8_scaled
    inc de
    jr fp_parse_il
fp_parse_maybe_dot:
    ld a,(de)
    cp '.'
    jr nz,fp_parse_finish_scaled
    inc de
fp_parse_fl:
    ; The digit count lives in P_FRACN rather than in a register:
    ; u32_mul10_scaled uses B as scratch, so a DJNZ counter would not
    ; survive and k could run past the end of pow10_table.
    ld a,(P_FRACN)
    cp MAX_FRAC
    jr nc,fp_parse_finish_scaled
    ld a,(de)
    call is_digit
    jr nc,fp_parse_finish_scaled
    ld a,(de)
    sub '0'
    ld c,a
    call u32_mul10_scaled
    call u32_add8_scaled
    ld a,(P_FRACN)
    inc a
    ld (P_FRACN),a
    inc de
    jr fp_parse_fl
fp_parse_finish_scaled:
    ; Convert the scaled integer to a float at (HL). The converter
    ; advances HL, so the destination pointer is saved around it.
    push hl
    call fp_from_u32_scaled_to_A
    pop hl
    ; divide by 10^k if there were fraction digits
    ld a,(P_FRACN)
    or a
    jr z,fp_parse_apply_sign
    ; DE = &pow10_table[k] (4 bytes per entry)
    push hl
    ld l,a
    ld h,0
    add hl,hl
    add hl,hl
    ld de,pow10_table
    add hl,de
    ex de,hl
    pop hl
    ; fp_div also leaves HL advanced
    push hl
    call fp_div
    pop hl
fp_parse_apply_sign:
    ld a,(P_SIGN)
    or a
    ret z
    ; a zero result stays an all-zero float ("-0" parses as 0)
    ld a,(hl)
    or a
    ret z
    inc hl
    ld a,(hl)
    xor 080h
    ld (hl),a
    ret
is_digit:
    ; Out: carry set when A is an ASCII digit '0'..'9', clear otherwise.
    ; A is preserved.
    cp '0'
    ccf                         ; carry now set only when A >= '0'
    ret nc                      ; below '0': not a digit
    cp '9'+1                    ; carry set only when A <= '9'
    ret
; P_S = P_S*10 (uses PR_INT and PR_R0..3 as scratch)
u32_mul10_scaled:
    ; P_S := P_S * 10 (unsigned 32-bit, overflow discarded),
    ; computed as (P_S << 1) + (P_S << 3).
    ; Scratch: PR_INT ends as P_S << 1, PR_R as P_S << 3.
    ; Clobbers A, B, flags. C, DE and HL are preserved.
    ;
    ; PR_INT = P_S * 2
    ld a,(P_S3)
    ld (PR_INT3),a
    ld a,(P_S2)
    ld (PR_INT2),a
    ld a,(P_S1)
    ld (PR_INT1),a
    ld a,(P_S0)
    ld (PR_INT0),a
    ld b,1
    call shl32_INT_by_B
    ; PR_R = PR_INT * 4 = P_S * 8
    ld a,(PR_INT3)
    ld (PR_R3),a
    ld a,(PR_INT2)
    ld (PR_R2),a
    ld a,(PR_INT1)
    ld (PR_R1),a
    ld a,(PR_INT0)
    ld (PR_R0),a
    ld b,2
    call shl32_R_by_B
    ; P_S = PR_INT + PR_R
    push hl
    ld hl,PR_R0
    ld a,(PR_INT0)
    add a,(hl)
    ld (P_S0),a
    ld hl,PR_R1
    ld a,(PR_INT1)
    adc a,(hl)
    ld (P_S1),a
    ld hl,PR_R2
    ld a,(PR_INT2)
    adc a,(hl)
    ld (P_S2),a
    ld hl,PR_R3
    ld a,(PR_INT3)
    adc a,(hl)
    ld (P_S3),a
    ld b,(hl)                   ; B = PR_R3, as callers have always seen it
    pop hl
    ret
shl32_R_by_B:
    ; PR_R := PR_R << B (32-bit; bits leaving bit 31 are lost).
    ; In:  B = shift count, 0 returns immediately.
    ; Clobbers A, B, flags. HL is saved and restored.
    ld a,b
    and a
    ret z
    push hl
shl32_R_by_B_loop:
    ld hl,PR_R0
    sla (hl)
    ld hl,PR_R1
    rl (hl)
    ld hl,PR_R2
    rl (hl)
    ld hl,PR_R3
    rl (hl)
    djnz shl32_R_by_B_loop
    ld a,(hl)                   ; A = new PR_R3
    pop hl
    ret
; P_S += C (0..9)
u32_add8_scaled:
    ; P_S := P_S + C (unsigned 32-bit, C = 0..9).
    ; The carry ripples upward only as far as it has to; INC sets Z
    ; exactly when a byte wraps to 0 and the carry must continue.
    ; Clobbers A and flags.
    ld a,(P_S0)
    add a,c
    ld (P_S0),a
    ret nc
    ld a,(P_S1)
    inc a
    ld (P_S1),a
    ret nz
    ld a,(P_S2)
    inc a
    ld (P_S2),a
    ret nz
    ld a,(P_S3)
    inc a
    ld (P_S3),a
    ret
; Convert P_S (u32) to float at (HL). Positive only; sign handled by caller.
fp_from_u32_scaled_to_A:
    ; Convert the unsigned 32-bit integer P_S to a float at (HL).
    ; The result is always positive; the caller applies any sign.
    ; Bits below the 24-bit mantissa are truncated.
    ; In:  HL -> 4-byte destination
    ; Out: float stored; HL left pointing at its last byte
    ; Clobbers A, BC and PR_INT.
    ld a,(P_S0)
    ld b,a
    ld a,(P_S1)
    or b
    ld b,a
    ld a,(P_S2)
    or b
    ld b,a
    ld a,(P_S3)
    or b
    jr nz,fp_from_u32_scaled_to_A_nz
    ; P_S = 0: store an all-zero float (A is 0 here)
    ld (hl),a
    inc hl
    ld (hl),a
    inc hl
    ld (hl),a
    inc hl
    ld (hl),a
    ret
fp_from_u32_scaled_to_A_nz:
    ; Find the index of the highest set bit in B (0..31): take the
    ; highest non-zero byte in C with B = index of that byte's bit 7.
    ld b,31
    ld a,(P_S3)
    ld c,a
    or a
    jr nz,fp_from_u32_scaled_to_A_find
    ld b,23
    ld a,(P_S2)
    ld c,a
    or a
    jr nz,fp_from_u32_scaled_to_A_find
    ld b,15
    ld a,(P_S1)
    ld c,a
    or a
    jr nz,fp_from_u32_scaled_to_A_find
    ld b,7
    ld a,(P_S0)
    ld c,a
fp_from_u32_scaled_to_A_find:
    ; walk down from bit 7 of C until the first set bit
    bit 7,c
    jr nz,fp_from_u32_scaled_to_A_found
    sla c
    dec b
    jr fp_from_u32_scaled_to_A_find
fp_from_u32_scaled_to_A_found:
    ; EXP = FP_BIAS + B
    ld a,b
    add a,FP_BIAS
    ld (hl),a
    inc hl
    ; Left-justify the value: shift by 31 - B so the leading 1 lands
    ; in bit 31. The mantissa bytes are read from PR_INT3..PR_INT1
    ; below, so bit 31, not bit 23, is where the hidden 1 must be;
    ; 31 - B also never goes negative.
    ld a,31
    sub b
    ld b,a
    ; PR_INT = P_S
    ld a,(P_S0)
    ld (PR_INT0),a
    ld a,(P_S1)
    ld (PR_INT1),a
    ld a,(P_S2)
    ld (PR_INT2),a
    ld a,(P_S3)
    ld (PR_INT3),a
    call shl32_INT_by_B
    ; sign = 0, hidden 1 removed, top 23 fraction bits stored
    ld a,(PR_INT3)
    and 07Fh
    ld (hl),a
    inc hl
    ld a,(PR_INT2)
    ld (hl),a
    inc hl
    ld a,(PR_INT1)
    ld (hl),a
    ret
; ============================================================
; BSS / WORKSPACE
; ============================================================
.balign 16
.bss
; Unpacked A
A_exp: .space 1
A_sign: .space 1
A_m2: .space 1
A_m1: .space 1
A_m0: .space 1
; Unpacked B
B_exp: .space 1
B_sign: .space 1
B_m2: .space 1
B_m1: .space 1
B_m0: .space 1
; 48-bit workspace (P0 LSB .. P5 MSB)
P0: .space 1
P1: .space 1
P2: .space 1
P3: .space 1
P4: .space 1
P5: .space 1
; Loop counter for the multi-bit shift routines
SHCNT: .space 1
; Print temps
PR_SIGN: .space 1
PR_E: .space 1
PR_M2: .space 1
PR_M1: .space 1
PR_M0: .space 1
; 32-bit integer scratch (PR_INT0 LSB .. PR_INT3 MSB)
PR_INT0: .space 1
PR_INT1: .space 1
PR_INT2: .space 1
PR_INT3: .space 1
; 32-bit remainder / second scratch (PR_R0 LSB .. PR_R3 MSB)
PR_R0: .space 1
PR_R1: .space 1
PR_R2: .space 1
PR_R3: .space 1
; Parse temps (P_S0 LSB .. P_S3 MSB is the digit accumulator)
P_SIGN: .space 1
P_FRACN: .space 1
P_S0: .space 1
P_S1: .space 1
P_S2: .space 1
P_S3: .space 1
; Digit buffer: a 32-bit value has up to 10 decimal digits
; (4294967295). With a single byte reserved here, the second digit
; stored would land on DIGLEN.
DIGBUF: .space 10
DIGLEN: .space 1
; ============================================================
; pow10_table: 10^k constants (k=0..6) in THIS float encoding
; Verified:
; 1.0 = 127 00 00 00
; 10.0 = 130 20 00 00
; 100.0 = 133 48 00 00
; 1000.0 = 136 7A 00 00
; 10000.0 = 140 1C 40 00
; 100000.0 = 143 43 50 00
; 1000000.0= 146 74 24 00
; ============================================================
; Constants go back into the "float" section, after the .bss workspace.
.section float
; One 4-byte entry per power of ten, indexed by k (entry k is at
; pow10_table + 4*k). Byte order per entry: EXP, sign|top fraction
; bits, middle fraction byte, low fraction byte. All entries are
; positive, so bit 7 of the second byte is clear.
pow10_table:
.byte 127, 0x00, 0x00, 0x00 ; 10^0 = 1
.byte 130, 0x20, 0x00, 0x00 ; 10^1 = 10
.byte 133, 0x48, 0x00, 0x00 ; 10^2 = 100
.byte 136, 0x7A, 0x00, 0x00 ; 10^3 = 1000
.byte 140, 0x1C, 0x40, 0x00 ; 10^4 = 10000
.byte 143, 0x43, 0x50, 0x00 ; 10^5 = 100000
.byte 146, 0x74, 0x24, 0x00 ; 10^6 = 1000000