zone/float.asm

1685 lines
26 KiB
NASM

; SPDX-License-Identifier: MPL-2.0
; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell
; ============================================================
; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax)
; ============================================================
; Float format in memory (big-endian, 4 bytes):
; byte0: EXP (8-bit biased exponent, 0 = zero)
; byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits)
; byte2: F15..F8
; byte3: F7..F0
; For EXP != 0:
; value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS)
; FP_BIAS = 127
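; Calling convention (in-place ops):
; HL -> A (4 bytes)
; DE -> B (4 bytes)
; Registers, including HL and DE, are not preserved across these calls
; unless a routine's own header says otherwise.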
; fp_add: A = A + B (stored back at HL)
; fp_sub: A = A - B
; fp_mul: A = A * B
; fp_div: A = A / B
; Extra:
; fp_print: print float at (HL) using external os_print_vec (A=ASCII)
; fp_parse: parse null-terminated string at (DE) into float at (HL)
; Limitations:
; - No NaN/Inf/denormals
; - Truncation (no rounding)
; - fp_print prints fixed decimals with a lightweight fraction path
; - fp_parse supports optional +/- and '.' up to MAX_FRAC digits, no exponent notation
; ============================================================
; Exponent bias: the stored EXP byte is the true exponent plus FP_BIAS.
.equ FP_BIAS, 127
; Number of digits fp_print emits after the decimal point.
.equ FRAC_DIGITS, 6
; Fraction-digit limit loaded by fp_parse; pow10_table has entries for k = 0..6.
.equ MAX_FRAC, 6
; Character output routine supplied by the host, called with the ASCII code in A.
.extern os_print_vec
; ============================================================
; CODE
; ============================================================
.section "zone", "acrx"
; ------------------------------------------------------------
; External routine you provide:
; os_print_vec: prints ASCII character in A
; ------------------------------------------------------------
; os_print_vec is external, not defined here.
; ============================================================
; Public API: fp_add / fp_sub / fp_mul / fp_div
; ============================================================
; ------------------------------------------------------------
; fp_add: A = A + B
; In:  HL -> A (4-byte float, overwritten with the sum)
;      DE -> B (4-byte float, only read)
; Both operands are unpacked into the A_* / B_* workspace, then
; control passes to the same-sign or different-sign core.
; ------------------------------------------------------------
fp_add:
    push hl                         ; keep the destination pointer
    push de
    call fp_unpackA                 ; advances HL
    pop de
    call fp_unpackB                 ; advances DE
    pop hl                          ; HL -> A again for the final pack
    ; A == 0  =>  the sum is simply B
    ld a, (A_exp)
    or a
    jp z, fp_pack_from_B_into_A     ; tail call, returns to our caller
    ; B == 0  =>  A already holds the answer, memory is left untouched
    ld a, (B_exp)
    or a
    ret z
    ; equal signs add magnitudes, opposite signs subtract them
    ld a, (B_sign)
    ld b, a
    ld a, (A_sign)
    xor b
    jp nz, fp_add_diff_sign
    jp fp_add_same_sign
; ------------------------------------------------------------
; fp_sub: A = A - B (flip B sign in memory, add, flip back)
; In:  HL -> A (overwritten with the difference)
;      DE -> B (sign bit is toggled temporarily, so B must be
;               writable; it is restored before returning)
; Out: DE -> B (preserved). HL is left as fp_add leaves it.
; If HL and DE name the same float the result is stored as zero,
; because negating B in place would also negate A.
; ------------------------------------------------------------
fp_sub:
    ; A - A: the in-place negation trick cannot work on aliased operands
    ld a, l
    cp e
    jr nz, fp_sub_distinct
    ld a, h
    cp d
    jp z, fp_store_zero_A
fp_sub_distinct:
    ; negate B: toggle bit 7 of byte1 (DE+1)
    push de
    inc de
    ld a, (de)
    xor 0x80
    ld (de), a
    dec de
    call fp_add
    ; fp_add advances DE while unpacking B, so recover the saved
    ; pointer before undoing the sign flip
    pop de
    inc de
    ld a, (de)
    xor 0x80
    ld (de), a
    dec de
    ret
; ------------------------------------------------------------
; fp_mul: A = A * B
; In:  HL -> A (overwritten with the product)
;      DE -> B (only read)
; A zero operand gives a zero result.
; The exponent is computed in 8 bits, so results outside the
; representable range wrap instead of saturating.
; ------------------------------------------------------------
fp_mul:
    push hl
    push de
    call fp_unpackA
    pop de
    call fp_unpackB
    pop hl
    ; if A==0 or B==0 => 0
    ld a, (A_exp)
    or a
    jp z, fp_store_zero_A
    ld a, (B_exp)
    or a
    jp z, fp_store_zero_A
    ; sign = A_sign XOR B_sign
    ld a, (A_sign)
    ld b, a
    ld a, (B_sign)
    xor b
    ld (A_sign), a
    ; exponent = A_exp + B_exp - BIAS
    ld a, (A_exp)
    ld b, a
    ld a, (B_exp)
    add a, b
    sub FP_BIAS
    ld (A_exp), a
    ; The multiply uses HL for its 16-bit partial products, so the
    ; destination pointer has to be parked on the stack meanwhile.
    push hl
    ; product = A_mant * B_mant (24x24 => 48)
    call mul24x24_schoolbook
    ; normalize product into A mantissa (may bump A_exp by one)
    call norm_product_to_A
    pop hl
    ; pack back into (HL)
    jp fp_packA
; ------------------------------------------------------------
; fp_div: A = A / B
; In:  HL -> A (overwritten with the quotient)
;      DE -> B (only read)
; A zero dividend gives zero. A zero divisor also gives zero;
; there is no error signalling.
; ------------------------------------------------------------
fp_div:
    push hl
    push de
    call fp_unpackA
    pop de
    call fp_unpackB
    pop hl
    ; either operand zero => store zero
    ld a, (A_exp)
    or a
    jp z, fp_store_zero_A
    ld a, (B_exp)
    or a
    jp z, fp_store_zero_A
    ; result sign: negative when exactly one operand is negative
    ld a, (B_sign)
    ld b, a
    ld a, (A_sign)
    xor b
    ld (A_sign), a
    ; result exponent = A_exp - B_exp + BIAS (8-bit arithmetic)
    ld a, (B_exp)
    ld b, a
    ld a, (A_exp)
    sub b
    add a, FP_BIAS
    ld (A_exp), a
    ; quotient of the mantissas, then renormalize and store
    call div_mantissas_to_A
    call normalize_A_mant
    jp fp_packA
; ============================================================
; Add/Sub core (unpacked)
; ============================================================
; fp_add_same_sign: magnitude addition for operands of equal sign.
; In:  unpacked A and B, HL -> destination float
; Out: sum packed at (HL)
fp_add_same_sign:
    call align_exponents_A_B
    call add24_A_plus_B
    jr nc, fp_add_same_sign_noCarry
    ; The sum needs 25 bits. Shift right one place and put the
    ; carried-out bit back as the new top mantissa bit; shr24_A_1
    ; shifts a zero in, which would discard it.
    call shr24_A_1
    ld a, (A_m2)
    or 0x80
    ld (A_m2), a
    ld a, (A_exp)
    inc a                           ; NOTE: 255 -> 0 wraps, no overflow check
    ld (A_exp), a
fp_add_same_sign_noCarry:
    call normalize_A_mant
    jp fp_packA
; fp_add_diff_sign: magnitude subtraction for operands of opposite sign.
; The larger magnitude is moved into A so the difference is non-negative
; and the result takes that operand's sign.
; In:  unpacked A and B, HL -> destination float
fp_add_diff_sign:
    call compare_mag_A_B            ; carry set when |A| >= |B|
    call nc, swap_A_B_unpacked      ; otherwise make A the larger one
    call align_exponents_A_B
    call sub24_A_minus_B
    call is_A_mant_zero
    jp z, fp_store_zero_A           ; exact cancellation
    call normalize_A_mant
    jp fp_packA
; ============================================================
; Unpack / Pack helpers
; ============================================================
; fp_unpackA: split the float at (HL) into A_exp, A_sign (0/1) and the
; 24-bit mantissa A_m2:A_m1:A_m0 with the hidden 1 made explicit.
; EXP == 0 means zero: sign and mantissa are cleared, HL is not advanced.
; Otherwise HL ends up on the last byte of the float. Clobbers A, B.
fp_unpackA:
    ld a, (hl)
    ld (A_exp), a
    or a
    jr z, fp_unpackA_zeroA
    inc hl
    ld a, (hl)
    ld b, a                         ; B = sign | top fraction bits
    rlca                            ; sign bit -> bit 0
    and 1
    ld (A_sign), a
    ld a, b
    or 0x80                         ; sign position becomes the hidden 1
    ld (A_m2), a
    inc hl
    ld a, (hl)
    ld (A_m1), a
    inc hl
    ld a, (hl)
    ld (A_m0), a
    ret
fp_unpackA_zeroA:
    xor a
    ld (A_m0), a
    ld (A_m1), a
    ld (A_m2), a
    ld (A_sign), a
    ret
; fp_unpackB: split the float at (DE) into B_exp, B_sign (0/1) and the
; 24-bit mantissa B_m2:B_m1:B_m0 with the hidden 1 made explicit.
; EXP == 0 means zero: sign and mantissa are cleared, DE is not advanced.
; Otherwise DE ends up on the last byte of the float. Clobbers A, B.
fp_unpackB:
    ld a, (de)
    ld (B_exp), a
    or a
    jr z, fp_unpackB_zeroB
    inc de
    ld a, (de)
    ld b, a                         ; B = sign | top fraction bits
    rlca                            ; sign bit -> bit 0
    and 1
    ld (B_sign), a
    ld a, b
    or 0x80                         ; sign position becomes the hidden 1
    ld (B_m2), a
    inc de
    ld a, (de)
    ld (B_m1), a
    inc de
    ld a, (de)
    ld (B_m0), a
    ret
fp_unpackB_zeroB:
    xor a
    ld (B_m0), a
    ld (B_m1), a
    ld (B_m2), a
    ld (B_sign), a
    ret
; fp_packA: store the unpacked A value as a 4-byte float at (HL).
; A_exp == 0 writes four zero bytes. Otherwise the hidden 1 is dropped
; and A_sign is merged into bit 7 of byte1.
; HL ends up on the last byte written. Clobbers A, B.
fp_packA:
    ld a, (A_exp)
    or a
    jr nz, fp_packA_packNZ
    ; zero: A is 0 here, reuse it for all four bytes
    ld (hl), a
    inc hl
    ld (hl), a
    inc hl
    ld (hl), a
    inc hl
    ld (hl), a
    ret
fp_packA_packNZ:
    ld (hl), a                      ; byte0 = exponent
    inc hl
    ld a, (A_m2)
    and 0x7F                        ; strip the hidden 1
    ld b, a
    ld a, (A_sign)
    or a
    ld a, b                         ; LD keeps the flags from OR
    jr z, fp_packA_storeB1
    or 0x80                         ; negative: set the sign bit
fp_packA_storeB1:
    ld (hl), a
    inc hl
    ld a, (A_m1)
    ld (hl), a
    inc hl
    ld a, (A_m0)
    ld (hl), a
    ret
; fp_pack_from_B_into_A: store the unpacked B value as a 4-byte float
; at (HL), the destination of operand A.
; HL ends up on the last byte written. Clobbers A, B.
fp_pack_from_B_into_A:
    ld a, (B_exp)
    ld (hl), a                      ; byte0 = exponent
    inc hl
    ld a, (B_m2)
    and 0x7F                        ; strip the hidden 1
    ld b, a
    ld a, (B_sign)
    or a
    ld a, b                         ; LD keeps the flags from OR
    jr z, fp_pack_from_B_store1
    or 0x80                         ; negative: set the sign bit
fp_pack_from_B_store1:
    ld (hl), a
    inc hl
    ld a, (B_m1)
    ld (hl), a
    inc hl
    ld a, (B_m0)
    ld (hl), a
    ret
; fp_store_zero_A: clear the unpacked A value and write a zero float
; (four zero bytes) at (HL). HL ends up on the last byte written.
fp_store_zero_A:
    xor a
    ld (A_m0), a
    ld (A_m1), a
    ld (A_m2), a
    ld (A_sign), a
    ld (A_exp), a
    ; same bytes fp_packA writes for a zero exponent
    ld (hl), a
    inc hl
    ld (hl), a
    inc hl
    ld (hl), a
    inc hl
    ld (hl), a
    ret
; ============================================================
; Exponent alignment / compare / swap
; ============================================================
; Ensure A_exp >= B_exp; shift smaller mantissa right by diff
; On return both operands carry the same exponent (the larger one).
; If B had the larger exponent the two operands are exchanged first.
; Clobbers A, B, C and SHCNT (via shr24_B_by_A).
align_exponents_A_B:
ld a, (A_exp)
ld b, a
ld a, (B_exp)
cp b ; flags from B_exp - A_exp
jr z, align_exponents_A_B_done ; already aligned
jr c, align_exponents_A_B_bigger_exp; B_exp < A_exp
call swap_A_B_unpacked; make A the larger exponent
align_exponents_A_B_bigger_exp:
ld a, (A_exp)
ld b, a
ld a, (B_exp)
ld c, a
ld a, b
sub c; A = diff
call shr24_B_by_A ; bits shifted out of B are discarded (truncation)
ld a, (A_exp)
ld (B_exp), a ; B now shares A's exponent
align_exponents_A_B_done:
ret
; compare_mag_A_B: compare the magnitudes of the unpacked operands.
; Out: carry set if |A| >= |B|, carry clear if |A| < |B|
; Compares exponent first, then the mantissa bytes from most to least
; significant; the first differing field decides. Clobbers A, B.
compare_mag_A_B:
    ld a, (B_exp)
    ld b, a
    ld a, (A_exp)
    cp b                            ; carry <=> A field < B field
    jr nz, compare_mag_A_B_decided
    ld a, (B_m2)
    ld b, a
    ld a, (A_m2)
    cp b
    jr nz, compare_mag_A_B_decided
    ld a, (B_m1)
    ld b, a
    ld a, (A_m1)
    cp b
    jr nz, compare_mag_A_B_decided
    ld a, (B_m0)
    ld b, a
    ld a, (A_m0)
    cp b                            ; all equal leaves carry clear
compare_mag_A_B_decided:
    ; CP leaves carry set exactly when A < B; the contract is the
    ; opposite polarity, so invert it.
    ccf
    ret
; swap_A_B_unpacked: exchange the unpacked operands field by field
; (exponent, sign, and the three mantissa bytes), using B as the
; temporary. Clobbers A and B only.
swap_A_B_unpacked:
; exponent
ld a, (A_exp)
ld b, a
ld a, (B_exp)
ld (A_exp), a
ld a, b
ld (B_exp), a
; sign
ld a, (A_sign)
ld b, a
ld a, (B_sign)
ld (A_sign), a
ld a, b
ld (B_sign), a
; mantissa, most significant byte first
ld a, (A_m2)
ld b, a
ld a, (B_m2)
ld (A_m2), a
ld a, b
ld (B_m2), a
ld a, (A_m1)
ld b, a
ld a, (B_m1)
ld (A_m1), a
ld a, b
ld (B_m1), a
ld a, (A_m0)
ld b, a
ld a, (B_m0)
ld (A_m0), a
ld a, b
ld (B_m0), a
ret
; ============================================================
; 24-bit mantissa ops
; ============================================================
; add24_A_plus_B: A mantissa = A mantissa + B mantissa (24-bit).
; Out: carry = carry out of bit 23. Clobbers A, B.
add24_A_plus_B:
ld a, (B_m0)
ld b, a
ld a, (A_m0)
add a, b ; low byte starts the carry chain
ld (A_m0), a
ld a, (B_m1)
ld b, a
ld a, (A_m1)
adc a, b
ld (A_m1), a
ld a, (B_m2)
ld b, a
ld a, (A_m2)
adc a, b
ld (A_m2), a ; LD leaves the final carry intact
ret ; carry meaningful
; sub24_A_minus_B: A mantissa = A mantissa - B mantissa (24-bit).
; Out: carry = borrow out of bit 23. Clobbers A, B.
sub24_A_minus_B:
ld a, (B_m0)
ld b, a
ld a, (A_m0)
sub b ; low byte starts the borrow chain
ld (A_m0), a
ld a, (B_m1)
ld b, a
ld a, (A_m1)
sbc a, b
ld (A_m1), a
ld a, (B_m2)
ld b, a
ld a, (A_m2)
sbc a, b
ld (A_m2), a
ret
; is_A_mant_zero: test whether the 24-bit A mantissa is all zero.
; Out: Z set if A_m2 | A_m1 | A_m0 == 0; A holds that OR. Clobbers A, B.
is_A_mant_zero:
ld a, (A_m2)
ld b, a
ld a, (A_m1)
or b
ld b, a
ld a, (A_m0)
or b
ret
; shr24_A_1: logical shift of the A mantissa right by one bit.
; A zero enters at bit 23; the bit shifted out of A_m0 is left in carry.
; Clobbers A.
shr24_A_1:
ld a, (A_m2)
srl a ; top byte: 0 in, bit 0 out to carry
ld (A_m2), a
ld a, (A_m1)
rr a ; carry in from the byte above
ld (A_m1), a
ld a, (A_m0)
rr a
ld (A_m0), a
ret
; shl24_A_1: shift the A mantissa left by one bit.
; A zero enters at bit 0; the bit shifted out of A_m2 is left in carry.
; Clobbers A.
shl24_A_1:
ld a, (A_m0)
add a, a ; low byte: doubles, bit 7 out to carry
ld (A_m0), a
ld a, (A_m1)
adc a, a ; carry in from the byte below
ld (A_m1), a
ld a, (A_m2)
adc a, a
ld (A_m2), a
ret
; shr24_B_by_A: logical right shift of the B mantissa by A bits.
; In:  A = shift count (0..255)
; A count of 24 or more clears the mantissa; a count of 0 is a no-op.
; Shifted-out bits are discarded. Uses SHCNT as the loop counter.
; Clobbers A.
shr24_B_by_A:
    ld (SHCNT), a
    cp 24
    jr nc, shr24_B_by_A_flush       ; nothing would survive the shift
    or a
    ret z
shr24_B_by_A_loop:
    ld a, (B_m2)
    srl a
    ld (B_m2), a
    ld a, (B_m1)
    rr a
    ld (B_m1), a
    ld a, (B_m0)
    rr a
    ld (B_m0), a
    ld a, (SHCNT)
    dec a
    ld (SHCNT), a
    jr nz, shr24_B_by_A_loop
    ret
shr24_B_by_A_flush:
    xor a
    ld (B_m2), a
    ld (B_m1), a
    ld (B_m0), a
    ret
; normalize_A_mant: shift the A mantissa left until bit 23 is set,
; decrementing A_exp once per shift.
; A zero mantissa sets A_exp = 0 (the encoding of zero).
; If the exponent reaches 0 before the mantissa is normalized the value
; has underflowed: A_exp is left at 0, which fp_packA stores as zero,
; instead of wrapping around to a huge exponent.
; Clobbers A, B.
normalize_A_mant:
    call is_A_mant_zero
    jr nz, normalize_A_mant_test
    xor a
    ld (A_exp), a
    ret
normalize_A_mant_test:
    ld a, (A_m2)
    bit 7, a
    ret nz                          ; already normalized
    ld a, (A_exp)
    or a
    ret z                           ; exponent already flushed to zero
    dec a
    ld (A_exp), a
    ret z                           ; underflow: result packs as zero
    call shl24_A_1
    jr normalize_A_mant_test
; ============================================================
; 8x8 -> 16 multiply (unsigned), shift-add
; in: A = multiplicand, C = multiplier
; out: HL = 16-bit product
; Clobbers B, C, D, E (C ends as 0).
; The multiplicand is widened to 16 bits in DE before it is shifted,
; so bits moving past bit 7 still contribute to the product.
; ============================================================
mul8u:
    ld hl, 0
    ld e, a
    ld d, h                         ; DE = multiplicand, zero-extended
    ld b, 8
mul8u_m8:
    srl c                           ; next multiplier bit -> carry
    jr nc, mul8u_noadd
    add hl, de
mul8u_noadd:
    sla e                           ; DE <<= 1
    rl d
    djnz mul8u_m8
    ret
; ============================================================
; 24x24 schoolbook multiply into P0..P5 (P0 LSB)
; ============================================================
; Multiplies the A mantissa by the B mantissa as nine 8x8 partial
; products. Each "(i, j) offset k" step multiplies A byte i by B byte j
; (byte 0 = least significant) and adds the 16-bit result into P at
; byte offset k = i + j.
; NOTE(review): mul8u returns its product in HL, so HL does not survive
; this routine; a caller holding a pointer in HL has to save it.
mul24x24_schoolbook:
; clear the 48-bit accumulator
xor a
ld (P0), a
ld (P1), a
ld (P2), a
ld (P3), a
ld (P4), a
ld (P5), a
; (0, 0) offset 0
ld a, (B_m0)
ld c, a
ld a, (A_m0)
call mul8u
call add16_to_P_at0
; (0, 1) offset 1
ld a, (B_m1)
ld c, a
ld a, (A_m0)
call mul8u
call add16_to_P_at1
; (0, 2) offset 2
ld a, (B_m2)
ld c, a
ld a, (A_m0)
call mul8u
call add16_to_P_at2
; (1, 0) offset 1
ld a, (B_m0)
ld c, a
ld a, (A_m1)
call mul8u
call add16_to_P_at1
; (1, 1) offset 2
ld a, (B_m1)
ld c, a
ld a, (A_m1)
call mul8u
call add16_to_P_at2
; (1, 2) offset 3
ld a, (B_m2)
ld c, a
ld a, (A_m1)
call mul8u
call add16_to_P_at3
; (2, 0) offset 2
ld a, (B_m0)
ld c, a
ld a, (A_m2)
call mul8u
call add16_to_P_at2
; (2, 1) offset 3
ld a, (B_m1)
ld c, a
ld a, (A_m2)
call mul8u
call add16_to_P_at3
; (2, 2) offset 4
ld a, (B_m2)
ld c, a
ld a, (A_m2)
call mul8u
call add16_to_P_at4
ret
; add16_to_P_at0 .. add16_to_P_at4: add the 16-bit value in HL into the
; 48-bit accumulator P at byte offset 0..4.
; In:  HL = value to add (L is added at the offset, H one byte above)
; A carry out of the two bytes touched is rippled into the higher
; bytes of P; without this, accumulated partial products lose their
; upper bits. Clobbers A only.
add16_to_P_at0:
    ld a, (P0)
    add a, l
    ld (P0), a
    ld a, (P1)
    adc a, h
    ld (P1), a
    ret nc
    jr add16_carry_into_P2
add16_to_P_at1:
    ld a, (P1)
    add a, l
    ld (P1), a
    ld a, (P2)
    adc a, h
    ld (P2), a
    ret nc
    jr add16_carry_into_P3
add16_to_P_at2:
    ld a, (P2)
    add a, l
    ld (P2), a
    ld a, (P3)
    adc a, h
    ld (P3), a
    ret nc
    jr add16_carry_into_P4
add16_to_P_at3:
    ld a, (P3)
    add a, l
    ld (P3), a
    ld a, (P4)
    adc a, h
    ld (P4), a
    ret nc
    jr add16_carry_into_P5
add16_to_P_at4:
    ld a, (P4)
    add a, l
    ld (P4), a
    ld a, (P5)
    adc a, h
    ld (P5), a
    ret
; Carry ripple: add 1 to the named byte and continue upward only while
; the byte wraps to zero. Each entry falls through to the next one.
add16_carry_into_P2:
    ld a, (P2)
    inc a
    ld (P2), a
    ret nz
add16_carry_into_P3:
    ld a, (P3)
    inc a
    ld (P3), a
    ret nz
add16_carry_into_P4:
    ld a, (P4)
    inc a
    ld (P4), a
    ret nz
add16_carry_into_P5:
    ld a, (P5)
    inc a
    ld (P5), a
    ret
; ============================================================
; Normalize product P into A mantissa
; P is 48-bit, P0 LSB .. P5 MSB
; ============================================================
; If bit 47 of P is set the product is shifted right 24 places and
; A_exp is incremented; otherwise it is shifted right 23 places.
; The low three bytes of the shifted P become the A mantissa; bits
; shifted out are discarded (truncation).
; Clobbers A, P0..P5 and SHCNT.
norm_product_to_A:
ld a, (P5)
bit 7, a
jr z, norm_product_shift23
ld a, 24
call shr48_P_by_A
ld a, (A_exp)
inc a
ld (A_exp), a
jr norm_product_take
norm_product_shift23:
ld a, 23
call shr48_P_by_A
norm_product_take:
ld a, (P2)
ld (A_m2), a
ld a, (P1)
ld (A_m1), a
ld a, (P0)
ld (A_m0), a
ret
; shr48_P_by_A: logical right shift of the 48-bit value P5..P0 by A bits.
; In:  A = shift count; 0 returns immediately.
; One bit per loop pass, counted in SHCNT. Clobbers A.
shr48_P_by_A:
ld (SHCNT), a
ld a, (SHCNT)
or a
ret z
shr48_P_by_A_loop:
ld a, (P5)
srl a ; top byte: zero shifted in
ld (P5), a
ld a, (P4)
rr a ; each lower byte takes the carry from the one above
ld (P4), a
ld a, (P3)
rr a
ld (P3), a
ld a, (P2)
rr a
ld (P2), a
ld a, (P1)
rr a
ld (P1), a
ld a, (P0)
rr a
ld (P0), a
ld a, (SHCNT)
dec a
ld (SHCNT), a
jr nz, shr48_P_by_A_loop
ret
; ============================================================
; Mantissa division (restoring-style)
; A_m = (A_m << 23) / B_m
; ============================================================
; In:  unpacked A and B mantissas (B non-zero)
; Out: A mantissa = 24-bit truncated quotient; P holds the remainder
;      in its high three bytes.
; Uses P0..P5 and SHCNT. Clobbers A, B.
div_mantissas_to_A:
    ; P = A_m as 48-bit, then shift left 23
    xor a
    ld (P3), a
    ld (P4), a
    ld (P5), a
    ld a, (A_m0)
    ld (P0), a
    ld a, (A_m1)
    ld (P1), a
    ld a, (A_m2)
    ld (P2), a
    ld a, 23
    call shl48_P_by_A
    ; clear quotient
    xor a
    ld (A_m2), a
    ld (A_m1), a
    ld (A_m0), a
    ld b, 24
div_mantissas_loop:
    ; the 24-bit add/sub helpers use B as scratch, so the loop
    ; counter lives on the stack during each step
    push bc
    call shl24_A_1
    call shl48_P_1                  ; carry = bit shifted out of P5
    jr c, div_mantissas_wide
    ; subtract divisor from high 24 bits of P (P5..P3)
    call sub24_Phigh_minus_B
    jr nc, div_mantissas_set
    ; borrow: divisor did not fit, undo the subtraction
    call add24_Phigh_plus_B
    jr div_mantissas_next
div_mantissas_wide:
    ; The remainder grew to 25 bits, so it certainly exceeds the
    ; divisor. The borrow produced here cancels the lost top bit.
    call sub24_Phigh_minus_B
div_mantissas_set:
    ; success => set quotient LSB = 1
    ld a, (A_m0)
    or 0x1
    ld (A_m0), a
div_mantissas_next:
    pop bc
    djnz div_mantissas_loop
    ret
; shl48_P_by_A: shift the 48-bit value P5..P0 left by A bits.
; In:  A = shift count; 0 returns immediately.
; One call to shl48_P_1 per bit, counted in SHCNT. Clobbers A.
shl48_P_by_A:
ld (SHCNT), a
ld a, (SHCNT)
or a
ret z
shl48_P_by_A_loop:
call shl48_P_1
ld a, (SHCNT)
dec a
ld (SHCNT), a
jr nz, shl48_P_by_A_loop
ret
; shl48_P_1: shift the 48-bit value P5..P0 left by one bit.
; A zero enters at bit 0; the bit shifted out of P5 is left in carry
; (the final LD does not touch the flags). Clobbers A.
shl48_P_1:
ld a, (P0)
add a, a ; low byte: doubles, bit 7 out to carry
ld (P0), a
ld a, (P1)
adc a, a ; carry in from the byte below
ld (P1), a
ld a, (P2)
adc a, a
ld (P2), a
ld a, (P3)
adc a, a
ld (P3), a
ld a, (P4)
adc a, a
ld (P4), a
ld a, (P5)
adc a, a
ld (P5), a
ret
; sub24_Phigh_minus_B: P5:P4:P3 = P5:P4:P3 - B mantissa (24-bit).
; Out: carry = borrow. Clobbers A and B (B is used as scratch, so it
; cannot hold a caller's loop counter across this call).
sub24_Phigh_minus_B:
ld a, (B_m0)
ld b, a
ld a, (P3)
sub b
ld (P3), a
ld a, (B_m1)
ld b, a
ld a, (P4)
sbc a, b
ld (P4), a
ld a, (B_m2)
ld b, a
ld a, (P5)
sbc a, b
ld (P5), a
ret ; carry set indicates borrow
; add24_Phigh_plus_B: P5:P4:P3 = P5:P4:P3 + B mantissa (24-bit).
; Used to undo sub24_Phigh_minus_B. Out: carry = carry out of P5.
; Clobbers A and B (B is used as scratch).
add24_Phigh_plus_B:
ld a, (B_m0)
ld b, a
ld a, (P3)
add a, b
ld (P3), a
ld a, (B_m1)
ld b, a
ld a, (P4)
adc a, b
ld (P4), a
ld a, (B_m2)
ld b, a
ld a, (P5)
adc a, b
ld (P5), a
ret
; ============================================================
; fp_print: fixed format printing
; Prints: [-]I.FFFFFF (FRAC_DIGITS digits, truncated, not rounded)
; Uses os_print_vec (A=char)
; In:  HL -> float (4 bytes, only read)
; Method: the 24-bit mantissa is placed in PR_INT with PR_R as a
; 32-bit binary fraction below it, and that 64-bit pair is shifted
; until the binary point sits between PR_INT and PR_R. PR_INT is
; then printed as an integer, and each fraction digit is the
; overflow of PR_R * 10.
; Magnitudes of 2^32 and above do not fit PR_INT and are printed
; as 4294967295.
; BC is saved around os_print_vec inside the digit loops, so the
; host routine is free to clobber it.
; Relies on PR_INT0..3 and PR_R0..3 each being consecutive bytes
; in ascending order.
; ============================================================
fp_print:
    ; zero?
    ld a, (hl)
    or a
    jr nz, fp_print_nz
    ld a, '0'
    call os_print_vec
    ld a, '.'
    call os_print_vec
    ld b, FRAC_DIGITS
fp_print_zf:
    push bc
    ld a, '0'
    call os_print_vec
    pop bc
    djnz fp_print_zf
    ret
fp_print_nz:
    ld (PR_E), a                    ; raw biased exponent, 1..255
    inc hl
    ; sign + top fraction
    ld a, (hl)
    ld b, a
    and 0x80
    ld (PR_SIGN), a                 ; non-zero => negative
    ; mantissa with hidden 1 inserted
    ld a, b
    or 0x80                         ; sign position becomes the hidden 1
    ld (PR_M2), a
    inc hl
    ld a, (hl)
    ld (PR_M1), a
    inc hl
    ld a, (hl)
    ld (PR_M0), a
    ; print '-'
    ld a, (PR_SIGN)
    or a
    jr z, fp_print_mag
    ld a, '-'
    call os_print_vec
fp_print_mag:
    ; PR_INT = mantissa (24 bits), PR_R = 0
    ld a, (PR_M0)
    ld (PR_INT0), a
    ld a, (PR_M1)
    ld (PR_INT1), a
    ld a, (PR_M2)
    ld (PR_INT2), a
    xor a
    ld (PR_INT3), a
    ld (PR_R0), a
    ld (PR_R1), a
    ld (PR_R2), a
    ld (PR_R3), a
    ; value = mantissa * 2^(EXP - FP_BIAS - 23). Working from the raw
    ; exponent keeps the comparison unsigned.
    ld a, (PR_E)
    sub FP_BIAS+23
    jr c, fp_print_shr              ; EXP < 150: fractional bits exist
    ; shift left by A; more than 8 places overflows 32 bits
    cp 9
    jr nc, fp_print_sat
    ld b, a
    call shl32_INT_by_B             ; a count of 0 returns at once
    jr fp_print_out
fp_print_sat:
    ld a, 0xFF
    ld (PR_INT0), a
    ld (PR_INT1), a
    ld (PR_INT2), a
    ld (PR_INT3), a
    jr fp_print_out
fp_print_shr:
    neg                             ; A = 150 - EXP = 1..149
    cp 56
    jr c, fp_print_shr_go
    ld a, 56                        ; 56 or more leaves nothing behind
fp_print_shr_go:
    ld b, a
fp_print_shr_loop:
    ; 64-bit shift right of PR_INT:PR_R. 16-bit DEC and LD leave
    ; the carry chain untouched.
    ld hl, PR_INT3
    srl (hl)
    dec hl
    rr (hl)
    dec hl
    rr (hl)
    dec hl
    rr (hl)
    ld hl, PR_R3
    rr (hl)
    dec hl
    rr (hl)
    dec hl
    rr (hl)
    dec hl
    rr (hl)
    djnz fp_print_shr_loop
fp_print_out:
    call print_u32_dec
    ld a, '.'
    call os_print_vec
    ld b, FRAC_DIGITS
fp_print_fr:
    push bc
    call fp_print_mul10             ; A = next decimal digit
    add a, '0'
    call os_print_vec
    pop bc
    djnz fp_print_fr
    ret
; fp_print_mul10: PR_R = (PR_R * 10) mod 2^32, A = overflow (0..9).
; Works one byte at a time from the low end, carrying the high part of
; each byte*10 product into the next byte. Clobbers B, C, D, E, H, L.
fp_print_mul10:
    ld de, PR_R0
    ld c, 0                         ; carry into the lowest byte
    ld b, 4
fp_print_mul10_byte:
    ld a, (de)
    ld l, a
    ld h, 0
    push de
    ld d, h
    ld e, l
    add hl, hl                      ; *2
    add hl, hl                      ; *4
    add hl, de                      ; *5
    add hl, hl                      ; *10
    pop de
    ld a, l
    add a, c
    ld (de), a
    ld a, h
    adc a, 0
    ld c, a                         ; carry for the next byte, at most 9
    inc de
    djnz fp_print_mul10_byte
    ld a, c
    ret
; shr32_INT_to_INT_with_remainder: logical right shift of PR_INT by B
; bits, collecting the shifted-out bits in PR_R3.
; In:  B = shift count; 0 only clears PR_R3.
; Each pass does PR_R3 = (PR_R3 << 1) | bit shifted out of PR_INT0, so
; PR_R3 holds the last (up to eight) dropped bits, earliest dropped bit
; in the highest position. Clobbers A, B.
shr32_INT_to_INT_with_remainder:
    xor a
    ld (PR_R3), a
    ld a, b
    or a
    ret z
shr32_INT_to_INT_with_remainder_loop:
    ld a, (PR_INT3)
    srl a
    ld (PR_INT3), a
    ld a, (PR_INT2)
    rr a
    ld (PR_INT2), a
    ld a, (PR_INT1)
    rr a
    ld (PR_INT1), a
    ld a, (PR_INT0)
    rr a
    ld (PR_INT0), a
    ; Carry holds the shifted-out bit and the loads below keep it.
    ; RLA moves it into bit 0; an ADD here would overwrite the carry
    ; before it could be used.
    ld a, (PR_R3)
    rla
    ld (PR_R3), a
    djnz shr32_INT_to_INT_with_remainder_loop
    ret
; shl32_INT_by_B: shift the 32-bit value PR_INT3..PR_INT0 left by B bits.
; In:  B = shift count; 0 returns immediately.
; Bits shifted out of PR_INT3 are lost. Clobbers A and B (B ends at 0).
shl32_INT_by_B:
ld a, b
or a
ret z
shl32_INT_by_B_loop:
ld a, (PR_INT0)
add a, a ; low byte: doubles, bit 7 out to carry
ld (PR_INT0), a
ld a, (PR_INT1)
adc a, a ; carry in from the byte below
ld (PR_INT1), a
ld a, (PR_INT2)
adc a, a
ld (PR_INT2), a
ld a, (PR_INT3)
adc a, a
ld (PR_INT3), a
djnz shl32_INT_by_B_loop
ret
; mul_remainder_by_10: PR_R3 = PR_R3 * 10, computed in 8 bits.
; The result is taken modulo 256, so inputs above 25 overflow.
; Clobbers A and B (B is left holding the original PR_R3).
mul_remainder_by_10:
ld a, (PR_R3)
ld b, a
add a, a; *2
add a, a; *4
add a, a; *8
add a, b; *9
add a, b; *10
ld (PR_R3), a
ret
; Print PR_INT (u32) as decimal
; print_u32_dec: print the unsigned 32-bit value in PR_INT3..PR_INT0,
; most significant digit first, with no leading zeros ("0" for zero).
; PR_INT is destroyed (left at 0).
; Digits come out least significant first from repeated division by
; 10, so they are parked on the stack (at most 10 entries) and popped
; for printing. The digit count is kept in DIGLEN, in memory, so it
; does not depend on which registers os_print_vec preserves.
; Clobbers A, B, C, D, E, H, L.
print_u32_dec:
    xor a
    ld (DIGLEN), a
print_u32_dec_dloop:
    call u32_div10_inplace          ; A = PR_INT mod 10, PR_INT /= 10
    add a, '0'
    push af                         ; park the digit
    ld hl, DIGLEN
    inc (hl)
    ; continue while the quotient is non-zero
    ld hl, PR_INT0
    ld a, (hl)
    inc hl
    or (hl)
    inc hl
    or (hl)
    inc hl
    or (hl)
    jr nz, print_u32_dec_dloop
print_u32_dec_pr:
    pop af                          ; most recent digit = most significant
    call os_print_vec
    ld hl, DIGLEN
    dec (hl)
    jr nz, print_u32_dec_pr
    ret
; Divide PR_INT (u32) by 10, return remainder in A (0..9)
; Long division one byte at a time, starting at the most significant
; byte (PR_INT3, the highest address) and stepping DOWN to PR_INT0.
; Clobbers B, C, D, E, H, L.
u32_div10_inplace:
    ld b, 0                         ; running remainder
    ld hl, PR_INT3
    call u32_div10_step
    dec hl
    call u32_div10_step
    dec hl
    call u32_div10_step
    dec hl
    call u32_div10_step
    ld a, b
    ret
; u32_div10_step: one byte of the long division.
; In:  B = remainder from the byte above (0..9), HL -> current byte
; Out: (HL) = quotient byte, B = new remainder (0..9)
; Divides DE = B*256 + (HL) by 10 using repeated subtraction.
u32_div10_step:
    ld d, b
    ld e, (hl)
    ld c, 0                         ; quotient byte
u32_div10_div:
    ld a, d
    or a
    jr nz, u32_div10_sub            ; DE >= 256, certainly >= 10
    ld a, e
    cp 10
    jr c, u32_div10_done
u32_div10_sub:
    ld a, e
    sub 10
    ld e, a
    ld a, d
    sbc a, 0
    ld d, a
    inc c
    jr u32_div10_div
u32_div10_done:
    ld (hl), c
    ld b, e
    ret
; ============================================================
; fp_parse: parse decimal string -> float
; DE -> "[-]ddd[.ddd]\0"
; HL -> output float
; Out: HL -> output float (preserved)
; All digits are accumulated into one unsigned 32-bit integer (P_S),
; which is converted to a float and then divided by 10^k, where k is
; the number of fraction digits consumed. At most MAX_FRAC fraction
; digits are used; parsing stops at the first character that does not
; fit the format. P_S is not checked for 32-bit overflow.
; A result of zero is always stored as +0.
; ============================================================
fp_parse:
    xor a
    ld (P_SIGN), a
    ld (P_FRACN), a
    ld (P_S0), a
    ld (P_S1), a
    ld (P_S2), a
    ld (P_S3), a
    ; optional sign
    ld a, (de)
    cp '-'
    jr nz, fp_parse_chkplus
    ld a, 1
    ld (P_SIGN), a
    inc de
    jr fp_parse_intpart
fp_parse_chkplus:
    cp '+'
    jr nz, fp_parse_intpart
    inc de
fp_parse_intpart:
    ld a, (de)
    call is_digit
    jr nc, fp_parse_maybe_dot
    sub '0'
    ld c, a
    call u32_mul10_scaled
    call u32_add8_scaled
    inc de
    jr fp_parse_intpart
fp_parse_maybe_dot:
    ld a, (de)
    cp '.'
    jr nz, fp_parse_finish_scaled
    inc de
fp_parse_fl:
    ; The digit budget is tracked in P_FRACN rather than in B, because
    ; u32_mul10_scaled uses B as scratch.
    ld a, (P_FRACN)
    cp MAX_FRAC
    jr nc, fp_parse_finish_scaled
    ld a, (de)
    call is_digit
    jr nc, fp_parse_finish_scaled
    sub '0'
    ld c, a
    call u32_mul10_scaled
    call u32_add8_scaled
    ld a, (P_FRACN)
    inc a
    ld (P_FRACN), a
    inc de
    jr fp_parse_fl
fp_parse_finish_scaled:
    ; convert scaled u32 to float into (HL); the converter may
    ; advance HL, so keep our own copy of the output pointer
    push hl
    call fp_from_u32_scaled_to_A
    pop hl
    ; divide by 10^k if needed
    ld a, (P_FRACN)
    or a
    jr z, fp_parse_apply_sign
    ; DE = &pow10_table[k] (4 bytes per entry)
    push hl
    ld e, a
    ld d, 0
    ld hl, pow10_table
    add hl, de
    add hl, de
    add hl, de
    add hl, de
    ex de, hl
    pop hl
    push hl                         ; fp_div advances HL while packing
    call fp_div
    pop hl
fp_parse_apply_sign:
    ld a, (P_SIGN)
    or a
    ret z
    ld a, (hl)
    or a
    ret z                           ; keep zero canonical (no "-0")
    inc hl
    ld a, (hl)
    xor 0x80
    ld (hl), a
    dec hl
    ret
; is_digit: classify the character in A.
; Out: carry set if A is '0'..'9', carry clear otherwise.
; A is not modified.
is_digit:
    cp '0'
    jr c, is_digit_no               ; below '0'
    cp '9'+1
    ret c                           ; carry set here means A <= '9'
is_digit_no:
    or a                            ; clear carry, A unchanged
    ret
; P_S = P_S*10 (uses PR_INT and PR_R0..3 as scratch)
; Computed as (P_S << 1) + (P_S << 3); the result is taken modulo 2^32.
; Clobbers A and B. C, DE and HL are not written by this routine or
; by the two shift helpers it calls.
u32_mul10_scaled:
; PR_INT = P
ld a, (P_S0)
ld (PR_INT0), a
ld a, (P_S1)
ld (PR_INT1), a
ld a, (P_S2)
ld (PR_INT2), a
ld a, (P_S3)
ld (PR_INT3), a
; PR_INT *=2
ld b, 1
call shl32_INT_by_B
; PR_R = P
ld a, (P_S0)
ld (PR_R0), a
ld a, (P_S1)
ld (PR_R1), a
ld a, (P_S2)
ld (PR_R2), a
ld a, (P_S3)
ld (PR_R3), a
; PR_R *=8 (shift left 3)
ld b, 3
call shl32_R_by_B
; P = PR_INT + PR_R
ld a, (PR_R0)
ld b, a
ld a, (PR_INT0)
add a, b ; low byte starts the carry chain
ld (P_S0), a
ld a, (PR_R1)
ld b, a
ld a, (PR_INT1)
adc a, b
ld (P_S1), a
ld a, (PR_R2)
ld b, a
ld a, (PR_INT2)
adc a, b
ld (P_S2), a
ld a, (PR_R3)
ld b, a
ld a, (PR_INT3)
adc a, b
ld (P_S3), a
ret
; shl32_R_by_B: shift the 32-bit value PR_R3..PR_R0 left by B bits.
; In:  B = shift count; 0 returns immediately.
; Bits shifted out of PR_R3 are lost. Clobbers A and B (B ends at 0).
shl32_R_by_B:
ld a, b
or a
ret z
shl32_R_by_B_loop:
ld a, (PR_R0)
add a, a ; low byte: doubles, bit 7 out to carry
ld (PR_R0), a
ld a, (PR_R1)
adc a, a ; carry in from the byte below
ld (PR_R1), a
ld a, (PR_R2)
adc a, a
ld (PR_R2), a
ld a, (PR_R3)
adc a, a
ld (PR_R3), a
djnz shl32_R_by_B_loop
ret
; P_S += C (0..9)
; Adds the 8-bit value in C to the 32-bit accumulator P_S3..P_S0,
; rippling the carry through all four bytes. Clobbers A.
u32_add8_scaled:
ld a, (P_S0)
add a, c
ld (P_S0), a
ld a, (P_S1)
adc a, 0 ; propagate carry only
ld (P_S1), a
ld a, (P_S2)
adc a, 0
ld (P_S2), a
ld a, (P_S3)
adc a, 0
ld (P_S3), a
ret
; Convert P_S (u32) to float at (HL). Positive only; sign handled by caller.
; In:  P_S3..P_S0 = unsigned 32-bit integer, HL -> output float
; Out: float written at (HL); HL preserved
; Values needing more than 24 significant bits are truncated.
; Uses PR_INT as scratch. Clobbers A, B, C.
fp_from_u32_scaled_to_A:
    push hl
    ld a, (P_S0)
    ld b, a
    ld a, (P_S1)
    or b
    ld b, a
    ld a, (P_S2)
    or b
    ld b, a
    ld a, (P_S3)
    or b
    jr nz, fp_from_u32_scaled_to_A_nz
    ; zero: A is 0 here
    ld (hl), a
    inc hl
    ld (hl), a
    inc hl
    ld (hl), a
    inc hl
    ld (hl), a
    pop hl
    ret
fp_from_u32_scaled_to_A_nz:
    ; find MSB index in B (0..31): pick the highest non-zero byte,
    ; then scan down from its top bit
    ld b, 31
    ld a, (P_S3)
    ld c, a
    or a
    jr nz, fp_from_u32_scaled_to_A_find
    ld b, 23
    ld a, (P_S2)
    ld c, a
    or a
    jr nz, fp_from_u32_scaled_to_A_find
    ld b, 15
    ld a, (P_S1)
    ld c, a
    or a
    jr nz, fp_from_u32_scaled_to_A_find
    ld b, 7
    ld a, (P_S0)
    ld c, a
fp_from_u32_scaled_to_A_find:
    bit 7, c
    jr nz, fp_from_u32_scaled_to_A_found
    sla c
    dec b
    jr fp_from_u32_scaled_to_A_find
fp_from_u32_scaled_to_A_found:
    ; EXP = FP_BIAS + B
    ld a, b
    add a, FP_BIAS
    ld (hl), a
    inc hl
    ; Shift left by (31-B) so the leading 1 lands on bit 31. The
    ; mantissa is then read from the top three bytes, PR_INT3..PR_INT1.
    ld a, 31
    sub b
    ld b, a
    ; PR_INT = P_S
    ld a, (P_S0)
    ld (PR_INT0), a
    ld a, (P_S1)
    ld (PR_INT1), a
    ld a, (P_S2)
    ld (PR_INT2), a
    ld a, (P_S3)
    ld (PR_INT3), a
    call shl32_INT_by_B
    ; store sign=0, fraction = top 23 bits of mantissa (hidden 1 removed)
    ld a, (PR_INT3)
    and 0x7F
    ld (hl), a
    inc hl
    ld a, (PR_INT2)
    ld (hl), a
    inc hl
    ld a, (PR_INT1)
    ld (hl), a
    pop hl
    ret
; ============================================================
; BSS / WORKSPACE
; ============================================================
; All routines share this single static workspace, so none of them can
; be re-entered while a call is in progress.
; Several routines step a pointer across the multi-byte values below
; (for example PR_INT0..PR_INT3), so each group must stay contiguous
; and in the order declared here.
.balign 16
.bss
; Unpacked A
A_exp: .space 1 ; biased exponent, 0 = value is zero
A_sign: .space 1 ; 0 = positive, 1 = negative
A_m2: .space 1 ; mantissa bits 23..16 (bit 7 = explicit leading 1)
A_m1: .space 1 ; mantissa bits 15..8
A_m0: .space 1 ; mantissa bits 7..0
; Unpacked B
B_exp: .space 1
B_sign: .space 1
B_m2: .space 1
B_m1: .space 1
B_m0: .space 1
; 48-bit workspace (P0 LSB .. P5 MSB)
P0:
.space 1
P1:
.space 1
P2:
.space 1
P3:
.space 1
P4:
.space 1
P5:
.space 1
; Loop counter for the multi-bit shift routines
SHCNT:
.space 1
; Print temps
PR_SIGN: .space 1
PR_E: .space 1
PR_M2: .space 1
PR_M1: .space 1
PR_M0: .space 1
; 32-bit scratch integer, PR_INT0 = least significant byte
PR_INT0: .space 1
PR_INT1: .space 1
PR_INT2: .space 1
PR_INT3: .space 1
; second 32-bit scratch value, PR_R0 = least significant byte
PR_R0: .space 1
PR_R1: .space 1
PR_R2: .space 1
PR_R3: .space 1
; Parse temps
P_SIGN: .space 1 ; 1 when the input started with '-'
P_FRACN: .space 1 ; number of fraction digits consumed
; 32-bit accumulator for the parsed digits, P_S0 = least significant byte
P_S0: .space 1
P_S1: .space 1
P_S2: .space 1
P_S3: .space 1
; Digit buffer
; A 32-bit value has up to 10 decimal digits (4294967295), so the
; buffer needs 10 bytes. With a 1-byte buffer the second digit would
; land on DIGLEN.
DIGBUF:
.space 10
; Number of digits currently held
DIGLEN:
.space 1
; ============================================================
; pow10_table: 10^k constants (k=0..6) in THIS float encoding
; Verified:
; 1.0 = 127 00 00 00
; 10.0 = 130 20 00 00
; 100.0 = 133 48 00 00
; 1000.0 = 136 7A 00 00
; 10000.0 = 140 1C 40 00
; 100000.0 = 143 43 50 00
; 1000000.0= 146 74 24 00
; Each entry is 4 bytes: EXP, then the 23 fraction bits with sign 0.
; fp_parse addresses entry k as pow10_table + 4*k and passes it to
; fp_div as the divisor.
; ============================================================
.section "zone", "acrx"
pow10_table:
.byte 127, 0x00, 0x00, 0x00; 10^0 = 1
.byte 130, 0x20, 0x00, 0x00; 10^1 = 10 = 1.25 * 2^3
.byte 133, 0x48, 0x00, 0x00; 10^2 = 100 = 1.5625 * 2^6
.byte 136, 0x7A, 0x00, 0x00; 10^3 = 1000 = 1.953125 * 2^9
.byte 140, 0x1C, 0x40, 0x00; 10^4 = 10000
.byte 143, 0x43, 0x50, 0x00; 10^5 = 100000
.byte 146, 0x74, 0x24, 0x00; 10^6 = 1000000