; SPDX-License-Identifier: MPL-2.0
; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell
; ============================================================
; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax)
; ============================================================
; Float format in memory (big-endian, 4 bytes):
;   byte0: EXP        (8-bit biased exponent, 0 = zero)
;   byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits)
;   byte2: F15..F8
;   byte3: F7..F0
; For EXP != 0:
;   value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS)
;   FP_BIAS = 127
;
; Calling convention (in-place ops):
;   HL -> A (4 bytes)
;   DE -> B (4 bytes)
;   fp_add: A = A + B (stored back at HL)
;   fp_sub: A = A - B
;   fp_mul: A = A * B
;   fp_div: A = A / B
;
; Extra:
;   fp_print: print float at (HL) using external os_print_vec (A=ASCII)
;   fp_parse: parse null-terminated string at (DE) into float at (HL)
;
; Limitations:
;   - No NaN/Inf/denormals
;   - Truncation (no rounding)
;   - fp_print prints fixed decimals with a lightweight fraction path
;   - fp_parse supports optional +/- and '.' up to MAX_FRAC digits,
;     no exponent notation
; ============================================================

        .equ    FP_BIAS, 127            ; exponent bias
        .equ    FRAC_DIGITS, 6          ; digits fp_print emits after '.'
        .equ    MAX_FRAC, 6             ; fraction digits fp_parse accepts;
                                        ; pow10_table only covers k = 0..6

        .extern os_print_vec

; ============================================================
; CODE
; ============================================================
        .section "zone", "acrx"

; ------------------------------------------------------------
; External routine you provide:
;   os_print_vec: prints ASCII character in A
; ------------------------------------------------------------
; os_print_vec is external, not defined here.
; ============================================================
; Public API: fp_add / fp_sub / fp_mul / fp_div
; ============================================================
; In:   HL -> A (4-byte float, overwritten with the result)
;       DE -> B (4-byte float, only read)
; Out:  result stored at (HL); HL and DE are unchanged on return
; Clobbers: AF, BC, and the A_*/B_*/P*/SHCNT workspace
; NOTE: exponent overflow is not detected (the 8-bit exponent wraps);
;       underflow during normalization flushes to zero.

; ------------------------------------------------------------
; fp_add: A = A + B
; ------------------------------------------------------------
fp_add:
        call    fp_unpack_AB
fp_add_unpacked:                        ; shared tail, also entered from fp_sub
        ld      a, (A_exp)
        or      a
        jp      z, fp_pack_from_B_into_A ; A == 0 -> result is B
        ld      a, (B_exp)
        or      a
        ret     z                       ; B == 0 -> A in memory already is the result
        ld      a, (A_sign)
        ld      b, a
        ld      a, (B_sign)
        xor     b
        jp      z, fp_add_same_sign     ; equal signs   -> add magnitudes
        jp      fp_add_diff_sign        ; opposite signs -> subtract magnitudes

; ------------------------------------------------------------
; fp_sub: A = A - B
; The sign of the *unpacked* copy of B is inverted, so the operand
; in memory is never written (and A - A works when HL == DE).
; ------------------------------------------------------------
fp_sub:
        call    fp_unpack_AB
        ld      a, (B_sign)
        xor     1
        ld      (B_sign), a
        jp      fp_add_unpacked

; ------------------------------------------------------------
; fp_mul: A = A * B
; ------------------------------------------------------------
fp_mul:
        call    fp_unpack_AB
        ; anything times zero is zero
        ld      a, (A_exp)
        or      a
        jp      z, fp_store_zero_A
        ld      a, (B_exp)
        or      a
        jp      z, fp_store_zero_A
        ; sign = A_sign XOR B_sign
        ld      a, (A_sign)
        ld      b, a
        ld      a, (B_sign)
        xor     b
        ld      (A_sign), a
        ; exponent = A_exp + B_exp - BIAS (8-bit, wraps on overflow)
        ld      a, (A_exp)
        ld      b, a
        ld      a, (B_exp)
        add     a, b
        sub     FP_BIAS
        ld      (A_exp), a
        ; product = A_mant * B_mant (24x24 -> 48 bits in P0..P5).
        ; The multiply uses HL and DE, so keep the operand pointers safe.
        push    hl
        push    de
        call    mul24x24_schoolbook
        pop     de
        pop     hl
        call    norm_product_to_A
        jp      fp_packA

; ------------------------------------------------------------
; fp_div: A = A / B   (B == 0 yields 0 as a simple error value)
; ------------------------------------------------------------
fp_div:
        call    fp_unpack_AB
        ld      a, (A_exp)
        or      a
        jp      z, fp_store_zero_A      ; 0 / x = 0
        ld      a, (B_exp)
        or      a
        jp      z, fp_store_zero_A      ; x / 0 -> 0
        ; sign = A_sign XOR B_sign
        ld      a, (A_sign)
        ld      b, a
        ld      a, (B_sign)
        xor     b
        ld      (A_sign), a
        ; exponent = A_exp - B_exp + BIAS (8-bit, wraps on overflow)
        ld      a, (B_exp)
        ld      b, a
        ld      a, (A_exp)
        sub     b
        add     a, FP_BIAS
        ld      (A_exp), a
        call    div_mantissas_to_A
        call    normalize_A_mant        ; quotient MSB is bit 23 or bit 22
        jp      fp_packA

; ============================================================
; Add/Sub core (operates on the unpacked operands)
; ============================================================

; Same sign: add the aligned mantissas, keep A's sign.
fp_add_same_sign:
        call    align_exponents_A_B
        call    add24_A_plus_B
        jr      nc, fp_add_same_sign_noCarry
        ; The sum needs 25 bits: shift right one place and put the
        ; carried-out bit back as the new mantissa MSB.
        call    shr24_A_1
        ld      a, (A_m2)
        or      0x80
        ld      (A_m2), a
        ld      a, (A_exp)
        inc     a
        ld      (A_exp), a
fp_add_same_sign_noCarry:
        call    normalize_A_mant
        jp      fp_packA

; Opposite signs: larger magnitude minus smaller, sign of the larger.
fp_add_diff_sign:
        call    compare_mag_A_B
        jr      c, fp_add_diff_sign_A_ge_B
        call    swap_A_B_unpacked       ; |B| > |A| -> make A the larger one
fp_add_diff_sign_A_ge_B:
        call    align_exponents_A_B
        call    sub24_A_minus_B
        call    is_A_mant_zero
        jp      z, fp_store_zero_A      ; exact cancellation
        call    normalize_A_mant
        jp      fp_packA

; ============================================================
; Unpack / Pack helpers
; ============================================================

; Unpack both operands. In: HL -> A, DE -> B. Preserves HL and DE.
fp_unpack_AB:
        call    fp_unpackA
        jp      fp_unpackB

; Unpack the float at (HL) into A_exp / A_sign / A_m2..A_m0.
; A_sign is 0 or 1; the mantissa gets the hidden 1 inserted as bit 23.
; Preserves HL. Clobbers AF, B.
fp_unpackA:
        push    hl
        ld      a, (hl)
        ld      (A_exp), a
        or      a
        jr      z, fp_unpackA_zeroA
        inc     hl
        ld      a, (hl)
        ld      b, a
        rlca                            ; sign bit7 -> bit0
        and     1
        ld      (A_sign), a
        ld      a, b
        or      0x80                    ; replace the sign bit with the hidden 1
        ld      (A_m2), a
        inc     hl
        ld      a, (hl)
        ld      (A_m1), a
        inc     hl
        ld      a, (hl)
        ld      (A_m0), a
        pop     hl
        ret
fp_unpackA_zeroA:                       ; A = 0 here
        ld      (A_sign), a
        ld      (A_m2), a
        ld      (A_m1), a
        ld      (A_m0), a
        pop     hl
        ret

; Unpack the float at (DE) into B_exp / B_sign / B_m2..B_m0.
; Preserves DE. Clobbers AF, B.
fp_unpackB:
        push    de
        ld      a, (de)
        ld      (B_exp), a
        or      a
        jr      z, fp_unpackB_zeroB
        inc     de
        ld      a, (de)
        ld      b, a
        rlca
        and     1
        ld      (B_sign), a
        ld      a, b
        or      0x80
        ld      (B_m2), a
        inc     de
        ld      a, (de)
        ld      (B_m1), a
        inc     de
        ld      a, (de)
        ld      (B_m0), a
        pop     de
        ret
fp_unpackB_zeroB:                       ; A = 0 here
        ld      (B_sign), a
        ld      (B_m2), a
        ld      (B_m1), a
        ld      (B_m0), a
        pop     de
        ret

; Pack unpacked A into the 4 bytes at (HL). A_exp == 0 stores an
; all-zero float. Preserves HL. Clobbers AF, B.
fp_packA:
        push    hl
        ld      a, (A_exp)
        or      a
        jr      nz, fp_packA_packNZ
        ld      (hl), a                 ; canonical zero: 00 00 00 00
        inc     hl
        ld      (hl), a
        inc     hl
        ld      (hl), a
        inc     hl
        ld      (hl), a
        pop     hl
        ret
fp_packA_packNZ:
        ld      (hl), a
        inc     hl
        ld      a, (A_sign)
        rrca                            ; sign bit0 -> bit7
        and     0x80
        ld      b, a
        ld      a, (A_m2)
        and     0x7F                    ; drop the hidden 1
        or      b
        ld      (hl), a
        inc     hl
        ld      a, (A_m1)
        ld      (hl), a
        inc     hl
        ld      a, (A_m0)
        ld      (hl), a
        pop     hl
        ret

; Store unpacked B at (HL). Implemented by exchanging the unpacked
; operands and packing A, so a zero B is written as a canonical zero.
fp_pack_from_B_into_A:
        call    swap_A_B_unpacked
        jp      fp_packA

; Store 0.0 at (HL) and clear unpacked A.
fp_store_zero_A:
        xor     a
        ld      (A_exp), a
        ld      (A_sign), a
        ld      (A_m2), a
        ld      (A_m1), a
        ld      (A_m0), a
        jp      fp_packA

; ============================================================
; Exponent alignment / compare / swap
; ============================================================

; Make A the operand with the larger exponent (swapping if needed),
; then shift B's mantissa right by the exponent difference and give
; B the same exponent. Clobbers AF, BC.
align_exponents_A_B:
        ld      a, (A_exp)
        ld      b, a
        ld      a, (B_exp)
        cp      b
        ret     z                       ; already aligned
        jr      c, align_exponents_A_B_bigger_exp ; B_exp < A_exp
        call    swap_A_B_unpacked
align_exponents_A_B_bigger_exp:
        ld      a, (B_exp)
        ld      c, a
        ld      a, (A_exp)
        sub     c                       ; A = exponent difference (> 0)
        call    shr24_B_by_A
        ld      a, (A_exp)
        ld      (B_exp), a
        ret

; Compare magnitudes. Out: carry set if |A| >= |B|, clear if |A| < |B|.
; Fields are compared most significant first; the first differing
; byte decides. Clobbers AF, B.
compare_mag_A_B:
        ld      a, (B_exp)
        ld      b, a
        ld      a, (A_exp)
        cp      b
        jr      nz, compare_mag_A_B_decide
        ld      a, (B_m2)
        ld      b, a
        ld      a, (A_m2)
        cp      b
        jr      nz, compare_mag_A_B_decide
        ld      a, (B_m1)
        ld      b, a
        ld      a, (A_m1)
        cp      b
        jr      nz, compare_mag_A_B_decide
        ld      a, (B_m0)
        ld      b, a
        ld      a, (A_m0)
        cp      b                       ; equal -> CF clear -> ccf reports A >= B
compare_mag_A_B_decide:
        ccf                             ; cp set CF when A-byte < B-byte; invert it
        ret

; Exchange the unpacked operands (exp, sign, m2, m1, m0).
; Relies on A_exp..A_m0 and B_exp..B_m0 each being 5 consecutive
; bytes in that order, as declared in the workspace.
; Preserves BC, DE, HL. Clobbers AF.
swap_A_B_unpacked:
        push    hl
        push    de
        push    bc
        ld      hl, A_exp
        ld      de, B_exp
        ld      b, 5
swap_A_B_unpacked_loop:
        ld      a, (de)
        ld      c, (hl)
        ld      (hl), a
        ld      a, c
        ld      (de), a
        inc     hl
        inc     de
        djnz    swap_A_B_unpacked_loop
        pop     bc
        pop     de
        pop     hl
        ret

; ============================================================
; 24-bit mantissa ops (all leave HL/DE untouched)
; ============================================================

; A_m += B_m. Out: carry = carry out of bit 23. Clobbers A, B.
add24_A_plus_B:
        ld      a, (B_m0)
        ld      b, a
        ld      a, (A_m0)
        add     a, b
        ld      (A_m0), a
        ld      a, (B_m1)
        ld      b, a
        ld      a, (A_m1)
        adc     a, b
        ld      (A_m1), a
        ld      a, (B_m2)
        ld      b, a
        ld      a, (A_m2)
        adc     a, b
        ld      (A_m2), a
        ret

; A_m -= B_m. Out: carry = borrow. Clobbers A, B.
sub24_A_minus_B:
        ld      a, (B_m0)
        ld      b, a
        ld      a, (A_m0)
        sub     b
        ld      (A_m0), a
        ld      a, (B_m1)
        ld      b, a
        ld      a, (A_m1)
        sbc     a, b
        ld      (A_m1), a
        ld      a, (B_m2)
        ld      b, a
        ld      a, (A_m2)
        sbc     a, b
        ld      (A_m2), a
        ret

; Out: Z set if the A mantissa is all zero. Clobbers A, B.
is_A_mant_zero:
        ld      a, (A_m2)
        ld      b, a
        ld      a, (A_m1)
        or      b
        ld      b, a
        ld      a, (A_m0)
        or      b
        ret

; A_m >>= 1 (zero shifted in). Out: carry = bit shifted out.
shr24_A_1:
        ld      a, (A_m2)
        srl     a
        ld      (A_m2), a
        ld      a, (A_m1)
        rr      a
        ld      (A_m1), a
        ld      a, (A_m0)
        rr      a
        ld      (A_m0), a
        ret

; A_m <<= 1. Out: carry = bit shifted out of bit 23.
shl24_A_1:
        ld      a, (A_m0)
        add     a, a
        ld      (A_m0), a
        ld      a, (A_m1)
        adc     a, a
        ld      (A_m1), a
        ld      a, (A_m2)
        adc     a, a
        ld      (A_m2), a
        ret

; B_m >>= A (A = 0..255). A count of 24 or more clears the mantissa.
; Clobbers AF, B.
shr24_B_by_A:
        or      a
        ret     z
        cp      24
        jr      c, shr24_B_by_A_ok
        xor     a
        ld      (B_m2), a
        ld      (B_m1), a
        ld      (B_m0), a
        ret
shr24_B_by_A_ok:
        ld      b, a
shr24_B_by_A_loop:
        ld      a, (B_m2)
        srl     a
        ld      (B_m2), a
        ld      a, (B_m1)
        rr      a
        ld      (B_m1), a
        ld      a, (B_m0)
        rr      a
        ld      (B_m0), a
        djnz    shr24_B_by_A_loop
        ret

; Shift the A mantissa left until bit 23 is set, decrementing A_exp
; once per shift. A zero mantissa, or an exponent that reaches 0
; while shifting (underflow), leaves A_exp = 0 so the value packs as 0.
normalize_A_mant:
        call    is_A_mant_zero
        jr      nz, normalize_A_mant_nz
        xor     a
        ld      (A_exp), a
        ret
normalize_A_mant_nz:
        ld      a, (A_m2)
        bit     7, a
        ret     nz
        call    shl24_A_1
        ld      a, (A_exp)
        dec     a
        ld      (A_exp), a
        ret     z                       ; underflow -> flush to zero
        jr      normalize_A_mant_nz

; ============================================================
; 8x8 -> 16 multiply (unsigned), shift-add
; in:  A = multiplicand, C = multiplier
; out: HL = 16-bit product
; Clobbers B, C, DE, F.
; ============================================================
mul8u:
        ld      e, a                    ; DE = multiplicand widened to 16 bits so
        ld      d, 0                    ; the left shifts cannot lose high bits
        ld      hl, 0
        ld      b, 8
mul8u_m8:
        srl     c                       ; next multiplier bit -> carry
        jr      nc, mul8u_noadd
        add     hl, de
mul8u_noadd:
        sla     e
        rl      d
        djnz    mul8u_m8
        ret

; ============================================================
; 24x24 schoolbook multiply into P0..P5 (P0 LSB)
; P = A_m * B_m, built from nine 8x8 partial products.
; Clobbers AF, BC, DE, HL.
; ============================================================
mul24x24_schoolbook:
        xor     a
        ld      (P0), a
        ld      (P1), a
        ld      (P2), a
        ld      (P3), a
        ld      (P4), a
        ld      (P5), a
        ; A_m0 * B_m0 -> byte offset 0
        ld      a, (B_m0)
        ld      c, a
        ld      a, (A_m0)
        call    mul8u
        call    add16_to_P_at0
        ; A_m0 * B_m1 -> offset 1
        ld      a, (B_m1)
        ld      c, a
        ld      a, (A_m0)
        call    mul8u
        call    add16_to_P_at1
        ; A_m0 * B_m2 -> offset 2
        ld      a, (B_m2)
        ld      c, a
        ld      a, (A_m0)
        call    mul8u
        call    add16_to_P_at2
        ; A_m1 * B_m0 -> offset 1
        ld      a, (B_m0)
        ld      c, a
        ld      a, (A_m1)
        call    mul8u
        call    add16_to_P_at1
        ; A_m1 * B_m1 -> offset 2
        ld      a, (B_m1)
        ld      c, a
        ld      a, (A_m1)
        call    mul8u
        call    add16_to_P_at2
        ; A_m1 * B_m2 -> offset 3
        ld      a, (B_m2)
        ld      c, a
        ld      a, (A_m1)
        call    mul8u
        call    add16_to_P_at3
        ; A_m2 * B_m0 -> offset 2
        ld      a, (B_m0)
        ld      c, a
        ld      a, (A_m2)
        call    mul8u
        call    add16_to_P_at2
        ; A_m2 * B_m1 -> offset 3
        ld      a, (B_m1)
        ld      c, a
        ld      a, (A_m2)
        call    mul8u
        call    add16_to_P_at3
        ; A_m2 * B_m2 -> offset 4
        ld      a, (B_m2)
        ld      c, a
        ld      a, (A_m2)
        call    mul8u
        jp      add16_to_P_at4

; add16_to_P_atN: add HL into P at byte offset N and ripple any carry
; into the bytes above. Clobbers AF, HL.
add16_to_P_at0:
        ld      a, (P0)
        add     a, l
        ld      (P0), a
        ld      a, (P1)
        adc     a, h
        ld      (P1), a
        ret     nc
        jp      P_carry_into_P2
add16_to_P_at1:
        ld      a, (P1)
        add     a, l
        ld      (P1), a
        ld      a, (P2)
        adc     a, h
        ld      (P2), a
        ret     nc
        jp      P_carry_into_P3
add16_to_P_at2:
        ld      a, (P2)
        add     a, l
        ld      (P2), a
        ld      a, (P3)
        adc     a, h
        ld      (P3), a
        ret     nc
        jp      P_carry_into_P4
add16_to_P_at3:
        ld      a, (P3)
        add     a, l
        ld      (P3), a
        ld      a, (P4)
        adc     a, h
        ld      (P4), a
        ret     nc
        jp      P_carry_into_P5
add16_to_P_at4:
        ld      a, (P4)
        add     a, l
        ld      (P4), a
        ld      a, (P5)
        adc     a, h
        ld      (P5), a
        ret                             ; a 24x24 product fits in 48 bits

; Carry ripple: each stage increments one byte and stops unless that
; byte wrapped to zero (Z set by inc), which means the carry goes on.
P_carry_into_P2:
        ld      hl, P2
        inc     (hl)
        ret     nz
P_carry_into_P3:
        ld      hl, P3
        inc     (hl)
        ret     nz
P_carry_into_P4:
        ld      hl, P4
        inc     (hl)
        ret     nz
P_carry_into_P5:
        ld      hl, P5
        inc     (hl)
        ret

; ============================================================
; Normalize product P into A mantissa
; P is 48-bit, P0 LSB .. P5 MSB
; With both input mantissas normalized, the product's top set bit is
; bit 47 or bit 46. Clobbers AF, B.
; ============================================================
norm_product_to_A:
        ld      a, (P5)
        bit     7, a
        jr      z, norm_product_shift23
        ld      a, 24                   ; top bit is 47: take bits 47..24, exp + 1
        call    shr48_P_by_A
        ld      a, (A_exp)
        inc     a
        ld      (A_exp), a
        jr      norm_product_take
norm_product_shift23:
        ld      a, 23                   ; top bit is 46: take bits 46..23
        call    shr48_P_by_A
norm_product_take:
        ld      a, (P2)
        ld      (A_m2), a
        ld      a, (P1)
        ld      (A_m1), a
        ld      a, (P0)
        ld      (A_m0), a
        ret

; P >>= A (48-bit logical shift right, A = 0..255). Clobbers AF, B.
shr48_P_by_A:
        or      a
        ret     z
        ld      b, a
shr48_P_by_A_loop:
        ld      a, (P5)
        srl     a
        ld      (P5), a
        ld      a, (P4)
        rr      a
        ld      (P4), a
        ld      a, (P3)
        rr      a
        ld      (P3), a
        ld      a, (P2)
        rr      a
        ld      (P2), a
        ld      a, (P1)
        rr      a
        ld      (P1), a
        ld      a, (P0)
        rr      a
        ld      (P0), a
        djnz    shr48_P_by_A_loop
        ret

; ============================================================
; Mantissa division (restoring-style)
; A_m = (A_m << 23) / B_m
; The running remainder lives in P5..P3. Clobbers AF, BC.
; ============================================================
div_mantissas_to_A:
        ; P = A_m as 48-bit, then shift left 23
        xor     a
        ld      (P3), a
        ld      (P4), a
        ld      (P5), a
        ld      a, (A_m0)
        ld      (P0), a
        ld      a, (A_m1)
        ld      (P1), a
        ld      a, (A_m2)
        ld      (P2), a
        ld      a, 23
        call    shl48_P_by_A
        ; clear quotient
        xor     a
        ld      (A_m2), a
        ld      (A_m1), a
        ld      (A_m0), a
        ld      b, 24                   ; one pass per quotient bit
div_mantissas_loop:
        push    bc                      ; the 24-bit helpers use B as scratch
        call    shl24_A_1               ; make room for the next quotient bit
        call    shl48_P_1               ; CF = 25th bit of the shifted remainder
        jr      c, div_mantissas_force
        call    sub24_Phigh_minus_B
        jr      nc, div_mantissas_setbit
        call    add24_Phigh_plus_B      ; borrow -> restore, quotient bit stays 0
        jr      div_mantissas_next
div_mantissas_force:
        ; The remainder overflowed into bit 24, so it is certainly >= B_m.
        ; The borrow from this subtract cancels that carried-out bit.
        call    sub24_Phigh_minus_B
div_mantissas_setbit:
        ld      a, (A_m0)
        or      0x01
        ld      (A_m0), a
div_mantissas_next:
        pop     bc
        djnz    div_mantissas_loop
        ret

; P <<= A (48-bit, A = 0..255). Clobbers AF, B.
shl48_P_by_A:
        or      a
        ret     z
        ld      b, a
shl48_P_by_A_loop:
        call    shl48_P_1
        djnz    shl48_P_by_A_loop
        ret

; P <<= 1. Out: carry = bit shifted out of P5.
shl48_P_1:
        ld      a, (P0)
        add     a, a
        ld      (P0), a
        ld      a, (P1)
        adc     a, a
        ld      (P1), a
        ld      a, (P2)
        adc     a, a
        ld      (P2), a
        ld      a, (P3)
        adc     a, a
        ld      (P3), a
        ld      a, (P4)
        adc     a, a
        ld      (P4), a
        ld      a, (P5)
        adc     a, a
        ld      (P5), a
        ret

; P5..P3 -= B_m. Out: carry set indicates borrow. Clobbers A, B.
sub24_Phigh_minus_B:
        ld      a, (B_m0)
        ld      b, a
        ld      a, (P3)
        sub     b
        ld      (P3), a
        ld      a, (B_m1)
        ld      b, a
        ld      a, (P4)
        sbc     a, b
        ld      (P4), a
        ld      a, (B_m2)
        ld      b, a
        ld      a, (P5)
        sbc     a, b
        ld      (P5), a
        ret

; P5..P3 += B_m (undoes sub24_Phigh_minus_B). Clobbers A, B.
add24_Phigh_plus_B:
        ld      a, (B_m0)
        ld      b, a
        ld      a, (P3)
        add     a, b
        ld      (P3), a
        ld      a, (B_m1)
        ld      b, a
        ld      a, (P4)
        adc     a, b
        ld      (P4), a
        ld      a, (B_m2)
        ld      b, a
        ld      a, (P5)
        adc     a, b
        ld      (P5), a
        ret

; ============================================================
; fp_print: fixed format printing
; Prints: [-]I.FFFFFF (FRAC_DIGITS digits)
; Uses os_print_vec (A=char)
; In:  HL -> float
; Clobbers AF, BC, DE, HL and the PR_*/DIGLEN/SHCNT workspace.
;
; The value is built as 32.24 fixed point: integer part in
; PR_INT3..PR_INT0, fraction in PR_R2..PR_R0 (units of 2^-24).
; Starting with the 24-bit mantissa in the fraction bytes, the pair
; is shifted by k = EXP - (FP_BIAS - 1): left for k > 0, right for
; k < 0. Fraction digits are truncated, not rounded. Magnitudes of
; 2^32 and above do not fit and print incorrectly (the left shift
; is capped at 32). No register is kept live across os_print_vec.
; ============================================================
fp_print:
        xor     a
        ld      (PR_SIGN), a
        ld      (PR_INT0), a
        ld      (PR_INT1), a
        ld      (PR_INT2), a
        ld      (PR_INT3), a
        ld      (PR_R0), a
        ld      (PR_R1), a
        ld      (PR_R2), a
        ld      (PR_R3), a
        ld      a, (hl)
        or      a
        jp      z, fp_print_emit        ; EXP == 0 -> prints 0.000000
        ld      c, a                    ; C = biased exponent
        inc     hl
        ld      a, (hl)
        ld      b, a
        and     0x80
        ld      (PR_SIGN), a            ; non-zero means negative
        ld      a, b
        or      0x80                    ; hidden 1 replaces the sign bit
        ld      (PR_R2), a
        inc     hl
        ld      a, (hl)
        ld      (PR_R1), a
        inc     hl
        ld      a, (hl)
        ld      (PR_R0), a
        ld      a, c
        sub     FP_BIAS - 1             ; k = EXP - 126
        jr      c, fp_print_small       ; k < 0 -> value below 0.5
        jr      z, fp_print_emit        ; k = 0 -> mantissa already is the fraction
        cp      33
        jr      c, fp_print_shl
        ld      a, 32                   ; cap: larger values cannot be shown
fp_print_shl:
        ld      b, a
fp_print_shl_loop:
        call    fp_print_shl56
        djnz    fp_print_shl_loop
        jr      fp_print_emit
fp_print_small:
        neg                             ; A = 126 - EXP = right-shift count (1..125)
        cp      24
        jr      c, fp_print_shr
        xor     a                       ; every mantissa bit is shifted out
        ld      (PR_R0), a
        ld      (PR_R1), a
        ld      (PR_R2), a
        jr      fp_print_emit
fp_print_shr:
        ld      b, a
fp_print_shr_loop:
        ld      hl, PR_R2
        srl     (hl)
        ld      hl, PR_R1
        rr      (hl)
        ld      hl, PR_R0
        rr      (hl)
        djnz    fp_print_shr_loop
fp_print_emit:
        ld      a, (PR_SIGN)
        or      a
        jr      z, fp_print_int
        ld      a, '-'
        call    os_print_vec
fp_print_int:
        call    print_u32_dec           ; leaves PR_INT = 0
        ld      a, '.'
        call    os_print_vec
        ld      a, FRAC_DIGITS
        ld      (SHCNT), a              ; digit counter kept in memory
fp_print_fr:
        call    fp_print_frac_mul10     ; next digit lands in PR_R3
        ld      a, (PR_R3)
        add     a, '0'
        call    os_print_vec
        xor     a
        ld      (PR_R3), a              ; keep only the remaining fraction
        ld      hl, SHCNT
        dec     (hl)
        jr      nz, fp_print_fr
        ret

; Shift the 56-bit value PR_INT3..PR_INT0:PR_R2..PR_R0 left one bit.
; (ld hl,nn leaves the flags alone, so the carry chains through.)
fp_print_shl56:
        ld      hl, PR_R0
        sla     (hl)
        ld      hl, PR_R1
        rl      (hl)
        ld      hl, PR_R2
        rl      (hl)
        ld      hl, PR_INT0
        rl      (hl)
        ld      hl, PR_INT1
        rl      (hl)
        ld      hl, PR_INT2
        rl      (hl)
        ld      hl, PR_INT3
        rl      (hl)
        ret

; PR_R (32-bit, PR_R3 = 0 on entry) *= 10, computed as 2x + 8x.
; The overflow above 24 bits, i.e. the next decimal digit, ends up
; in PR_R3. PR_INT is used as scratch. Clobbers AF, B.
fp_print_frac_mul10:
        ld      b, 1
        call    shl32_R_by_B            ; PR_R = 2x
        ld      a, (PR_R0)
        ld      (PR_INT0), a
        ld      a, (PR_R1)
        ld      (PR_INT1), a
        ld      a, (PR_R2)
        ld      (PR_INT2), a
        ld      a, (PR_R3)
        ld      (PR_INT3), a
        ld      b, 2
        call    shl32_R_by_B            ; PR_R = 8x
        ld      a, (PR_INT0)
        ld      b, a
        ld      a, (PR_R0)
        add     a, b
        ld      (PR_R0), a
        ld      a, (PR_INT1)
        ld      b, a
        ld      a, (PR_R1)
        adc     a, b
        ld      (PR_R1), a
        ld      a, (PR_INT2)
        ld      b, a
        ld      a, (PR_R2)
        adc     a, b
        ld      (PR_R2), a
        ld      a, (PR_INT3)
        ld      b, a
        ld      a, (PR_R3)
        adc     a, b
        ld      (PR_R3), a
        ret

; PR_INT <<= B (32-bit, B = 0..255). Clobbers AF, B; HL/DE untouched.
shl32_INT_by_B:
        ld      a, b
        or      a
        ret     z
shl32_INT_by_B_loop:
        ld      a, (PR_INT0)
        add     a, a
        ld      (PR_INT0), a
        ld      a, (PR_INT1)
        adc     a, a
        ld      (PR_INT1), a
        ld      a, (PR_INT2)
        adc     a, a
        ld      (PR_INT2), a
        ld      a, (PR_INT3)
        adc     a, a
        ld      (PR_INT3), a
        djnz    shl32_INT_by_B_loop
        ret

; Print PR_INT (u32) as decimal; prints "0" for zero.
; Digits come out least significant first, so they are parked on the
; stack and popped in printing order. DIGLEN counts them.
; PR_INT is 0 on return. Clobbers AF, BC, DE, HL.
print_u32_dec:
        xor     a
        ld      (DIGLEN), a
print_u32_dec_dloop:
        call    u32_div10_inplace       ; A = PR_INT mod 10, PR_INT /= 10
        add     a, '0'
        push    af
        ld      hl, DIGLEN
        inc     (hl)
        ld      a, (PR_INT0)
        ld      hl, PR_INT1
        or      (hl)
        ld      hl, PR_INT2
        or      (hl)
        ld      hl, PR_INT3
        or      (hl)
        jr      nz, print_u32_dec_dloop
print_u32_dec_pr:
        pop     af                      ; most significant remaining digit
        call    os_print_vec
        ld      hl, DIGLEN
        dec     (hl)
        jr      nz, print_u32_dec_pr
        ret

; Divide PR_INT (u32) by 10, return remainder in A (0..9).
; Long division one byte at a time, most significant byte first.
; Clobbers BC, DE, HL.
u32_div10_inplace:
        ld      b, 0                    ; running remainder
        ld      hl, PR_INT3
        call    u32_div10_step
        ld      hl, PR_INT2
        call    u32_div10_step
        ld      hl, PR_INT1
        call    u32_div10_step
        ld      hl, PR_INT0
        call    u32_div10_step
        ld      a, b
        ret

; One long-division step.
; In:  B = remainder so far (0..9), HL -> dividend byte
; Out: (HL) = quotient byte, B = new remainder
u32_div10_step:
        ld      d, b                    ; DE = remainder*256 + byte
        ld      e, (hl)
        ld      c, 0                    ; quotient byte
u32_div10_div:
        ld      a, d
        or      a
        jr      nz, u32_div10_sub
        ld      a, e
        cp      10
        jr      c, u32_div10_done       ; DE < 10 -> finished
u32_div10_sub:
        ld      a, e
        sub     10
        ld      e, a
        ld      a, d
        sbc     a, 0
        ld      d, a
        inc     c
        jr      u32_div10_div
u32_div10_done:
        ld      (hl), c
        ld      b, e
        ret

; ============================================================
; fp_parse: parse decimal string -> float
; DE -> "[-]ddd[.ddd]\0"
; HL -> output float
; Out: float stored at (HL); HL unchanged; DE left in or after the number.
; At most MAX_FRAC fraction digits are used; parsing stops there.
; All digits are accumulated into one u32 (P_S3..P_S0), converted to
; float, then divided by 10^(number of fraction digits). Overflow of
; that u32 is not detected.
; Clobbers AF, BC, DE and the parse/print/arithmetic workspace.
; ============================================================
fp_parse:
        xor     a
        ld      (P_SIGN), a
        ld      (P_FRACN), a
        ld      (P_S0), a
        ld      (P_S1), a
        ld      (P_S2), a
        ld      (P_S3), a
        ; optional sign
        ld      a, (de)
        cp      '-'
        jr      nz, fp_parse_chkplus
        ld      a, 1
        ld      (P_SIGN), a
        jr      fp_parse_skip_sign
fp_parse_chkplus:
        cp      '+'
        jr      nz, fp_parse_intpart
fp_parse_skip_sign:
        inc     de
fp_parse_intpart:
        ld      a, (de)
        call    is_digit
        jr      nc, fp_parse_maybe_dot
        call    fp_parse_accum_digit
        inc     de
        jr      fp_parse_intpart
fp_parse_maybe_dot:
        cp      '.'                     ; is_digit leaves the character in A
        jr      nz, fp_parse_finish_scaled
        inc     de
fp_parse_fl:
        ld      a, (P_FRACN)            ; counter lives in memory: the digit
        cp      MAX_FRAC                ; helpers use B as scratch
        jr      nc, fp_parse_finish_scaled
        ld      a, (de)
        call    is_digit
        jr      nc, fp_parse_finish_scaled
        call    fp_parse_accum_digit
        ld      a, (P_FRACN)
        inc     a
        ld      (P_FRACN), a
        inc     de
        jr      fp_parse_fl
fp_parse_finish_scaled:
        ; convert scaled u32 to float at (HL); keep the output pointer
        push    hl
        call    fp_from_u32_scaled_to_A
        pop     hl
        ; divide by 10^k if there were fraction digits
        ld      a, (P_FRACN)
        or      a
        jr      z, fp_parse_apply_sign
        add     a, a
        add     a, a                    ; 4 bytes per table entry (k <= MAX_FRAC)
        ld      e, a
        ld      d, 0
        push    hl
        ld      hl, pow10_table
        add     hl, de
        ex      de, hl                  ; DE = &pow10_table[k]
        pop     hl
        push    hl
        call    fp_div
        pop     hl
fp_parse_apply_sign:
        ld      a, (P_SIGN)
        or      a
        ret     z
        ld      a, (hl)
        or      a
        ret     z                       ; keep zero canonical (no "-0")
        inc     hl
        ld      a, (hl)
        xor     0x80
        ld      (hl), a
        dec     hl
        ret

; P_S = P_S * 10 + digit. In: A = ASCII digit. Clobbers AF, BC.
fp_parse_accum_digit:
        sub     '0'
        ld      c, a
        call    u32_mul10_scaled        ; leaves C untouched
        jp      u32_add8_scaled

; Out: carry set if A is '0'..'9', clear otherwise. A is preserved.
is_digit:
        cp      '0'
        jr      c, is_digit_no
        cp      '9'+1
        jr      nc, is_digit_no
        scf
        ret
is_digit_no:
        or      a                       ; clears carry
        ret

; P_S = P_S*10 (uses PR_INT and PR_R0..3 as scratch)
; Computed as 2*P_S + 8*P_S. Clobbers AF, B.
u32_mul10_scaled:
        ; PR_INT = P_S * 2
        ld      a, (P_S0)
        ld      (PR_INT0), a
        ld      a, (P_S1)
        ld      (PR_INT1), a
        ld      a, (P_S2)
        ld      (PR_INT2), a
        ld      a, (P_S3)
        ld      (PR_INT3), a
        ld      b, 1
        call    shl32_INT_by_B
        ; PR_R = P_S * 8
        ld      a, (P_S0)
        ld      (PR_R0), a
        ld      a, (P_S1)
        ld      (PR_R1), a
        ld      a, (P_S2)
        ld      (PR_R2), a
        ld      a, (P_S3)
        ld      (PR_R3), a
        ld      b, 3
        call    shl32_R_by_B
        ; P_S = PR_INT + PR_R
        ld      a, (PR_R0)
        ld      b, a
        ld      a, (PR_INT0)
        add     a, b
        ld      (P_S0), a
        ld      a, (PR_R1)
        ld      b, a
        ld      a, (PR_INT1)
        adc     a, b
        ld      (P_S1), a
        ld      a, (PR_R2)
        ld      b, a
        ld      a, (PR_INT2)
        adc     a, b
        ld      (P_S2), a
        ld      a, (PR_R3)
        ld      b, a
        ld      a, (PR_INT3)
        adc     a, b
        ld      (P_S3), a
        ret

; PR_R <<= B (32-bit, B = 0..255). Clobbers AF, B; HL/DE untouched.
shl32_R_by_B:
        ld      a, b
        or      a
        ret     z
shl32_R_by_B_loop:
        ld      a, (PR_R0)
        add     a, a
        ld      (PR_R0), a
        ld      a, (PR_R1)
        adc     a, a
        ld      (PR_R1), a
        ld      a, (PR_R2)
        adc     a, a
        ld      (PR_R2), a
        ld      a, (PR_R3)
        adc     a, a
        ld      (PR_R3), a
        djnz    shl32_R_by_B_loop
        ret

; P_S += C (0..9). Clobbers AF.
u32_add8_scaled:
        ld      a, (P_S0)
        add     a, c
        ld      (P_S0), a
        ld      a, (P_S1)
        adc     a, 0
        ld      (P_S1), a
        ld      a, (P_S2)
        adc     a, 0
        ld      (P_S2), a
        ld      a, (P_S3)
        adc     a, 0
        ld      (P_S3), a
        ret

; Convert P_S (u32) to float at (HL). Positive only; sign handled by caller.
; In:  HL -> 4-byte destination, P_S3..P_S0 = unsigned 32-bit value
; Out: float stored at (HL); HL unchanged
; Values with more than 24 significant bits are truncated.
; Uses PR_INT as scratch. Clobbers AF, BC.
fp_from_u32_scaled_to_A:
        push    hl
        ld      a, (P_S0)
        ld      b, a
        ld      a, (P_S1)
        or      b
        ld      b, a
        ld      a, (P_S2)
        or      b
        ld      b, a
        ld      a, (P_S3)
        or      b
        jr      nz, fp_from_u32_scaled_to_A_nz
        ld      (hl), a                 ; value is 0 -> store 00 00 00 00
        inc     hl
        ld      (hl), a
        inc     hl
        ld      (hl), a
        inc     hl
        ld      (hl), a
        pop     hl
        ret
fp_from_u32_scaled_to_A_nz:
        ; Find the index (0..31) of the highest set bit in B: start at
        ; the top bit of the most significant non-zero byte ...
        ld      b, 31
        ld      a, (P_S3)
        ld      c, a
        or      a
        jr      nz, fp_from_u32_scaled_to_A_find
        ld      b, 23
        ld      a, (P_S2)
        ld      c, a
        or      a
        jr      nz, fp_from_u32_scaled_to_A_find
        ld      b, 15
        ld      a, (P_S1)
        ld      c, a
        or      a
        jr      nz, fp_from_u32_scaled_to_A_find
        ld      b, 7
        ld      a, (P_S0)
        ld      c, a
fp_from_u32_scaled_to_A_find:
        ; ... then walk down until that byte's bit 7 is set.
        bit     7, c
        jr      nz, fp_from_u32_scaled_to_A_found
        sla     c
        dec     b
        jr      fp_from_u32_scaled_to_A_find
fp_from_u32_scaled_to_A_found:
        ; EXP = FP_BIAS + msb index
        ld      a, b
        add     a, FP_BIAS
        ld      (hl), a
        inc     hl
        ; Left-justify: shift by (31 - msb) so the leading 1 sits in
        ; bit 31. The mantissa is then PR_INT3..PR_INT1.
        ld      a, 31
        sub     b
        ld      b, a
        ld      a, (P_S0)
        ld      (PR_INT0), a
        ld      a, (P_S1)
        ld      (PR_INT1), a
        ld      a, (P_S2)
        ld      (PR_INT2), a
        ld      a, (P_S3)
        ld      (PR_INT3), a
        call    shl32_INT_by_B
        ; sign = 0; masking bit 7 removes the hidden 1
        ld      a, (PR_INT3)
        and     0x7F
        ld      (hl), a
        inc     hl
        ld      a, (PR_INT2)
        ld      (hl), a
        inc     hl
        ld      a, (PR_INT1)
        ld      (hl), a
        pop     hl
        ret

; ============================================================
; BSS / WORKSPACE
; ============================================================
        .balign 16
        .bss

; Unpacked A (keep these five bytes together and in this order)
A_exp:          .space 1
A_sign:         .space 1
A_m2:           .space 1
A_m1:           .space 1
A_m0:           .space 1

; Unpacked B (same layout as A)
B_exp:          .space 1
B_sign:         .space 1
B_m2:           .space 1
B_m1:           .space 1
B_m0:           .space 1

; 48-bit workspace (P0 LSB .. P5 MSB)
P0:             .space 1
P1:             .space 1
P2:             .space 1
P3:             .space 1
P4:             .space 1
P5:             .space 1

SHCNT:          .space 1

; Print temps
PR_SIGN:        .space 1
PR_E:           .space 1
PR_M2:          .space 1
PR_M1:          .space 1
PR_M0:          .space 1
PR_INT0:        .space 1
PR_INT1:        .space 1
PR_INT2:        .space 1
PR_INT3:        .space 1
PR_R0:          .space 1
PR_R1:          .space 1
PR_R2:          .space 1
PR_R3:          .space 1

; Parse temps
P_SIGN:         .space 1
P_FRACN:        .space 1
P_S0:           .space 1
P_S1:           .space 1
P_S2:           .space 1
P_S3:           .space 1

; Digit buffer: a u32 has at most 10 decimal digits
DIGBUF:         .space 10
DIGLEN:         .space 1

; ============================================================
; pow10_table: 10^k constants (k=0..6) in THIS float encoding
; Verified:
;   1.0       = 127 00 00 00
;   10.0      = 130 20 00 00
;   100.0     = 133 48 00 00
;   1000.0    = 136 7A 00 00
;   10000.0   = 140 1C 40 00
;   100000.0  = 143 43 50 00
;   1000000.0 = 146 74 24 00
; ============================================================
        .section "zone", "acrx"
pow10_table:
        .byte   127, 0x00, 0x00, 0x00   ; 10^0 = 1
        .byte   130, 0x20, 0x00, 0x00   ; 10^1 = 10
        .byte   133, 0x48, 0x00, 0x00   ; 10^2 = 100
        .byte   136, 0x7A, 0x00, 0x00   ; 10^3 = 1000
        .byte   140, 0x1C, 0x40, 0x00   ; 10^4 = 10000
        .byte   143, 0x43, 0x50, 0x00   ; 10^5 = 100000
        .byte   146, 0x74, 0x24, 0x00   ; 10^6 = 1000000