; SPDX-License-Identifier: MPL-2.0
; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell
; ============================================================
; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax)
; ============================================================
; Float format in memory (big-endian, 4 bytes):
;   byte0: EXP        (8-bit biased exponent, 0 = zero)
;   byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits)
;   byte2: F15..F8
;   byte3: F7..F0
;
; For EXP != 0:
;   value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS)
;   FP_BIAS = 127
;
; Calling convention (in-place ops):
;   HL -> A (4 bytes, read and written)
;   DE -> B (4 bytes, read only - may live in ROM)
;   fp_add: A = A + B   (stored back at HL)
;   fp_sub: A = A - B
;   fp_mul: A = A * B
;   fp_div: A = A / B   (B == 0 yields 0)
;   The four arithmetic entry points preserve HL and DE and
;   clobber AF and BC.
;
; Extra:
;   fp_print: print float at (HL) using external os_print_vec (A=ASCII)
;   fp_parse: parse null-terminated string at (DE) into float at (HL)
;
; Limitations:
;   - No NaN/Inf/denormals
;   - Truncation (no rounding)
;   - Exponent overflow is not detected (the 8-bit exponent wraps);
;     normalisation flushes to zero when the exponent reaches 0
;   - fp_print prints FRAC_DIGITS fixed decimals (truncated); magnitudes
;     of 2^32 and above do not fit the 32-bit integer path and print
;     incorrectly
;   - fp_parse supports optional +/- and '.' up to MAX_FRAC digits,
;     no exponent notation; the digits must fit an unsigned 32-bit
;     integer once the decimal point is removed
; ============================================================

        .equ    FP_BIAS,127
        .equ    FRAC_DIGITS,6
        .equ    MAX_FRAC,6
        .equ    DIGBUF_LEN,10           ; a u32 has at most 10 decimal digits

        .extern os_print_vec

; ============================================================
; CODE
; ============================================================
        .section "zone","acrx"

; ------------------------------------------------------------
; External routine you provide:
;   os_print_vec: prints ASCII character in A
;   It is external, so no register is assumed to survive a call.
; ------------------------------------------------------------

; ============================================================
; Public API: fp_add / fp_sub / fp_mul / fp_div
; ============================================================

; ------------------------------------------------------------
; fp_add: A = A + B
;   In:  HL -> A, DE -> B      Out: result stored at (HL)
;   Preserves HL, DE.  Clobbers AF, BC.
; ------------------------------------------------------------
fp_add:
        push    hl
        push    de
        call    fp_unpack_operands
        call    fp_add_unpacked
        pop     de
        pop     hl
        ret

; ------------------------------------------------------------
; fp_sub: A = A - B
;   B's sign is flipped in the unpacked workspace only, so the
;   operand in memory is never written (it may be in ROM, and
;   HL == DE works).
;   Preserves HL, DE.  Clobbers AF, BC.
; ------------------------------------------------------------
fp_sub:
        push    hl
        push    de
        call    fp_unpack_operands
        ld      a,(B_sign)
        xor     1                       ; negate B
        ld      (B_sign),a
        call    fp_add_unpacked
        pop     de
        pop     hl
        ret

; ------------------------------------------------------------
; fp_mul: A = A * B
;   Preserves HL, DE.  Clobbers AF, BC.
; ------------------------------------------------------------
fp_mul:
        push    hl
        push    de
        call    fp_unpack_operands
        call    fp_mul_unpacked
        pop     de
        pop     hl
        ret

; ------------------------------------------------------------
; fp_div: A = A / B   (B == 0 returns 0 as a simple "error" value)
;   Preserves HL, DE.  Clobbers AF, BC.
; ------------------------------------------------------------
fp_div:
        push    hl
        push    de
        call    fp_unpack_operands
        call    fp_div_unpacked
        pop     de
        pop     hl
        ret

; ============================================================
; Unpacked cores (operate on the A_*/B_* workspace, HL -> dest)
; ============================================================

; Unpack (HL) into A_* and (DE) into B_*.
; Preserves HL.  Clobbers AF, B, DE.
fp_unpack_operands:
        push    hl
        call    fp_unpackA
        pop     hl
        jp      fp_unpackB

; A = A + B on unpacked operands, packed to (HL).
fp_add_unpacked:
        ld      a,(A_exp)
        or      a
        jp      z,fp_pack_from_B_into_A ; A == 0 => result = B
        ld      a,(B_exp)
        or      a
        ret     z                       ; B == 0 => A in memory is already the result
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        jp      z,fp_add_same_sign      ; equal signs: add magnitudes
        jp      fp_add_diff_sign        ; else subtract magnitudes

; A = A * B on unpacked operands, packed to (HL).
fp_mul_unpacked:
        ld      a,(A_exp)
        or      a
        jp      z,fp_store_zero_A
        ld      a,(B_exp)
        or      a
        jp      z,fp_store_zero_A
        ; sign = A_sign XOR B_sign
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        ld      (A_sign),a
        ; exponent = A_exp + B_exp - BIAS (8-bit, wraps on overflow)
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        add     a,b
        sub     FP_BIAS
        ld      (A_exp),a
        ; product = A_mant * B_mant (24x24 => 48).  The multiply uses
        ; HL as its accumulator, so the destination pointer is saved.
        push    hl
        call    mul24x24_schoolbook
        pop     hl
        call    norm_product_to_A
        jp      fp_packA

; A = A / B on unpacked operands, packed to (HL).
fp_div_unpacked:
        ld      a,(A_exp)
        or      a
        jp      z,fp_store_zero_A       ; 0 / x = 0
        ld      a,(B_exp)
        or      a
        jp      z,fp_store_zero_A       ; x / 0 => 0
        ; sign = A_sign XOR B_sign
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        ld      (A_sign),a
        ; exponent = A_exp - B_exp + BIAS (8-bit, wraps on overflow)
        ld      a,(B_exp)
        ld      b,a
        ld      a,(A_exp)
        sub     b
        add     a,FP_BIAS
        ld      (A_exp),a
        call    div_mantissas_to_A
        call    normalize_A_mant
        jp      fp_packA

; ============================================================
; Add/Sub core (unpacked)
; ============================================================
fp_add_same_sign:
        call    align_exponents_A_B
        call    add24_A_plus_B
        jr      nc,fp_add_same_sign_noCarry
        ; 25-bit sum: shift right one place and put the carried-out
        ; bit back as the new bit 23, then bump the exponent.
        call    shr24_A_1
        ld      a,(A_m2)
        or      0x80
        ld      (A_m2),a
        ld      a,(A_exp)
        inc     a
        ld      (A_exp),a
fp_add_same_sign_noCarry:
        call    normalize_A_mant
        jp      fp_packA

fp_add_diff_sign:
        ; compute larger - smaller, sign = sign(larger)
        call    compare_mag_A_B
        jr      c,fp_add_diff_sign_A_ge_B
        call    swap_A_B_unpacked       ; |B| > |A| => swap
fp_add_diff_sign_A_ge_B:
        call    align_exponents_A_B
        call    sub24_A_minus_B
        call    is_A_mant_zero
        jp      z,fp_store_zero_A
        call    normalize_A_mant
        jp      fp_packA

; ============================================================
; Unpack / Pack helpers
; ============================================================

; Unpack A from (HL) into A_exp/A_sign/A_m2..A_m0.
; A_sign is 0 or 1; the mantissa gets the hidden 1 inserted.
; Clobbers AF, B, HL.
fp_unpackA:
        ld      a,(hl)
        ld      (A_exp),a
        or      a
        jr      z,fp_unpackA_zeroA
        inc     hl
        ld      a,(hl)
        ld      b,a
        and     0x80
        jr      z,fp_unpackA_sign       ; A is already 0 for a positive value
        ld      a,1
fp_unpackA_sign:
        ld      (A_sign),a
        ld      a,b
        or      0x80                    ; sign bit position becomes the hidden 1
        ld      (A_m2),a
        inc     hl
        ld      a,(hl)
        ld      (A_m1),a
        inc     hl
        ld      a,(hl)
        ld      (A_m0),a
        ret
fp_unpackA_zeroA:
        xor     a
        ld      (A_sign),a
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a
        ret

; Unpack B from (DE) into B_exp/B_sign/B_m2..B_m0.
; Clobbers AF, B, DE.
fp_unpackB:
        ld      a,(de)
        ld      (B_exp),a
        or      a
        jr      z,fp_unpackB_zeroB
        inc     de
        ld      a,(de)
        ld      b,a
        and     0x80
        jr      z,fp_unpackB_sign
        ld      a,1
fp_unpackB_sign:
        ld      (B_sign),a
        ld      a,b
        or      0x80
        ld      (B_m2),a
        inc     de
        ld      a,(de)
        ld      (B_m1),a
        inc     de
        ld      a,(de)
        ld      (B_m0),a
        ret
fp_unpackB_zeroB:
        xor     a
        ld      (B_sign),a
        ld      (B_m2),a
        ld      (B_m1),a
        ld      (B_m0),a
        ret

; Pack unpacked A back into memory at (HL).
; An exponent of 0 stores the canonical zero (00 00 00 00).
; Clobbers AF, B; HL is left pointing at the last byte written.
fp_packA:
        ld      a,(A_exp)
        or      a
        jr      nz,fp_packA_packNZ
fp_pack_zero:
        ld      (hl),0
        inc     hl
        ld      (hl),0
        inc     hl
        ld      (hl),0
        inc     hl
        ld      (hl),0
        ret
fp_packA_packNZ:
        ld      (hl),a
        inc     hl
        ld      a,(A_m2)
        and     0x7F                    ; remove hidden 1
        ld      b,a
        ld      a,(A_sign)
        or      a
        ld      a,b
        jr      z,fp_packA_storeB1
        or      0x80                    ; apply sign in bit 7
fp_packA_storeB1:
        ld      (hl),a
        inc     hl
        ld      a,(A_m1)
        ld      (hl),a
        inc     hl
        ld      a,(A_m0)
        ld      (hl),a
        ret

; Pack unpacked B into the A destination at (HL).
; A zero B (B_exp == 0) stores the canonical zero regardless of B_sign.
fp_pack_from_B_into_A:
        ld      a,(B_exp)
        or      a
        jr      z,fp_pack_zero
        ld      (hl),a
        inc     hl
        ld      a,(B_m2)
        and     0x7F
        ld      b,a
        ld      a,(B_sign)
        or      a
        ld      a,b
        jr      z,fp_pack_from_B_store
        or      0x80
fp_pack_from_B_store:
        ld      (hl),a
        inc     hl
        ld      a,(B_m1)
        ld      (hl),a
        inc     hl
        ld      a,(B_m0)
        ld      (hl),a
        ret

; Zero the unpacked A and store it at (HL).
fp_store_zero_A:
        xor     a
        ld      (A_exp),a
        ld      (A_sign),a
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a
        jp      fp_packA

; ============================================================
; Exponent alignment / compare / swap
; ============================================================

; Ensure A_exp >= B_exp (swapping operands if needed), then shift
; B's mantissa right by the exponent difference and set B_exp = A_exp.
; Clobbers AF, BC.
align_exponents_A_B:
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        cp      b                       ; B_exp - A_exp
        ret     z
        jr      c,align_exponents_A_B_shift
        call    swap_A_B_unpacked       ; B had the larger exponent
align_exponents_A_B_shift:
        ld      a,(B_exp)
        ld      c,a
        ld      a,(A_exp)
        sub     c                       ; A = exponent difference (> 0)
        call    shr24_B_by_A
        ld      a,(A_exp)
        ld      (B_exp),a
        ret

; Compare magnitudes: carry set if |A| >= |B|, carry clear if |A| < |B|.
; Compares exponent, then mantissa bytes from most significant down.
; Clobbers AF, B.
compare_mag_A_B:
        ld      a,(B_exp)
        ld      b,a
        ld      a,(A_exp)
        cp      b
        jr      nz,compare_mag_A_B_done
        ld      a,(B_m2)
        ld      b,a
        ld      a,(A_m2)
        cp      b
        jr      nz,compare_mag_A_B_done
        ld      a,(B_m1)
        ld      b,a
        ld      a,(A_m1)
        cp      b
        jr      nz,compare_mag_A_B_done
        ld      a,(B_m0)
        ld      b,a
        ld      a,(A_m0)
        cp      b
compare_mag_A_B_done:
        ccf                             ; cp gives carry for A < B; invert it
        ret

; Exchange the unpacked A and B records.
; Relies on A_exp..A_m0 and B_exp..B_m0 each being 5 contiguous bytes
; in the same field order (see the workspace section).
; Preserves HL, DE.  Clobbers AF, BC.
swap_A_B_unpacked:
        push    hl
        push    de
        ld      hl,A_exp
        ld      de,B_exp
        ld      b,5
swap_A_B_unpacked_loop:
        ld      a,(de)
        ld      c,(hl)
        ld      (hl),a
        ld      a,c
        ld      (de),a
        inc     hl
        inc     de
        djnz    swap_A_B_unpacked_loop
        pop     de
        pop     hl
        ret

; ============================================================
; 24-bit mantissa ops
; ============================================================

; A_m += B_m.  Carry out = overflow of bit 23.  Clobbers AF, B.
add24_A_plus_B:
        ld      a,(B_m0)
        ld      b,a
        ld      a,(A_m0)
        add     a,b
        ld      (A_m0),a
        ld      a,(B_m1)
        ld      b,a
        ld      a,(A_m1)
        adc     a,b
        ld      (A_m1),a
        ld      a,(B_m2)
        ld      b,a
        ld      a,(A_m2)
        adc     a,b
        ld      (A_m2),a
        ret

; A_m -= B_m.  Carry out = borrow.  Clobbers AF, B.
sub24_A_minus_B:
        ld      a,(B_m0)
        ld      b,a
        ld      a,(A_m0)
        sub     b
        ld      (A_m0),a
        ld      a,(B_m1)
        ld      b,a
        ld      a,(A_m1)
        sbc     a,b
        ld      (A_m1),a
        ld      a,(B_m2)
        ld      b,a
        ld      a,(A_m2)
        sbc     a,b
        ld      (A_m2),a
        ret

; Z set if the A mantissa is all zero.  Clobbers AF, B.
is_A_mant_zero:
        ld      a,(A_m2)
        ld      b,a
        ld      a,(A_m1)
        or      b
        ld      b,a
        ld      a,(A_m0)
        or      b
        ret

; Logical shift of the A mantissa right by one (0 enters bit 23).
shr24_A_1:
        ld      a,(A_m2)
        srl     a
        ld      (A_m2),a
        ld      a,(A_m1)
        rr      a
        ld      (A_m1),a
        ld      a,(A_m0)
        rr      a
        ld      (A_m0),a
        ret

; Shift the A mantissa left by one.  Carry out = old bit 23.
shl24_A_1:
        ld      a,(A_m0)
        add     a,a
        ld      (A_m0),a
        ld      a,(A_m1)
        adc     a,a
        ld      (A_m1),a
        ld      a,(A_m2)
        adc     a,a
        ld      (A_m2),a
        ret

; Shift B mantissa right by A bits (A = 0..255).
; A count of 24 or more clears the mantissa.  Uses SHCNT.
shr24_B_by_A:
        ld      (SHCNT),a
        cp      24
        jr      c,shr24_B_by_A_ok
        xor     a
        ld      (B_m2),a
        ld      (B_m1),a
        ld      (B_m0),a
        ret
shr24_B_by_A_ok:
        or      a
        ret     z
shr24_B_by_A_loop:
        ld      a,(B_m2)
        srl     a
        ld      (B_m2),a
        ld      a,(B_m1)
        rr      a
        ld      (B_m1),a
        ld      a,(B_m0)
        rr      a
        ld      (B_m0),a
        ld      a,(SHCNT)
        dec     a
        ld      (SHCNT),a
        jr      nz,shr24_B_by_A_loop
        ret

; Shift the A mantissa left until bit 23 is set, decrementing A_exp
; once per shift.  A zero mantissa, or an exponent that reaches 0
; while shifting, leaves A_exp = 0 (which packs as zero).
normalize_A_mant:
        call    is_A_mant_zero
        jr      nz,normalize_A_mant_nz
        xor     a
        ld      (A_exp),a
        ret
normalize_A_mant_nz:
        ld      a,(A_m2)
        bit     7,a
        ret     nz
        call    shl24_A_1
        ld      a,(A_exp)
        dec     a
        ld      (A_exp),a
        ret     z                       ; underflow: flush to zero
        jr      normalize_A_mant_nz

; ============================================================
; 8x8 -> 16 multiply (unsigned), shift-add
;   in:  A = multiplicand, C = multiplier
;   out: HL = 16-bit product
;   Clobbers AF, B, C, DE.
; ============================================================
mul8u:
        ld      e,a
        ld      d,0                     ; DE = multiplicand, widened to 16 bits
        ld      hl,0
        ld      b,8
mul8u_m8:
        srl     c                       ; next multiplier bit -> carry
        jr      nc,mul8u_noadd
        add     hl,de
mul8u_noadd:
        sla     e                       ; DE <<= 1 (full 16-bit shift)
        rl      d
        djnz    mul8u_m8
        ret

; ============================================================
; 24x24 schoolbook multiply into P0..P5 (P0 LSB)
;   P = A_m * B_m.  Clobbers AF, BC, DE, HL.
; ============================================================
mul24x24_schoolbook:
        xor     a
        ld      (P0),a
        ld      (P1),a
        ld      (P2),a
        ld      (P3),a
        ld      (P4),a
        ld      (P5),a
        ; A_m0 * B_m0, byte offset 0
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at0
        ; A_m0 * B_m1, offset 1
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at1
        ; A_m0 * B_m2, offset 2
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at2
        ; A_m1 * B_m0, offset 1
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at1
        ; A_m1 * B_m1, offset 2
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at2
        ; A_m1 * B_m2, offset 3
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at3
        ; A_m2 * B_m0, offset 2
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at2
        ; A_m2 * B_m1, offset 3
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at3
        ; A_m2 * B_m2, offset 4
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        jp      add16_to_P_at4

; add16_to_P_atN: P += HL << (8*N).  The carry out of the two bytes
; touched is rippled through every higher byte of P (the ld/jr
; instructions in between do not alter the carry flag).
; Clobbers AF.
add16_to_P_at0:
        ld      a,(P0)
        add     a,l
        ld      (P0),a
        ld      a,(P1)
        adc     a,h
        ld      (P1),a
        jr      add16_carry_into_P2
add16_to_P_at1:
        ld      a,(P1)
        add     a,l
        ld      (P1),a
        ld      a,(P2)
        adc     a,h
        ld      (P2),a
        jr      add16_carry_into_P3
add16_to_P_at2:
        ld      a,(P2)
        add     a,l
        ld      (P2),a
        ld      a,(P3)
        adc     a,h
        ld      (P3),a
        jr      add16_carry_into_P4
add16_to_P_at3:
        ld      a,(P3)
        add     a,l
        ld      (P3),a
        ld      a,(P4)
        adc     a,h
        ld      (P4),a
        jr      add16_carry_into_P5
add16_to_P_at4:
        ld      a,(P4)
        add     a,l
        ld      (P4),a
        ld      a,(P5)
        adc     a,h
        ld      (P5),a
        ret
add16_carry_into_P2:
        ld      a,(P2)
        adc     a,0
        ld      (P2),a
add16_carry_into_P3:
        ld      a,(P3)
        adc     a,0
        ld      (P3),a
add16_carry_into_P4:
        ld      a,(P4)
        adc     a,0
        ld      (P4),a
add16_carry_into_P5:
        ld      a,(P5)
        adc     a,0
        ld      (P5),a
        ret

; ============================================================
; Normalize product P into A mantissa
;   P is 48-bit, P0 LSB .. P5 MSB.  With both inputs normalised the
;   product has its top bit at bit 47 or bit 46; the top 24 bits
;   become the mantissa (exponent + 1 in the bit-47 case).
; ============================================================
norm_product_to_A:
        ld      a,(P5)
        bit     7,a
        jr      z,norm_product_shift23
        ld      a,24
        call    shr48_P_by_A
        ld      a,(A_exp)
        inc     a
        ld      (A_exp),a
        jr      norm_product_take
norm_product_shift23:
        ld      a,23
        call    shr48_P_by_A
norm_product_take:
        ld      a,(P2)
        ld      (A_m2),a
        ld      a,(P1)
        ld      (A_m1),a
        ld      a,(P0)
        ld      (A_m0),a
        ret

; Shift P right by A bits.  Uses SHCNT.  Clobbers AF.
shr48_P_by_A:
        ld      (SHCNT),a
        or      a
        ret     z
shr48_P_by_A_loop:
        ld      a,(P5)
        srl     a
        ld      (P5),a
        ld      a,(P4)
        rr      a
        ld      (P4),a
        ld      a,(P3)
        rr      a
        ld      (P3),a
        ld      a,(P2)
        rr      a
        ld      (P2),a
        ld      a,(P1)
        rr      a
        ld      (P1),a
        ld      a,(P0)
        rr      a
        ld      (P0),a
        ld      a,(SHCNT)
        dec     a
        ld      (SHCNT),a
        jr      nz,shr48_P_by_A_loop
        ret

; ============================================================
; Mantissa division (restoring)
;   A_m = (A_m << 23) / B_m, giving a quotient with its top bit at
;   bit 23 or bit 22 (the caller normalises).
;   Clobbers AF, BC.
; ============================================================
div_mantissas_to_A:
        ; P = A_m as 48-bit, then shift left 23
        xor     a
        ld      (P3),a
        ld      (P4),a
        ld      (P5),a
        ld      a,(A_m0)
        ld      (P0),a
        ld      a,(A_m1)
        ld      (P1),a
        ld      a,(A_m2)
        ld      (P2),a
        ld      a,23
        call    shl48_P_by_A
        ; clear quotient
        xor     a
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a
        ld      b,24
div_mantissas_loop:
        push    bc                      ; the 24-bit helpers use B as scratch
        call    shl24_A_1               ; quotient <<= 1
        call    shl48_P_1               ; remainder <<= 1, carry = bit 24
        jr      c,div_mantissas_force
        call    sub24_Phigh_minus_B
        jr      nc,div_mantissas_setbit
        call    add24_Phigh_plus_B      ; borrow: restore remainder, bit = 0
        jr      div_mantissas_next
div_mantissas_force:
        ; The 25-bit remainder is >= 2^24 > divisor, so the subtract
        ; always succeeds; its low 24 bits are the true remainder.
        call    sub24_Phigh_minus_B
div_mantissas_setbit:
        ld      a,(A_m0)
        or      0x1
        ld      (A_m0),a
div_mantissas_next:
        pop     bc
        djnz    div_mantissas_loop
        ret

; Shift P left by A bits.  Uses SHCNT.  Clobbers AF.
shl48_P_by_A:
        ld      (SHCNT),a
        or      a
        ret     z
shl48_P_by_A_loop:
        call    shl48_P_1
        ld      a,(SHCNT)
        dec     a
        ld      (SHCNT),a
        jr      nz,shl48_P_by_A_loop
        ret

; Shift P left by one.  Carry out = old bit 47.  Clobbers AF.
shl48_P_1:
        ld      a,(P0)
        add     a,a
        ld      (P0),a
        ld      a,(P1)
        adc     a,a
        ld      (P1),a
        ld      a,(P2)
        adc     a,a
        ld      (P2),a
        ld      a,(P3)
        adc     a,a
        ld      (P3),a
        ld      a,(P4)
        adc     a,a
        ld      (P4),a
        ld      a,(P5)
        adc     a,a
        ld      (P5),a
        ret

; P5..P3 -= B_m.  Carry set indicates borrow.  Clobbers AF, B.
sub24_Phigh_minus_B:
        ld      a,(B_m0)
        ld      b,a
        ld      a,(P3)
        sub     b
        ld      (P3),a
        ld      a,(B_m1)
        ld      b,a
        ld      a,(P4)
        sbc     a,b
        ld      (P4),a
        ld      a,(B_m2)
        ld      b,a
        ld      a,(P5)
        sbc     a,b
        ld      (P5),a
        ret

; P5..P3 += B_m.  Clobbers AF, B.
add24_Phigh_plus_B:
        ld      a,(B_m0)
        ld      b,a
        ld      a,(P3)
        add     a,b
        ld      (P3),a
        ld      a,(B_m1)
        ld      b,a
        ld      a,(P4)
        adc     a,b
        ld      (P4),a
        ld      a,(B_m2)
        ld      b,a
        ld      a,(P5)
        adc     a,b
        ld      (P5),a
        ret

; ============================================================
; fp_print: fixed format printing
;   Prints: [-]I.FFFFFF (FRAC_DIGITS digits, truncated)
;   In: HL -> float.  Uses os_print_vec (A=char).
;   Clobbers AF, BC, DE, HL.
;
;   The value is held as 32.32 fixed point: PR_INT3..PR_INT0 is the
;   integer part and PR_R3..PR_R0 the fraction (PR_R3 most
;   significant).  Fraction digits are produced by repeatedly
;   multiplying the fraction by 10 and taking the overflow.
; ============================================================
fp_print:
        ld      a,(hl)
        or      a
        jr      nz,fp_print_nz
        ; zero
        ld      a,'0'
        call    os_print_vec
        ld      a,'.'
        call    os_print_vec
        ld      b,FRAC_DIGITS
fp_print_zf:
        push    bc                      ; os_print_vec may clobber B
        ld      a,'0'
        call    os_print_vec
        pop     bc
        djnz    fp_print_zf
        ret

fp_print_nz:
        ; EXP -> PR_E (unbiased, modulo 256)
        ld      a,(hl)
        sub     FP_BIAS
        ld      (PR_E),a
        inc     hl
        ; sign + top fraction
        ld      a,(hl)
        ld      b,a
        and     0x80
        jr      z,fp_print_sign
        ld      a,1
fp_print_sign:
        ld      (PR_SIGN),a
        ; mantissa with hidden 1 inserted
        ld      a,b
        or      0x80
        ld      (PR_M2),a
        inc     hl
        ld      a,(hl)
        ld      (PR_M1),a
        inc     hl
        ld      a,(hl)
        ld      (PR_M0),a
        ; print '-'
        ld      a,(PR_SIGN)
        or      a
        jr      z,fp_print_mag
        ld      a,'-'
        call    os_print_vec

fp_print_mag:
        ; PR_INT = 24-bit mantissa M, PR_R = 0
        xor     a
        ld      (PR_INT3),a
        ld      (PR_R0),a
        ld      (PR_R1),a
        ld      (PR_R2),a
        ld      (PR_R3),a
        ld      a,(PR_M0)
        ld      (PR_INT0),a
        ld      a,(PR_M1)
        ld      (PR_INT1),a
        ld      a,(PR_M2)
        ld      (PR_INT2),a
        ; value = M * 2^(EXP - (FP_BIAS+23)).  Decide the direction on
        ; the biased exponent: the unbiased difference does not fit a
        ; signed byte.
        ld      a,(PR_E)
        add     a,FP_BIAS               ; back to the biased exponent
        cp      FP_BIAS+23
        jr      nc,fp_print_S_nonneg
        ; negative scale: shift right by (FP_BIAS+23) - EXP
        ld      b,a
        ld      a,FP_BIAS+23
        sub     b
        cp      56
        jr      c,fp_print_doShr
        ld      a,56                    ; 56 or more shifts leave nothing behind
fp_print_doShr:
        ld      b,a
        call    shr32_INT_to_INT_with_remainder
        jr      fp_print_print_int_and_frac
fp_print_S_nonneg:
        ; non-negative scale: shift left (capped at 31)
        sub     FP_BIAS+23
        cp      32
        jr      c,fp_print_doShl
        ld      a,31
fp_print_doShl:
        ld      b,a
        call    shl32_INT_by_B
fp_print_print_int_and_frac:
        call    print_u32_dec
        ld      a,'.'
        call    os_print_vec
        ld      b,FRAC_DIGITS
fp_print_fr:
        push    bc
        call    mul_remainder_by_10     ; A = next decimal digit (0..9)
        add     a,'0'
        call    os_print_vec
        pop     bc
        djnz    fp_print_fr
        ret

; Shift PR_INT right by B bits; the bits shifted out become the
; 32-bit fraction PR_R (PR_R3 most significant, cleared first).
; PR_INT0..3 and PR_R0..3 are each walked downward in memory, so
; each group must stay contiguous in the workspace.
; Clobbers AF, B, HL.
shr32_INT_to_INT_with_remainder:
        xor     a
        ld      (PR_R0),a
        ld      (PR_R1),a
        ld      (PR_R2),a
        ld      (PR_R3),a
        ld      a,b
        or      a
        ret     z
shr32_INT_to_INT_with_remainder_loop:
        ld      hl,PR_INT3
        srl     (hl)
        dec     hl                      ; 16-bit dec leaves the carry intact
        rr      (hl)                    ; PR_INT2
        dec     hl
        rr      (hl)                    ; PR_INT1
        dec     hl
        rr      (hl)                    ; PR_INT0
        ld      hl,PR_R3
        rr      (hl)
        dec     hl
        rr      (hl)                    ; PR_R2
        dec     hl
        rr      (hl)                    ; PR_R1
        dec     hl
        rr      (hl)                    ; PR_R0
        djnz    shr32_INT_to_INT_with_remainder_loop
        ret

; Shift PR_INT left by B bits.  Clobbers AF, B.
shl32_INT_by_B:
        ld      a,b
        or      a
        ret     z
shl32_INT_by_B_loop:
        ld      a,(PR_INT0)
        add     a,a
        ld      (PR_INT0),a
        ld      a,(PR_INT1)
        adc     a,a
        ld      (PR_INT1),a
        ld      a,(PR_INT2)
        adc     a,a
        ld      (PR_INT2),a
        ld      a,(PR_INT3)
        adc     a,a
        ld      (PR_INT3),a
        djnz    shl32_INT_by_B_loop
        ret

; PR_R (32-bit fraction) *= 10.  The overflow out of PR_R3 - the next
; decimal digit, 0..9 - is returned in A.
; Clobbers AF, C, DE, HL.
mul_remainder_by_10:
        ld      c,0                     ; running carry between bytes
        ld      a,(PR_R0)
        call    mul10_byte_step
        ld      (PR_R0),a
        ld      a,(PR_R1)
        call    mul10_byte_step
        ld      (PR_R1),a
        ld      a,(PR_R2)
        call    mul10_byte_step
        ld      (PR_R2),a
        ld      a,(PR_R3)
        call    mul10_byte_step
        ld      (PR_R3),a
        ld      a,c
        ret

; HL = A*10 + C; returns A = low byte, C = high byte (0..9).
mul10_byte_step:
        ld      l,a
        ld      h,0
        add     hl,hl                   ; x2
        ld      d,h
        ld      e,l
        add     hl,hl                   ; x4
        add     hl,hl                   ; x8
        add     hl,de                   ; x10
        ld      e,c
        ld      d,0
        add     hl,de                   ; + carry in
        ld      a,l
        ld      c,h
        ret

; Z set if PR_INT (u32) is zero.  Clobbers AF, B.
u32_INT_is_zero:
        ld      a,(PR_INT0)
        ld      b,a
        ld      a,(PR_INT1)
        or      b
        ld      b,a
        ld      a,(PR_INT2)
        or      b
        ld      b,a
        ld      a,(PR_INT3)
        or      b
        ret

; Print PR_INT (u32) as decimal.  PR_INT is destroyed.
; Digits are generated least-significant first into DIGBUF and
; printed back to front; DIGLEN doubles as the print index so no
; register has to survive os_print_vec.
print_u32_dec:
        call    u32_INT_is_zero
        jr      nz,print_u32_dec_nz
        ld      a,'0'
        call    os_print_vec
        ret
print_u32_dec_nz:
        xor     a
        ld      (DIGLEN),a
print_u32_dec_dloop:
        call    u32_div10_inplace       ; A = remainder, PR_INT = quotient
        add     a,'0'
        ld      e,a
        ld      hl,DIGBUF
        ld      a,(DIGLEN)
        ld      c,a
        ld      b,0
        add     hl,bc
        ld      (hl),e
        inc     a
        ld      (DIGLEN),a
        call    u32_INT_is_zero
        jr      nz,print_u32_dec_dloop
print_u32_dec_pr:
        ld      a,(DIGLEN)
        dec     a
        ld      (DIGLEN),a
        ld      c,a
        ld      b,0
        ld      hl,DIGBUF
        add     hl,bc
        ld      a,(hl)
        call    os_print_vec
        ld      a,(DIGLEN)
        or      a
        jr      nz,print_u32_dec_pr
        ret

; Divide PR_INT (u32) by 10 in place, return remainder in A (0..9).
; Works from the most significant byte (PR_INT3, highest address)
; down to PR_INT0.  Clobbers AF, BC, DE, HL.
u32_div10_inplace:
        ld      b,0                     ; running remainder
        ld      hl,PR_INT3
        call    u32_div10_step
        dec     hl
        call    u32_div10_step
        dec     hl
        call    u32_div10_step
        dec     hl
        call    u32_div10_step
        ld      a,b
        ret

; One byte of the long division: DE = B*256 + (HL); the quotient byte
; is written back to (HL) and the remainder returned in B.
u32_div10_step:
        ld      d,b
        ld      e,(hl)
        ld      c,0                     ; quotient byte
u32_div10_div:
        ld      a,d
        or      a
        jr      nz,u32_div10_sub
        ld      a,e
        cp      10
        jr      c,u32_div10_done
u32_div10_sub:
        ld      a,e
        sub     10
        ld      e,a
        ld      a,d
        sbc     a,0
        ld      d,a
        inc     c
        jr      u32_div10_div
u32_div10_done:
        ld      (hl),c
        ld      b,e
        ret

; ============================================================
; fp_parse: parse decimal string -> float
;   DE -> "[+|-]ddd[.ddd]\0"
;   HL -> output float
;   At most MAX_FRAC fraction digits are consumed; parsing stops at
;   the first character that does not fit the pattern.
;   Preserves HL.  Clobbers AF, BC, DE.
; ============================================================
fp_parse:
        xor     a
        ld      (P_SIGN),a
        ld      (P_FRACN),a
        ld      (P_S0),a
        ld      (P_S1),a
        ld      (P_S2),a
        ld      (P_S3),a
        ; optional sign
        ld      a,(de)
        cp      '-'
        jr      nz,fp_parse_chkplus
        ld      a,1
        ld      (P_SIGN),a
        inc     de
        jr      fp_parse_intpart
fp_parse_chkplus:
        cp      '+'
        jr      nz,fp_parse_intpart
        inc     de
fp_parse_intpart:
        ld      a,(de)
        call    is_digit
        jr      nc,fp_parse_maybe_dot
fp_parse_il:
        ld      a,(de)
        sub     '0'
        ld      c,a
        call    u32_mul10_scaled        ; leaves C, DE, HL alone
        call    u32_add8_scaled
        inc     de
        ld      a,(de)
        call    is_digit
        jr      c,fp_parse_il
fp_parse_maybe_dot:
        ld      a,(de)
        cp      '.'
        jr      nz,fp_parse_finish_scaled
        inc     de
fp_parse_fl:
        ; The digit budget is tracked in P_FRACN rather than in B,
        ; because u32_mul10_scaled uses B as scratch.
        ld      a,(P_FRACN)
        cp      MAX_FRAC
        jr      nc,fp_parse_finish_scaled
        ld      a,(de)
        call    is_digit
        jr      nc,fp_parse_finish_scaled
        ld      a,(de)
        sub     '0'
        ld      c,a
        call    u32_mul10_scaled
        call    u32_add8_scaled
        ld      a,(P_FRACN)
        inc     a
        ld      (P_FRACN),a
        inc     de
        jr      fp_parse_fl
fp_parse_finish_scaled:
        ; convert the scaled u32 to a float at (HL)
        call    fp_from_u32_scaled_to_A
        ; divide by 10^k if there were k fraction digits
        ld      a,(P_FRACN)
        or      a
        jr      z,fp_parse_apply_sign
        ; DE = &pow10_table[k]
        push    hl
        ld      e,a
        ld      d,0
        ld      hl,pow10_table
        add     hl,de
        add     hl,de
        add     hl,de
        add     hl,de
        ex      de,hl
        pop     hl
        call    fp_div                  ; preserves HL
fp_parse_apply_sign:
        ld      a,(P_SIGN)
        or      a
        ret     z
        ld      a,(hl)
        or      a
        ret     z                       ; keep zero canonical (no "-0")
        inc     hl
        ld      a,(hl)
        xor     0x80
        ld      (hl),a
        dec     hl
        ret

; Carry set if A is an ASCII digit '0'..'9', else carry clear.
is_digit:
        cp      '0'
        jr      c,is_digit_no
        cp      '9'+1
        jr      nc,is_digit_no
        scf
        ret
is_digit_no:
        or      a
        ret

; P_S = P_S * 10, computed as (P_S << 1) + (P_S << 3).
; Uses PR_INT and PR_R0..3 as scratch.  Clobbers AF, B.
u32_mul10_scaled:
        ; PR_INT = P_S * 2
        ld      a,(P_S0)
        ld      (PR_INT0),a
        ld      a,(P_S1)
        ld      (PR_INT1),a
        ld      a,(P_S2)
        ld      (PR_INT2),a
        ld      a,(P_S3)
        ld      (PR_INT3),a
        ld      b,1
        call    shl32_INT_by_B
        ; PR_R = P_S * 8
        ld      a,(P_S0)
        ld      (PR_R0),a
        ld      a,(P_S1)
        ld      (PR_R1),a
        ld      a,(P_S2)
        ld      (PR_R2),a
        ld      a,(P_S3)
        ld      (PR_R3),a
        ld      b,3
        call    shl32_R_by_B
        ; P_S = PR_INT + PR_R
        ld      a,(PR_R0)
        ld      b,a
        ld      a,(PR_INT0)
        add     a,b
        ld      (P_S0),a
        ld      a,(PR_R1)
        ld      b,a
        ld      a,(PR_INT1)
        adc     a,b
        ld      (P_S1),a
        ld      a,(PR_R2)
        ld      b,a
        ld      a,(PR_INT2)
        adc     a,b
        ld      (P_S2),a
        ld      a,(PR_R3)
        ld      b,a
        ld      a,(PR_INT3)
        adc     a,b
        ld      (P_S3),a
        ret

; Shift PR_R left by B bits.  Clobbers AF, B.
shl32_R_by_B:
        ld      a,b
        or      a
        ret     z
shl32_R_by_B_loop:
        ld      a,(PR_R0)
        add     a,a
        ld      (PR_R0),a
        ld      a,(PR_R1)
        adc     a,a
        ld      (PR_R1),a
        ld      a,(PR_R2)
        adc     a,a
        ld      (PR_R2),a
        ld      a,(PR_R3)
        adc     a,a
        ld      (PR_R3),a
        djnz    shl32_R_by_B_loop
        ret

; P_S += C (0..9).  Clobbers AF.
u32_add8_scaled:
        ld      a,(P_S0)
        add     a,c
        ld      (P_S0),a
        ld      a,(P_S1)
        adc     a,0
        ld      (P_S1),a
        ld      a,(P_S2)
        adc     a,0
        ld      (P_S2),a
        ld      a,(P_S3)
        adc     a,0
        ld      (P_S3),a
        ret

; Convert P_S (u32) to a float at (HL).  Positive only; the sign is
; handled by the caller.  Bits below the 24-bit mantissa are truncated.
; Preserves HL.  Clobbers AF, BC.
fp_from_u32_scaled_to_A:
        ld      a,(P_S0)
        ld      b,a
        ld      a,(P_S1)
        or      b
        ld      b,a
        ld      a,(P_S2)
        or      b
        ld      b,a
        ld      a,(P_S3)
        or      b
        jr      nz,fp_from_u32_scaled_to_A_nz
        ld      (hl),0
        inc     hl
        ld      (hl),0
        inc     hl
        ld      (hl),0
        inc     hl
        ld      (hl),0
        dec     hl
        dec     hl
        dec     hl
        ret
fp_from_u32_scaled_to_A_nz:
        ; find the index of the most significant set bit in B (0..31):
        ; start at the top bit of the highest non-zero byte ...
        ld      b,31
        ld      a,(P_S3)
        ld      c,a
        or      a
        jr      nz,fp_from_u32_scaled_to_A_find
        ld      b,23
        ld      a,(P_S2)
        ld      c,a
        or      a
        jr      nz,fp_from_u32_scaled_to_A_find
        ld      b,15
        ld      a,(P_S1)
        ld      c,a
        or      a
        jr      nz,fp_from_u32_scaled_to_A_find
        ld      b,7
        ld      a,(P_S0)
        ld      c,a
fp_from_u32_scaled_to_A_find:
        ; ... then walk down until a set bit reaches bit 7 of C
        bit     7,c
        jr      nz,fp_from_u32_scaled_to_A_found
        sla     c
        dec     b
        jr      fp_from_u32_scaled_to_A_find
fp_from_u32_scaled_to_A_found:
        ; EXP = FP_BIAS + msb index
        ld      a,b
        add     a,FP_BIAS
        ld      (hl),a
        ; Left-justify the value: shifting by (31 - msb) puts the
        ; leading 1 in bit 31, so PR_INT3..PR_INT1 hold the 24-bit
        ; mantissa.  The count is never negative.
        ld      a,31
        sub     b
        ld      b,a
        ld      a,(P_S0)
        ld      (PR_INT0),a
        ld      a,(P_S1)
        ld      (PR_INT1),a
        ld      a,(P_S2)
        ld      (PR_INT2),a
        ld      a,(P_S3)
        ld      (PR_INT3),a
        call    shl32_INT_by_B
        ; store sign=0 and the 23 fraction bits (hidden 1 removed)
        inc     hl
        ld      a,(PR_INT3)
        and     0x7F
        ld      (hl),a
        inc     hl
        ld      a,(PR_INT2)
        ld      (hl),a
        inc     hl
        ld      a,(PR_INT1)
        ld      (hl),a
        dec     hl
        dec     hl
        dec     hl
        ret

; ============================================================
; BSS / WORKSPACE
;   Several routines walk these records with a pointer, so keep
;   each group contiguous and in this order.
; ============================================================
        .balign 16
        .bss

; Unpacked A (5 contiguous bytes, same layout as unpacked B)
A_exp:   .space 1
A_sign:  .space 1
A_m2:    .space 1
A_m1:    .space 1
A_m0:    .space 1

; Unpacked B
B_exp:   .space 1
B_sign:  .space 1
B_m2:    .space 1
B_m1:    .space 1
B_m0:    .space 1

; 48-bit workspace (P0 LSB .. P5 MSB)
P0:      .space 1
P1:      .space 1
P2:      .space 1
P3:      .space 1
P4:      .space 1
P5:      .space 1

SHCNT:   .space 1

; Print temps
PR_SIGN: .space 1
PR_E:    .space 1
PR_M2:   .space 1
PR_M1:   .space 1
PR_M0:   .space 1

PR_INT0: .space 1
PR_INT1: .space 1
PR_INT2: .space 1
PR_INT3: .space 1

PR_R0:   .space 1
PR_R1:   .space 1
PR_R2:   .space 1
PR_R3:   .space 1

; Parse temps
P_SIGN:  .space 1
P_FRACN: .space 1
P_S0:    .space 1
P_S1:    .space 1
P_S2:    .space 1
P_S3:    .space 1

; Digit buffer (least significant digit first)
DIGBUF:  .space DIGBUF_LEN
DIGLEN:  .space 1

; ============================================================
; pow10_table: 10^k constants (k=0..6) in THIS float encoding
; Verified:
;   1.0       = 127 00 00 00
;   10.0      = 130 20 00 00
;   100.0     = 133 48 00 00
;   1000.0    = 136 7A 00 00
;   10000.0   = 140 1C 40 00
;   100000.0  = 143 43 50 00
;   1000000.0 = 146 74 24 00
; ============================================================
        .section "zone","acrx"
pow10_table:
        .byte   127, 0x00, 0x00, 0x00   ; 10^0 = 1
        .byte   130, 0x20, 0x00, 0x00   ; 10^1 = 10
        .byte   133, 0x48, 0x00, 0x00   ; 10^2 = 100
        .byte   136, 0x7A, 0x00, 0x00   ; 10^3 = 1000
        .byte   140, 0x1C, 0x40, 0x00   ; 10^4 = 10000
        .byte   143, 0x43, 0x50, 0x00   ; 10^5 = 100000
        .byte   146, 0x74, 0x24, 0x00   ; 10^6 = 1000000