From 1353091ddad5170ae74b34d01f71150366ce4310 Mon Sep 17 00:00:00 2001 From: "A.M. Rowsell" Date: Sat, 20 Dec 2025 14:40:01 -0500 Subject: [PATCH] init: creation of initial repo for this project --- float.asm | 1545 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1545 insertions(+) create mode 100644 float.asm diff --git a/float.asm b/float.asm new file mode 100644 index 0000000..67d71a4 --- /dev/null +++ b/float.asm @@ -0,0 +1,1545 @@ +; ============================================================ +; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax) +; ============================================================ +; Float format in memory (big-endian, 4 bytes): +; byte0: EXP (8-bit biased exponent, 0 = zero) +; byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits) +; byte2: F15..F8 +; byte3: F7..F0 +; +; For EXP != 0: +; value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS) +; FP_BIAS = 127 +; +; Calling convention (in-place ops): +; HL -> A (4 bytes) +; DE -> B (4 bytes) +; fp_add: A = A + B (stored back at HL) +; fp_sub: A = A - B +; fp_mul: A = A * B +; fp_div: A = A / B +; +; Extra: +; fp_print: print float at (HL) using external printChar (A=ASCII) +; fp_parse: parse null-terminated string at (DE) into float at (HL) +; +; Limitations: +; - No NaN/Inf/denormals +; - Truncation (no rounding) +; - fp_print prints fixed decimals with a lightweight fraction path +; - fp_parse supports optional +/- and '.' 
;     up to MAX_FRAC digits, no exponent notation
;   - fp_mul / fp_div exponent arithmetic wraps modulo 256
;     (no overflow / underflow detection)
; ============================================================

.equ FP_BIAS,127
.equ FRAC_DIGITS,6
.equ MAX_FRAC,6

.global printChar
; ============================================================
; CODE
; ============================================================
.text

; ------------------------------------------------------------
; External routine you provide:
;   printChar: prints ASCII character in A
; ------------------------------------------------------------
; printChar is external, not defined here.

; ============================================================
; Public API: fp_add / fp_sub / fp_mul / fp_div
;
;   In:   HL -> operand A (4 bytes, receives the result)
;         DE -> operand B (4 bytes, read only)
;   Out:  result stored at (HL); HL and DE preserved
;   Clobbers: AF, BC
; ============================================================

; ------------------------------------------------------------
; fp_add: A = A + B
; ------------------------------------------------------------
fp_add:
        push    hl
        push    de
        call    fp_unpack_both
        call    fp_add_unpacked
        pop     de
        pop     hl
        ret


; ------------------------------------------------------------
; fp_sub: A = A - B
; The sign of B is flipped in its *unpacked* copy only, so the
; caller's B operand is never written (it may live in ROM or be
; the same address as A).
; ------------------------------------------------------------
fp_sub:
        push    hl
        push    de
        call    fp_unpack_both
        ld      a,(B_sign)
        xor     1                       ; negate unpacked B
        ld      (B_sign),a
        call    fp_add_unpacked
        pop     de
        pop     hl
        ret


; ------------------------------------------------------------
; fp_unpack_both: unpack (HL) into A_* and (DE) into B_*.
; Out: HL preserved (fp_unpackA advances it, so it is saved here).
; Clobbers: AF, B, DE
; ------------------------------------------------------------
fp_unpack_both:
        push    hl
        call    fp_unpackA
        call    fp_unpackB
        pop     hl
        ret


; ============================================================
; Add/Sub core (unpacked)
;   In: A_* / B_* hold the unpacked operands, HL -> destination
; ============================================================
fp_add_unpacked:
        ld      a,(A_exp)
        or      a
        jp      z,fp_pack_from_B_into_A ; A == 0 => result = B
        ld      a,(B_exp)
        or      a
        ret     z                       ; B == 0 => A in memory is already the result

        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        jr      nz,fp_add_diff_sign     ; signs differ -> magnitude subtract
        ; signs equal -> fall through to magnitude add

fp_add_same_sign:
        push    hl                      ; helpers are not trusted to keep HL
        call    align_exponents_A_B
        call    add24_A_plus_B          ; carry = bit 24 of the sum
        jr      nc,.no_carry
        call    shr24_A_1               ; shifts a 0 into bit 23 ...
        ld      a,(A_m2)
        or      080h                    ; ... so put the carried-out bit back
        ld      (A_m2),a
        ld      a,(A_exp)
        inc     a
        ld      (A_exp),a
.no_carry:
        call    normalize_A_mant
        pop     hl
        jp      fp_packA


fp_add_diff_sign:
        ; larger magnitude minus smaller; result takes the sign of the larger
        push    hl
        call    compare_mag_A_B         ; carry set when |A| >= |B|
        call    nc,swap_A_B_unpacked    ; |B| > |A| => swap
        call    align_exponents_A_B
        call    sub24_A_minus_B
        call    is_A_mant_zero
        jr      z,.cancelled
        call    normalize_A_mant
        pop     hl
        jp      fp_packA
.cancelled:
        pop     hl
        jp      fp_store_zero_A         ; operands cancelled exactly


; ------------------------------------------------------------
; fp_mul: A = A * B
; ------------------------------------------------------------
fp_mul:
        push    hl
        push    de
        call    fp_unpack_both

        ; either operand zero => result zero
        ld      a,(A_exp)
        or      a
        jr      z,.zero
        ld      a,(B_exp)
        or      a
        jr      z,.zero

        ; sign = A_sign XOR B_sign
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        ld      (A_sign),a

        ; exponent = A_exp + B_exp - BIAS (8-bit, wraps)
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        add     a,b
        sub     FP_BIAS
        ld      (A_exp),a

        push    hl                      ; mul8u returns its product in HL
        call    mul24x24_schoolbook     ; P = A_mant * B_mant (48 bits)
        call    norm_product_to_A
        pop     hl
        call    fp_packA
        jr      .done
.zero:
        call    fp_store_zero_A
.done:
        pop     de
        pop     hl
        ret


; ------------------------------------------------------------
; fp_div: A = A / B
; Division by zero returns 0 (simple "error" behavior).
; ------------------------------------------------------------
fp_div:
        push    hl
        push    de
        call    fp_unpack_both

        ld      a,(A_exp)
        or      a
        jr      z,.zero                 ; 0 / x = 0
        ld      a,(B_exp)
        or      a
        jr      z,.zero                 ; x / 0 => 0

        ; sign = A_sign XOR B_sign
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        xor     b
        ld      (A_sign),a

        ; exponent = A_exp - B_exp + BIAS (8-bit, wraps)
        ld      a,(B_exp)
        ld      b,a
        ld      a,(A_exp)
        sub     b
        add     a,FP_BIAS
        ld      (A_exp),a

        push    hl
        call    div_mantissas_to_A
        call    normalize_A_mant        ; quotient may need one left shift
        pop     hl
        call    fp_packA
        jr      .done
.zero:
        call    fp_store_zero_A
.done:
        pop     de
        pop     hl
        ret


;
; ============================================================
; Unpack / Pack helpers
; ============================================================

; ------------------------------------------------------------
; fp_unpackA: expand the packed float at (HL) into A_exp,
; A_sign (0/1) and A_m2..A_m0 (hidden 1 restored in bit 7 of A_m2).
; A zero exponent unpacks as an all-zero operand.
; Out: HL advanced by 3 (unchanged for a zero operand).
; Clobbers: AF, B
; ------------------------------------------------------------
fp_unpackA:
        ld      a,(hl)
        ld      (A_exp),a
        or      a
        jr      z,.clear
        inc     hl
        ld      a,(hl)
        ld      b,a                     ; B = sign | top fraction bits
        rlca                            ; sign bit -> bit 0
        and     1
        ld      (A_sign),a
        ld      a,b
        or      080h                    ; overwrite sign bit with hidden 1
        ld      (A_m2),a
        inc     hl
        ld      a,(hl)
        ld      (A_m1),a
        inc     hl
        ld      a,(hl)
        ld      (A_m0),a
        ret
.clear:
        ; A is 0 here
        ld      (A_sign),a
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a
        ret


; ------------------------------------------------------------
; fp_unpackB: same as fp_unpackA, but reads (DE) into B_*.
; Out: DE advanced by 3 (unchanged for a zero operand).
; Clobbers: AF, B
; ------------------------------------------------------------
fp_unpackB:
        ld      a,(de)
        ld      (B_exp),a
        or      a
        jr      z,.clear
        inc     de
        ld      a,(de)
        ld      b,a
        rlca
        and     1
        ld      (B_sign),a
        ld      a,b
        or      080h
        ld      (B_m2),a
        inc     de
        ld      a,(de)
        ld      (B_m1),a
        inc     de
        ld      a,(de)
        ld      (B_m0),a
        ret
.clear:
        ld      (B_sign),a
        ld      (B_m2),a
        ld      (B_m1),a
        ld      (B_m0),a
        ret


; ------------------------------------------------------------
; fp_packA: store unpacked A at (HL).
; A_exp == 0 stores four zero bytes.
; Out: HL advanced by 3.
; Clobbers: AF, B (B only on the non-zero path)
; ------------------------------------------------------------
fp_packA:
        ld      a,(A_exp)
        or      a
        jr      z,.zero
        ld      (hl),a                  ; byte0 = exponent
        inc     hl
        ld      a,(A_m2)
        and     07Fh                    ; drop hidden 1
        ld      b,a
        ld      a,(A_sign)
        or      a                       ; Z = positive
        ld      a,b                     ; LD leaves the flags alone
        jr      z,.store
        set     7,a                     ; negative: set sign bit
.store:
        ld      (hl),a
        inc     hl
        ld      a,(A_m1)
        ld      (hl),a
        inc     hl
        ld      a,(A_m0)
        ld      (hl),a
        ret
.zero:
        ; A is 0 here
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        ret


; ------------------------------------------------------------
; fp_pack_from_B_into_A: store unpacked B at (HL) (HL -> A's memory).
; Out: HL advanced by 3.
; Clobbers: AF, B
; ------------------------------------------------------------
fp_pack_from_B_into_A:
        ld      a,(B_exp)
        ld      (hl),a
        inc     hl
        ld      a,(B_m2)
        and     07Fh
        ld      b,a
        ld      a,(B_sign)
        or      a
        ld      a,b
        jr      z,.store
        set     7,a
.store:
        ld      (hl),a
        inc     hl
        ld      a,(B_m1)
        ld      (hl),a
        inc     hl
        ld      a,(B_m0)
        ld      (hl),a
        ret


; ------------------------------------------------------------
; fp_store_zero_A: clear unpacked A and store zero at (HL).
; ------------------------------------------------------------
fp_store_zero_A:
        xor     a
        ld      (A_m0),a
        ld      (A_m1),a
        ld      (A_m2),a
        ld      (A_sign),a
        ld      (A_exp),a
        jp      fp_packA


;
; ============================================================
; Exponent alignment / compare / swap
;
; None of the helpers below touch HL unless stated: the public
; entry points keep the destination pointer there.
; Symbols are addressed individually; no layout of the .comm
; workspace is assumed.
; ============================================================

; ------------------------------------------------------------
; align_exponents_A_B: give both operands the larger exponent.
; If B has the larger exponent the operands are swapped first, then
; B's mantissa is shifted right by the exponent difference.
; Clobbers: AF, BC
; ------------------------------------------------------------
align_exponents_A_B:
        ld      a,(B_exp)
        ld      c,a
        ld      a,(A_exp)
        sub     c                       ; A = A_exp - B_exp
        ret     z                       ; already aligned
        jr      nc,.shift               ; A_exp > B_exp
        call    swap_A_B_unpacked       ; make A the larger exponent
        ld      a,(B_exp)
        ld      c,a
        ld      a,(A_exp)
        sub     c
.shift:
        call    shr24_B_by_A
        ld      a,(A_exp)
        ld      (B_exp),a
        ret


; ------------------------------------------------------------
; compare_mag_A_B: compare |A| with |B| (exponent, then mantissa).
; Out: carry set if |A| >= |B|, carry clear if |A| < |B|
; Clobbers: AF, B
; ------------------------------------------------------------
compare_mag_A_B:
        ld      a,(B_exp)
        ld      b,a
        ld      a,(A_exp)
        cp      b
        jr      nz,.decided
        ld      a,(B_m2)
        ld      b,a
        ld      a,(A_m2)
        cp      b
        jr      nz,.decided
        ld      a,(B_m1)
        ld      b,a
        ld      a,(A_m1)
        cp      b
        jr      nz,.decided
        ld      a,(B_m0)
        ld      b,a
        ld      a,(A_m0)
        cp      b
.decided:
        ccf                             ; CP gives carry for A < B; invert it
        ret


; ------------------------------------------------------------
; swap_A_B_unpacked: exchange all unpacked fields of A and B.
; Clobbers: AF, B
; ------------------------------------------------------------
swap_A_B_unpacked:
        ld      a,(A_exp)
        ld      b,a
        ld      a,(B_exp)
        ld      (A_exp),a
        ld      a,b
        ld      (B_exp),a
        ld      a,(A_sign)
        ld      b,a
        ld      a,(B_sign)
        ld      (A_sign),a
        ld      a,b
        ld      (B_sign),a
        ld      a,(A_m2)
        ld      b,a
        ld      a,(B_m2)
        ld      (A_m2),a
        ld      a,b
        ld      (B_m2),a
        ld      a,(A_m1)
        ld      b,a
        ld      a,(B_m1)
        ld      (A_m1),a
        ld      a,b
        ld      (B_m1),a
        ld      a,(A_m0)
        ld      b,a
        ld      a,(B_m0)
        ld      (A_m0),a
        ld      a,b
        ld      (B_m0),a
        ret


; ============================================================
; 24-bit mantissa ops
; (the Z80 has no "ADD A,(nn)"; operands are staged through C.
;  LD never changes flags, so the carry chain survives the loads.)
; ============================================================

; A_m += B_m.  Out: carry = carry out of bit 23.  Clobbers: AF, C
add24_A_plus_B:
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m0)
        add     a,c
        ld      (A_m0),a
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m1)
        adc     a,c
        ld      (A_m1),a
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m2)
        adc     a,c
        ld      (A_m2),a
        ret                             ; carry meaningful


; A_m -= B_m.  Out: carry = borrow.  Clobbers: AF, C
sub24_A_minus_B:
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m0)
        sub     c
        ld      (A_m0),a
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m1)
        sbc     a,c
        ld      (A_m1),a
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m2)
        sbc     a,c
        ld      (A_m2),a
        ret


; Out: Z set (and A = 0) when A_m2|A_m1|A_m0 == 0.  Clobbers: AF, C
is_A_mant_zero:
        ld      a,(A_m0)
        ld      c,a
        ld      a,(A_m1)
        or      c
        ld      c,a
        ld      a,(A_m2)
        or      c
        ret


; A_m >>= 1 (zero shifted in).  Out: carry = bit shifted out.
shr24_A_1:
        ld      a,(A_m2)
        srl     a
        ld      (A_m2),a
        ld      a,(A_m1)
        rr      a
        ld      (A_m1),a
        ld      a,(A_m0)
        rr      a
        ld      (A_m0),a
        ret


; A_m <<= 1.  Out: carry = bit shifted out of bit 23.
shl24_A_1:
        ld      a,(A_m0)
        add     a,a
        ld      (A_m0),a
        ld      a,(A_m1)
        adc     a,a
        ld      (A_m1),a
        ld      a,(A_m2)
        adc     a,a
        ld      (A_m2),a
        ret


; ------------------------------------------------------------
; shr24_B_by_A: B_m >>= A (A = 0..255).  24 or more clears B_m.
; Clobbers: AF, B
; ------------------------------------------------------------
shr24_B_by_A:
        cp      24
        jr      nc,.flush
        or      a
        ret     z
        ld      b,a
.loop:
        ld      a,(B_m2)
        srl     a
        ld      (B_m2),a
        ld      a,(B_m1)
        rr      a
        ld      (B_m1),a
        ld      a,(B_m0)
        rr      a
        ld      (B_m0),a
        djnz    .loop
        ret
.flush:
        xor     a
        ld      (B_m2),a
        ld      (B_m1),a
        ld      (B_m0),a
        ret


; ------------------------------------------------------------
; normalize_A_mant: shift A_m left until bit 23 is set, decrementing
; A_exp per shift.  A zero mantissa, or an exponent that reaches 0
; while shifting (underflow), leaves A_exp = 0, i.e. the value zero.
; Clobbers: AF, C
; ------------------------------------------------------------
normalize_A_mant:
        call    is_A_mant_zero
        jr      nz,.test
        ld      (A_exp),a               ; A = 0 here
        ret
.shift:
        call    shl24_A_1
        ld      a,(A_exp)
        dec     a
        ld      (A_exp),a
        ret     z                       ; underflow: exponent 0 encodes zero
.test:
        ld      a,(A_m2)
        bit     7,a
        jr      z,.shift
        ret


; ============================================================
; 8x8 -> 16 multiply (unsigned), shift-add
;   in:  A = multiplicand, C = multiplier
;   out: HL = 16-bit product
;   Clobbers: AF, BC, DE
; ============================================================
mul8u:
        ld      e,a
        ld      d,0                     ; DE = multiplicand, widened so no bits are lost
        ld      h,d
        ld      l,d
        ld      b,8
.bit:
        srl     c
        jr      nc,.skip
        add     hl,de
.skip:
        sla     e
        rl      d
        djnz    .bit
        ret


; ============================================================
; 24x24 schoolbook multiply into P0..P5 (P0 LSB)
;   P = A_m * B_m
;   HL is preserved (callers keep the result pointer there).
;   Clobbers: AF, BC, DE
; ============================================================
mul24x24_schoolbook:
        push    hl
        xor     a
        ld      (P0),a
        ld      (P1),a
        ld      (P2),a
        ld      (P3),a
        ld      (P4),a
        ld      (P5),a

        ; A_m0 * B_m0 -> offset 0
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at0

        ; A_m0 * B_m1 -> offset 1
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at1

        ; A_m0 * B_m2 -> offset 2
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m0)
        call    mul8u
        call    add16_to_P_at2

        ; A_m1 * B_m0 -> offset 1
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at1

        ; A_m1 * B_m1 -> offset 2
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at2

        ; A_m1 * B_m2 -> offset 3
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m1)
        call    mul8u
        call    add16_to_P_at3

        ; A_m2 * B_m0 -> offset 2
        ld      a,(B_m0)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at2

        ; A_m2 * B_m1 -> offset 3
        ld      a,(B_m1)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at3

        ; A_m2 * B_m2 -> offset 4
        ld      a,(B_m2)
        ld      c,a
        ld      a,(A_m2)
        call    mul8u
        call    add16_to_P_at4

        pop     hl
        ret


; ------------------------------------------------------------
; add16_to_P_atN: P += HL << (8*N).  A carry out of the two bytes
; touched is rippled into the higher bytes of P.
; Clobbers: AF
; ------------------------------------------------------------
add16_to_P_at0:
        ld      a,(P0)
        add     a,l
        ld      (P0),a
        ld      a,(P1)
        adc     a,h
        ld      (P1),a
        jp      c,p48_carry_into_P2
        ret
add16_to_P_at1:
        ld      a,(P1)
        add     a,l
        ld      (P1),a
        ld      a,(P2)
        adc     a,h
        ld      (P2),a
        jp      c,p48_carry_into_P3
        ret
add16_to_P_at2:
        ld      a,(P2)
        add     a,l
        ld      (P2),a
        ld      a,(P3)
        adc     a,h
        ld      (P3),a
        jp      c,p48_carry_into_P4
        ret
add16_to_P_at3:
        ld      a,(P3)
        add     a,l
        ld      (P3),a
        ld      a,(P4)
        adc     a,h
        ld      (P4),a
        jp      c,p48_carry_into_P5
        ret
add16_to_P_at4:
        ; top of the 48-bit product: a 24x24 product cannot carry out of P5
        ld      a,(P4)
        add     a,l
        ld      (P4),a
        ld      a,(P5)
        adc     a,h
        ld      (P5),a
        ret

; Ripple a carry of 1 upward; INC wrapping to 0 means the carry continues.
p48_carry_into_P2:
        ld      a,(P2)
        inc     a
        ld      (P2),a
        ret     nz
p48_carry_into_P3:
        ld      a,(P3)
        inc     a
        ld      (P3),a
        ret     nz
p48_carry_into_P4:
        ld      a,(P4)
        inc     a
        ld      (P4),a
        ret     nz
p48_carry_into_P5:
        ld      a,(P5)
        inc     a
        ld      (P5),a
        ret


; ============================================================
; Normalize product P into A mantissa
;   P is 48-bit, P0 LSB .. P5 MSB, with bit 47 or bit 46 set.
;   bit 47 set:   A_m = P >> 24, A_exp++
;   bit 47 clear: A_m = P >> 23 (done as P << 1, then take P5..P3)
; ============================================================
norm_product_to_A:
        ld      a,(P5)
        bit     7,a
        jr      z,.bit46
        ld      a,(A_exp)
        inc     a
        ld      (A_exp),a
        jr      .take
.bit46:
        call    shl48_P_1
.take:
        ld      a,(P5)
        ld      (A_m2),a
        ld      a,(P4)
        ld      (A_m1),a
        ld      a,(P3)
        ld      (A_m0),a
        ret


; P >>= A (A = shift count).  Clobbers: AF, B
shr48_P_by_A:
        or      a
        ret     z
        ld      b,a
.loop:
        ld      a,(P5)
        srl     a
        ld      (P5),a
        ld      a,(P4)
        rr      a
        ld      (P4),a
        ld      a,(P3)
        rr      a
        ld      (P3),a
        ld      a,(P2)
        rr      a
        ld      (P2),a
        ld      a,(P1)
        rr      a
        ld      (P1),a
        ld      a,(P0)
        rr      a
        ld      (P0),a
        djnz    .loop
        ret


; ============================================================
; Mantissa division (restoring-style)
;   A_m = (A_m << 23) / B_m, truncated
;   P5..P3 hold the running remainder.  Because the remainder is
;   < B_m < 2^24, doubling it can carry out of P5; that carry means
;   the divisor certainly fits, so it is subtracted unconditionally
;   (the borrow cancels the lost bit).
;   Clobbers: AF, BC
; ============================================================
div_mantissas_to_A:
        ; P = A_m << 23: place A_m in P5..P3, then shift right once
        ld      a,(A_m2)
        ld      (P5),a
        ld      a,(A_m1)
        ld      (P4),a
        ld      a,(A_m0)
        ld      (P3),a
        xor     a
        ld      (P2),a
        ld      (P1),a
        ld      (P0),a
        ; clear quotient
        ld      (A_m2),a
        ld      (A_m1),a
        ld      (A_m0),a
        inc     a                       ; A = 1
        call    shr48_P_by_A

        ld      b,24
.bit:
        call    shl24_A_1               ; make room for the next quotient bit
        call    shl48_P_1               ; carry = bit 24 of the remainder
        jr      c,.force
        call    sub24_Phigh_minus_B
        jr      nc,.one
        call    add24_Phigh_plus_B      ; did not fit: restore
        jr      .next
.force:
        call    sub24_Phigh_minus_B
.one:
        ld      a,(A_m0)
        or      001h
        ld      (A_m0),a
.next:
        djnz    .bit
        ret


; P <<= A (A = shift count).  Clobbers: AF, B
shl48_P_by_A:
        or      a
        ret     z
        ld      b,a
.loop:
        call    shl48_P_1
        djnz    .loop
        ret


; P <<= 1.  Out: carry = bit shifted out of P5.  Clobbers: AF
shl48_P_1:
        ld      a,(P0)
        add     a,a
        ld      (P0),a
        ld      a,(P1)
        adc     a,a
        ld      (P1),a
        ld      a,(P2)
        adc     a,a
        ld      (P2),a
        ld      a,(P3)
        adc     a,a
        ld      (P3),a
        ld      a,(P4)
        adc     a,a
        ld      (P4),a
        ld      a,(P5)
        adc     a,a
        ld      (P5),a
        ret


; P5..P3 -= B_m.  Out: carry = borrow.  Clobbers: AF, C
sub24_Phigh_minus_B:
        ld      a,(B_m0)
        ld      c,a
        ld      a,(P3)
        sub     c
        ld      (P3),a
        ld      a,(B_m1)
        ld      c,a
        ld      a,(P4)
        sbc     a,c
        ld      (P4),a
        ld      a,(B_m2)
        ld      c,a
        ld      a,(P5)
        sbc     a,c
        ld      (P5),a
        ret                             ; carry set indicates borrow


; P5..P3 += B_m.  Clobbers: AF, C
add24_Phigh_plus_B:
        ld      a,(B_m0)
        ld      c,a
        ld      a,(P3)
        add     a,c
        ld      (P3),a
        ld      a,(B_m1)
        ld      c,a
        ld      a,(P4)
        adc     a,c
        ld      (P4),a
        ld      a,(B_m2)
        ld      c,a
        ld      a,(P5)
        adc     a,c
        ld      (P5),a
        ret


; ============================================================
; fp_print: fixed format printing
;   Prints: [-]I.FFFFFF  (FRAC_DIGITS digits, truncated)
;   In:  HL -> float
;   Uses printChar (A=char); BC is saved around every call to it.
;
;   The value is held as 32.32 fixed point: PR_INT3..0 is the
;   integer part, PR_R3..0 the fraction (PR_R3 most significant).
;   Magnitudes of 2^32 or more print as 4294967295 (saturated).
;   Clobbers: AF, BC, DE, HL
; ============================================================
fp_print:
        xor     a
        ld      (PR_INT0),a
        ld      (PR_INT1),a
        ld      (PR_INT2),a
        ld      (PR_INT3),a
        ld      (PR_R0),a
        ld      (PR_R1),a
        ld      (PR_R2),a
        ld      (PR_R3),a

        ld      a,(hl)
        or      a
        jp      z,.emit                 ; zero prints as 0.000000

        ld      c,a                     ; C = biased exponent
        sub     FP_BIAS
        ld      (PR_E),a                ; unbiased exponent (mod 256), informational

        ; integer register = 24-bit mantissa with hidden 1
        inc     hl
        ld      a,(hl)
        ld      b,a                     ; B bit 7 = sign
        or      080h
        ld      (PR_INT2),a
        inc     hl
        ld      a,(hl)
        ld      (PR_INT1),a
        inc     hl
        ld      a,(hl)
        ld      (PR_INT0),a

        bit     7,b
        jr      z,.scale
        push    bc
        ld      a,'-'
        call    printChar
        pop     bc

.scale:
        ; value = M * 2^(exp - (FP_BIAS+23)); compare biased values so the
        ; shift count never has to fit a signed byte
        ld      a,c
        sub     FP_BIAS+23
        jr      c,.shift_right
        cp      9
        jr      nc,.saturate            ; more than 8 left shifts overflow 32 bits
        ld      b,a
        call    shl32_INT_by_B          ; returns at once when B = 0
        jr      .emit
.saturate:
        ld      a,0FFh
        ld      (PR_INT0),a
        ld      (PR_INT1),a
        ld      (PR_INT2),a
        ld      (PR_INT3),a
        jr      .emit
.shift_right:
        neg                             ; A = (FP_BIAS+23) - exp, 1..149
        cp      56
        jr      c,.count_ok
        ld      a,56                    ; 56 shifts already empty all 56 used bits
.count_ok:
        ld      b,a
        call    shr32_INT_to_INT_with_remainder

.emit:
        call    print_u32_dec
        ld      a,'.'
        call    printChar
        ld      b,FRAC_DIGITS
.digit:
        push    bc
        call    mul_remainder_by_10     ; A = next decimal digit
        add     a,'0'
        call    printChar
        pop     bc
        djnz    .digit
        ret


; ------------------------------------------------------------
; shr32_INT_to_INT_with_remainder: clear PR_R, then shift the 64-bit
; pair PR_INT:PR_R right by B bits, so bits leaving the integer
; part become the binary fraction in PR_R.
; Clobbers: AF, B
; ------------------------------------------------------------
shr32_INT_to_INT_with_remainder:
        xor     a
        ld      (PR_R0),a
        ld      (PR_R1),a
        ld      (PR_R2),a
        ld      (PR_R3),a
        or      b                       ; Z when the shift count is 0
        ret     z
.loop:
        ld      a,(PR_INT3)
        srl     a
        ld      (PR_INT3),a
        ld      a,(PR_INT2)
        rr      a
        ld      (PR_INT2),a
        ld      a,(PR_INT1)
        rr      a
        ld      (PR_INT1),a
        ld      a,(PR_INT0)
        rr      a
        ld      (PR_INT0),a
        ld      a,(PR_R3)
        rr      a
        ld      (PR_R3),a
        ld      a,(PR_R2)
        rr      a
        ld      (PR_R2),a
        ld      a,(PR_R1)
        rr      a
        ld      (PR_R1),a
        ld      a,(PR_R0)
        rr      a
        ld      (PR_R0),a
        djnz    .loop
        ret


; PR_INT <<= B.  Clobbers: AF, B
shl32_INT_by_B:
        ld      a,b
        or      a
        ret     z
.loop:
        ld      a,(PR_INT0)
        add     a,a
        ld      (PR_INT0),a
        ld      a,(PR_INT1)
        adc     a,a
        ld      (PR_INT1),a
        ld      a,(PR_INT2)
        adc     a,a
        ld      (PR_INT2),a
        ld      a,(PR_INT3)
        adc     a,a
        ld      (PR_INT3),a
        djnz    .loop
        ret


; ------------------------------------------------------------
; mul10_step: one byte of a multi-byte multiply by ten.
;   In:  A = byte, E = carry in (0..9)
;   Out: A = low byte of byte*10 + carry, E = carry out (0..9)
;   Clobbers: F, BC, HL
; ------------------------------------------------------------
mul10_step:
        ld      l,a
        ld      h,0
        add     hl,hl                   ; 2x
        ld      c,l
        ld      b,h
        add     hl,hl                   ; 4x
        add     hl,hl                   ; 8x
        add     hl,bc                   ; 10x
        ld      c,e
        ld      b,0
        add     hl,bc                   ; + carry in
        ld      e,h
        ld      a,l
        ret


; ------------------------------------------------------------
; mul_remainder_by_10: PR_R (32-bit fraction) *= 10.
;   Out: A = digit carried out of the top byte (0..9)
;   Preserves BC, DE, HL
; ------------------------------------------------------------
mul_remainder_by_10:
        push    bc
        push    de
        push    hl
        ld      e,0
        ld      a,(PR_R0)
        call    mul10_step
        ld      (PR_R0),a
        ld      a,(PR_R1)
        call    mul10_step
        ld      (PR_R1),a
        ld      a,(PR_R2)
        call    mul10_step
        ld      (PR_R2),a
        ld      a,(PR_R3)
        call    mul10_step
        ld      (PR_R3),a
        ld      a,e
        pop     hl
        pop     de
        pop     bc
        ret


; ------------------------------------------------------------
; print_u32_dec: print PR_INT (u32) as decimal; PR_INT ends up 0.
; Digits come out least-significant first, so they are parked on
; the stack and printed in reverse.  Zero prints a single '0'.
; Clobbers: AF, BC, DE, HL
; ------------------------------------------------------------
print_u32_dec:
        ld      c,0                     ; C = digits parked so far
.divide:
        push    bc
        call    u32_div10_inplace       ; A = PR_INT mod 10, PR_INT /= 10
        pop     bc
        add     a,'0'
        push    af
        inc     c
        ld      a,(PR_INT0)
        ld      b,a
        ld      a,(PR_INT1)
        or      b
        ld      b,a
        ld      a,(PR_INT2)
        or      b
        ld      b,a
        ld      a,(PR_INT3)
        or      b
        jr      nz,.divide
.emit:
        pop     af
        push    bc
        call    printChar
        pop     bc
        dec     c
        jr      nz,.emit
        ret


; ------------------------------------------------------------
; u32_div10_inplace: PR_INT /= 10, remainder (0..9) returned in A.
; Long division a byte at a time, most significant byte first.
; Clobbers: AF, BC, DE, HL
; ------------------------------------------------------------
u32_div10_inplace:
        ld      b,0                     ; running remainder
        ld      hl,PR_INT3
        call    .step
        ld      hl,PR_INT2
        call    .step
        ld      hl,PR_INT1
        call    .step
        ld      hl,PR_INT0
        call    .step
        ld      a,b
        ret
.step:
        ; DE = remainder*256 + byte; divide by repeated subtraction
        ld      d,b
        ld      e,(hl)
        ld      c,0                     ; quotient byte
.div:
        ld      a,d
        or      a
        jr      nz,.sub
        ld      a,e
        cp      10
        jr      c,.done
.sub:
        ld      a,e
        sub     10
        ld      e,a
        ld      a,d
        sbc     a,0
        ld      d,a
        inc     c
        jr      .div
.done:
        ld      (hl),c
        ld      b,e
        ret


; ============================================================
; fp_parse: parse decimal string -> float
;   DE -> "[+|-]ddd[.ddd]\0"
;   HL -> output float
;   Out: HL preserved
;   All digits are accumulated into one u32 (no overflow check); at
;   most MAX_FRAC fraction digits are used and parsing stops there.
;   Clobbers: AF, BC, DE
; ============================================================
fp_parse:
        push    hl                      ; output pointer is reloaded from the stack
        xor     a
        ld      (P_SIGN),a
        ld      (P_FRACN),a
        ld      (P_S0),a
        ld      (P_S1),a
        ld      (P_S2),a
        ld      (P_S3),a

        ; optional sign
        ld      a,(de)
        cp      '-'
        jr      nz,.plus
        ld      a,1
        ld      (P_SIGN),a
        jr      .skip
.plus:
        cp      '+'
        jr      nz,.int
.skip:
        inc     de

.int:
        ld      a,(de)
        call    is_digit
        jr      nc,.dot
        call    parse_take_digit
        inc     de
        jr      .int

.dot:
        cp      '.'                     ; is_digit leaves A untouched
        jr      nz,.convert
.frac:
        inc     de                      ; step over '.' or the digit just used
        ld      a,(P_FRACN)
        cp      MAX_FRAC
        jr      nc,.convert
        ld      a,(de)
        call    is_digit
        jr      nc,.convert
        call    parse_take_digit
        ld      a,(P_FRACN)
        inc     a
        ld      (P_FRACN),a
        jr      .frac

.convert:
        pop     hl
        push    hl
        call    fp_from_u32_scaled_to_A ; advances HL

        ; divide by 10^k when k fraction digits were read
        ld      a,(P_FRACN)
        or      a
        jr      z,.sign
        ld      l,a
        ld      h,0
        add     hl,hl
        add     hl,hl                   ; 4 bytes per table entry
        ld      de,pow10_table
        add     hl,de
        ex      de,hl                   ; DE = &pow10_table[k]
        pop     hl
        push    hl
        call    fp_div

.sign:
        pop     hl
        ld      a,(P_SIGN)
        or      a
        ret     z
        ld      a,(hl)
        or      a
        ret     z                       ; keep zero as all-zero bytes
        inc     hl
        ld      a,(hl)
        xor     080h
        ld      (hl),a
        dec     hl
        ret


; P_S = P_S*10 + (A - '0').  In: A = ASCII digit.  Preserves DE, HL.
parse_take_digit:
        sub     '0'
        ld      c,a
        call    u32_mul10_scaled        ; keeps C
        jp      u32_add8_scaled


; Out: carry set if A is '0'..'9', clear otherwise.  A preserved.
is_digit:
        cp      '0'
        jr      c,.no
        cp      '9'+1
        jr      nc,.no
        scf
        ret
.no:
        or      a
        ret


; P_S = P_S*10 (32-bit, wraps on overflow).  Preserves BC, DE, HL.
u32_mul10_scaled:
        push    bc
        push    de
        push    hl
        ld      e,0
        ld      a,(P_S0)
        call    mul10_step
        ld      (P_S0),a
        ld      a,(P_S1)
        call    mul10_step
        ld      (P_S1),a
        ld      a,(P_S2)
        call    mul10_step
        ld      (P_S2),a
        ld      a,(P_S3)
        call    mul10_step
        ld      (P_S3),a
        pop     hl
        pop     de
        pop     bc
        ret


; PR_R <<= B.  Clobbers: AF, B
shl32_R_by_B:
        ld      a,b
        or      a
        ret     z
.loop:
        ld      a,(PR_R0)
        add     a,a
        ld      (PR_R0),a
        ld      a,(PR_R1)
        adc     a,a
        ld      (PR_R1),a
        ld      a,(PR_R2)
        adc     a,a
        ld      (PR_R2),a
        ld      a,(PR_R3)
        adc     a,a
        ld      (PR_R3),a
        djnz    .loop
        ret


; P_S += C (0..9).  Clobbers: AF
u32_add8_scaled:
        ld      a,(P_S0)
        add     a,c
        ld      (P_S0),a
        ret     nc
        ld      a,(P_S1)
        inc     a
        ld      (P_S1),a
        ret     nz                      ; no wrap, carry stops here
        ld      a,(P_S2)
        inc     a
        ld      (P_S2),a
        ret     nz
        ld      a,(P_S3)
        inc     a
        ld      (P_S3),a
        ret


; ------------------------------------------------------------
; fp_from_u32_scaled_to_A: convert P_S (u32) to a float at (HL).
; Positive only; sign handled by caller.  Low bits that do not fit
; the 24-bit mantissa are truncated.
; Out: HL advanced by 3
; Clobbers: AF, BC
; ------------------------------------------------------------
fp_from_u32_scaled_to_A:
        ld      a,(P_S0)
        ld      b,a
        ld      a,(P_S1)
        or      b
        ld      b,a
        ld      a,(P_S2)
        or      b
        ld      b,a
        ld      a,(P_S3)
        or      b
        jr      nz,.nonzero
        ; A = 0
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        inc     hl
        ld      (hl),a
        ret

.nonzero:
        ; PR_INT = P_S
        ld      a,(P_S0)
        ld      (PR_INT0),a
        ld      a,(P_S1)
        ld      (PR_INT1),a
        ld      a,(P_S2)
        ld      (PR_INT2),a
        ld      a,(P_S3)
        ld      (PR_INT3),a

        ; shift left until bit 31 is set; the exponent starts at the value
        ; for "MSB is bit 31" and drops by one per shift
        ld      b,FP_BIAS+31
.norm:
        ld      a,(PR_INT3)
        bit     7,a
        jr      nz,.store
        push    bc
        ld      b,1
        call    shl32_INT_by_B
        pop     bc
        dec     b
        jr      .norm

.store:
        ld      (hl),b                  ; exponent
        inc     hl
        ; sign = 0, fraction = bits 30..8 (hidden 1 in bit 31 dropped)
        ld      a,(PR_INT3)
        and     07Fh
        ld      (hl),a
        inc     hl
        ld      a,(PR_INT2)
        ld      (hl),a
        inc     hl
        ld      a,(PR_INT1)
        ld      (hl),a
        ret

.data
; ============================================================
; pow10_table: 10^k constants (k=0..6) in THIS float encoding
; (exponent byte in decimal, fraction bytes in hex)
;   1.0       = 127 00 00 00
;   10.0      = 130 20 00 00
;   100.0     = 133 48 00 00
;   1000.0    = 136 7A 00 00
;   10000.0   = 140 1C 40 00
;   100000.0  = 143 43 50 00
;   1000000.0 = 146 74 24 00
; ============================================================
pow10_table:
        .byte 127, 0x00, 0x00, 0x00     ; 10^0 = 1
        .byte 130, 0x20, 0x00, 0x00     ; 10^1 = 10
        .byte 133, 0x48, 0x00, 0x00     ; 10^2 = 100
        .byte 136, 0x7A, 0x00, 0x00     ; 10^3 = 1000
        .byte 140, 0x1C, 0x40, 0x00     ; 10^4 = 10000
        .byte 143, 0x43, 0x50, 0x00     ; 10^5 = 100000
        .byte 146, 0x74, 0x24, 0x00     ; 10^6 = 1000000


; ============================================================
; BSS / WORKSPACE
; ============================================================
.bss

; Unpacked A
.comm A_exp,1
.comm A_sign,1
.comm A_m2,1
.comm A_m1,1
.comm A_m0,1

; Unpacked B
.comm B_exp,1
.comm B_sign,1
.comm B_m2,1
.comm B_m1,1
.comm B_m0,1

; 48-bit workspace (P0 LSB .. P5 MSB)
.comm P0,1
.comm P1,1
.comm P2,1
.comm P3,1
.comm P4,1
.comm P5,1

.comm SHCNT,1

; Print temps (PR_INT = integer part, PR_R = 32-bit fraction)
.comm PR_SI,1
.comm PR_SIGN,1
.comm PR_E,1
.comm PR_M2,1
.comm PR_M1,1
.comm PR_M0,1
.comm PR_INT0,1
.comm PR_INT1,1
.comm PR_INT2,1
.comm PR_INT3,1
.comm PR_R0,1
.comm PR_R1,1
.comm PR_R2,1
.comm PR_R3,1

; Parse temps
.comm P_SIGN,1
.comm P_FRACN,1
.comm P_S0,1
.comm P_S1,1
.comm P_S2,1
.comm P_S3,1

; Digit buffer (a u32 has at most 10 decimal digits)
.comm DIGBUF,10
.comm DIGLEN,1