diff --git a/.gitignore b/.gitignore index 35c765e..940421a 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,6 @@ *.srec *.s19 *.elf -*.map + AGENTS.md .pre-commit-config.yaml \ No newline at end of file diff --git a/Makefile b/Makefile index 1937f6a..37e0284 100644 --- a/Makefile +++ b/Makefile @@ -2,16 +2,12 @@ # SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell ASM=vasm_z80_std LINK=vlink -MAP=zone.map ASMFLAGS=-Fvobj -LINKFLAGS=-b ihex -T linker.cmd -M -BINLINKFLAGS=-b rawbin -T linker.cmd -M +LINKFLAGS=-b ihex -T linker.cmd all: zone.hex -binary: zone.bin - float.o: float.asm $(ASM) $(ASMFLAGS) -o $@ $< @@ -25,12 +21,9 @@ boot.o: boot.asm $(ASM) $(ASMFLAGS) -o $@ $< zone.hex: float.o vectorTable.o zone.o boot.o linker.cmd - $(LINK) $(LINKFLAGS) -o $@ float.o vectorTable.o zone.o boot.o > $(MAP) - -zone.bin: float.o vectorTable.o zone.o boot.o linker.cmd - $(LINK) $(BINLINKFLAGS) -o $@ float.o vectorTable.o zone.o boot.o > $(MAP) + $(LINK) $(LINKFLAGS) -o $@ float.o vectorTable.o zone.o boot.o clean: - rm -f float.o vectorTable.o zone.o boot.o zone.hex zone.bin $(MAP) + rm -f float.o vectorTable.o zone.o boot.o zone.hex .PHONY: all clean diff --git a/boot.asm b/boot.asm index d32bcde..92c0e4a 100644 --- a/boot.asm +++ b/boot.asm @@ -1,20 +1,13 @@ - ; SPDX-License-Identifier: MPL-2.0 - ; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell - .section "boot", "acrx" - .global zone_setup - .global os_warm_boot - .extern os_main_loop +; SPDX-License-Identifier: MPL-2.0 +; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell +.section "boot","acrx" +.global zone_setup +.global os_warm_boot +.extern os_main_loop - ; ================================== - ; This is the cold boot entry point! - ; It is linked to land at 0xC3C3 - ; ================================== - -_start: zone_setup: - ld a, ixl - ld sp, 0x6FFF - jp os_main_loop + ld sp, 0x6FFF + jp os_main_loop os_warm_boot: - ret + ret diff --git a/float.asm b/float.asm index f12305b..2ef9b37 100644 --- a/float.asm +++ b/float.asm @@ -1,1685 +1,1626 @@ - ; SPDX-License-Identifier: MPL-2.0 - ; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell - ; ============================================================ - ; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax) - ; ============================================================ - ; Float format in memory (big-endian, 4 bytes): - ; byte0: EXP (8-bit biased exponent, 0 = zero) - ; byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits) - ; byte2: F15..F8 - ; byte3: F7..F0 +; SPDX-License-Identifier: MPL-2.0 +; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell +; ============================================================ +; Z80 Soft Float Library (4-byte) + Print + Parse (vasm syntax) +; ============================================================ +; Float format in memory (big-endian, 4 bytes): +; byte0: EXP (8-bit biased exponent, 0 = zero) +; byte1: S|F22..F16 (bit7 = sign, bits6..0 = top 7 fraction bits) +; byte2: F15..F8 +; byte3: F7..F0 +; +; For EXP != 0: +; value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS) +; FP_BIAS = 127 +; +; Calling convention (in-place ops): +; HL -> A (4 bytes) +; DE -> B (4 bytes) +; fp_add: A = A + B (stored back at HL) +; fp_sub: A = A - B +; fp_mul: A = A * B +; fp_div: A = A / B +; +; Extra: +; fp_print: print float at (HL) using external os_print_vec (A=ASCII) +; fp_parse: parse null-terminated string at (DE) into float at (HL) +; +; Limitations: +; - No NaN/Inf/denormals +; - Truncation (no rounding) +; - fp_print prints fixed decimals with a lightweight fraction path +; - fp_parse supports optional +/- and '.' up to MAX_FRAC digits, no exponent notation +; ============================================================ - ; For EXP != 0: - ; value = (-1)^S * (1.F) * 2^(EXP - FP_BIAS) - ; FP_BIAS = 127 +.equ FP_BIAS,127 +.equ FRAC_DIGITS,6 +.equ MAX_FRAC,6 - ; Calling convention (in-place ops): - ; HL -> A (4 bytes) - ; DE -> B (4 bytes) - ; fp_add: A = A + B (stored back at HL) - ; fp_sub: A = A - B - ; fp_mul: A = A * B - ; fp_div: A = A / B +.extern os_print_vec +; ============================================================ +; CODE +; ============================================================ +.section "float","acrx" +; ------------------------------------------------------------ +; External routine you provide: +; os_print_vec: prints ASCII character in A +; ------------------------------------------------------------ +; os_print_vec is external, not defined here. - ; Extra: - ; fp_print: print float at (HL) using external os_print_vec (A=ASCII) - ; fp_parse: parse null-terminated string at (DE) into float at (HL) - - ; Limitations: - ; - No NaN/Inf/denormals - ; - Truncation (no rounding) - ; - fp_print prints fixed decimals with a lightweight fraction path - ; - fp_parse supports optional +/- and '.' up to MAX_FRAC digits, no exponent notation - ; ============================================================ - - .equ FP_BIAS, 127 - .equ FRAC_DIGITS, 6 - .equ MAX_FRAC, 6 - - .extern os_print_vec - ; ============================================================ - ; CODE - ; ============================================================ - .section "zone", "acrx" - ; ------------------------------------------------------------ - ; External routine you provide: - ; os_print_vec: prints ASCII character in A - ; ------------------------------------------------------------ - ; os_print_vec is external, not defined here. - - ; ============================================================ - ; Public API: fp_add / fp_sub / fp_mul / fp_div - ; ============================================================ - - ; ------------------------------------------------------------ - ; fp_add: A = A + B - ; ------------------------------------------------------------ +; ============================================================ +; Public API: fp_add / fp_sub / fp_mul / fp_div +; ============================================================ +; ------------------------------------------------------------ +; fp_add: A = A + B +; ------------------------------------------------------------ fp_add: - push hl - push de - call fp_unpackA - pop de - call fp_unpackB - pop hl - - ; zero short-cuts - ld a, (A_exp) - or a - jr nz, fp_add_checkB - ; A==0 => result=B - call fp_pack_from_B_into_A - ret + push hl + push de + call fp_unpackA + pop de + call fp_unpackB + pop hl + ; zero short-cuts + ld a,(A_exp) + or a + jr nz,fp_add_checkB + ; A==0 => result=B + call fp_pack_from_B_into_A + ret fp_add_checkB: - ld a, (B_exp) - or a - ret z + ld a,(B_exp) + or a + ret z - ; if signs same -> magnitude add - ld a, (A_sign) - ld b, a - ld a, (B_sign) - xor b - jp z, fp_add_same_sign + ; if signs same -> magnitude add + ld a,(A_sign) + ld b,a + ld a,(B_sign) + xor b + jp z,fp_add_same_sign - ; signs differ -> magnitude subtract - jp fp_add_diff_sign + ; signs differ -> magnitude subtract + jp fp_add_diff_sign - ; ------------------------------------------------------------ - ; fp_sub: A = A - B (flip B sign in memory, add, flip back) - ; ------------------------------------------------------------ +; ------------------------------------------------------------ +; fp_sub: A = A - B (flip B sign in memory, add, flip back) +; ------------------------------------------------------------ fp_sub: - ; Flip sign bit of B byte1 (DE+1) - push hl - push de - inc de - ld a, (de) - xor 0x80 - ld (de), a - pop de - pop hl + ; Flip sign bit of B byte1 (DE+1) + push hl + push de + inc de + ld a,(de) + xor 0x80 + ld (de),a + pop de + pop hl - call fp_add + call fp_add - ; Flip sign bit back - push hl - push de - inc de - ld a, (de) - xor 0x80 - ld (de), a - pop de - pop hl - ret + ; Flip sign bit back + push hl + push de + inc de + ld a,(de) + xor 0x80 + ld (de),a + pop de + pop hl + ret - ; ------------------------------------------------------------ - ; fp_mul: A = A * B - ; ------------------------------------------------------------ +; ------------------------------------------------------------ +; fp_mul: A = A * B +; ------------------------------------------------------------ fp_mul: - push hl - push de - call fp_unpackA - pop de - call fp_unpackB - pop hl + push hl + push de + call fp_unpackA + pop de + call fp_unpackB + pop hl - ; if A==0 or B==0 => 0 - ld a, (A_exp) - or a - jp z, fp_store_zero_A - ld a, (B_exp) - or a - jp z, fp_store_zero_A + ; if A==0 or B==0 => 0 + ld a,(A_exp) + or a + jp z,fp_store_zero_A + ld a,(B_exp) + or a + jp z,fp_store_zero_A - ; sign = A_sign XOR B_sign - ld a, (A_sign) - ld b, a - ld a, (B_sign) - xor b - ld (A_sign), a + ; sign = A_sign XOR B_sign + ld a,(A_sign) + ld b,a + ld a,(B_sign) + xor b + ld (A_sign),a - ; exponent = A_exp + B_exp - BIAS - ld a, (A_exp) - ld b, a - ld a, (B_exp) - add a, b - sub FP_BIAS - ld (A_exp), a + ; exponent = A_exp + B_exp - BIAS + ld a,(A_exp) + ld b,a + ld a,(B_exp) + add a,b + sub FP_BIAS + ld (A_exp),a - ; product = A_mant * B_mant (24x24 => 48) - call mul24x24_schoolbook + ; product = A_mant * B_mant (24x24 => 48) + call mul24x24_schoolbook - ; normalize product into A mantissa - call norm_product_to_A + ; normalize product into A mantissa + call norm_product_to_A - ; pack back into (HL) - call fp_packA - ret + ; pack back into (HL) + call fp_packA + ret - ; ------------------------------------------------------------ - ; fp_div: A = A / B - ; ------------------------------------------------------------ +; ------------------------------------------------------------ +; fp_div: A = A / B +; ------------------------------------------------------------ fp_div: - push hl - push de - call fp_unpackA - pop de - call fp_unpackB - pop hl + push hl + push de + call fp_unpackA + pop de + call fp_unpackB + pop hl - ; A==0 => 0 - ld a, (A_exp) - or a - jp z, fp_store_zero_A + ; A==0 => 0 + ld a,(A_exp) + or a + jp z,fp_store_zero_A - ; B==0 => return 0 (simple “error” behavior) - ld a, (B_exp) - or a - jp z, fp_store_zero_A + ; B==0 => return 0 (simple “error” behavior) + ld a,(B_exp) + or a + jp z,fp_store_zero_A - ; sign = A_sign XOR B_sign - ld a, (A_sign) - ld b, a - ld a, (B_sign) - xor b - ld (A_sign), a + ; sign = A_sign XOR B_sign + ld a,(A_sign) + ld b,a + ld a,(B_sign) + xor b + ld (A_sign),a - ; exponent = A_exp - B_exp + BIAS - ld a, (A_exp) - ld b, a - ld a, (B_exp) - ld c, a - ld a, b - sub c - add a, FP_BIAS - ld (A_exp), a + ; exponent = A_exp - B_exp + BIAS + ld a,(A_exp) + ld b,a + ld a,(B_exp) + ld c,a + ld a,b + sub c + add a,FP_BIAS + ld (A_exp),a - ; mantissa division - call div_mantissas_to_A - call normalize_A_mant + ; mantissa division + call div_mantissas_to_A + call normalize_A_mant - call fp_packA - ret + call fp_packA + ret - ; ============================================================ - ; Add/Sub core (unpacked) - ; ============================================================ + +; ============================================================ +; Add/Sub core (unpacked) +; ============================================================ fp_add_same_sign: - call align_exponents_A_B - call add24_A_plus_B - - ; if carry: shift right, exponent++ - jr nc, fp_add_same_sign_noCarry - call shr24_A_1 - ld a, (A_exp) - inc a - ld (A_exp), a + call align_exponents_A_B + call add24_A_plus_B + ; if carry: shift right, exponent++ + jr nc,fp_add_same_sign_noCarry + call shr24_A_1 + ld a,(A_exp) + inc a + ld (A_exp),a fp_add_same_sign_noCarry: - call normalize_A_mant - call fp_packA - ret + call normalize_A_mant + call fp_packA + ret + fp_add_diff_sign: - ; compare |A| vs |B|, do larger - smaller, sign = sign(larger) - call compare_mag_A_B - jr c, fp_add_diff_sign_A_ge_B - ; |B| > |A| => swap - call swap_A_B_unpacked - + ; compare |A| vs |B|, do larger - smaller, sign = sign(larger) + call compare_mag_A_B + jr c,fp_add_diff_sign_A_ge_B + ; |B| > |A| => swap + call swap_A_B_unpacked fp_add_diff_sign_A_ge_B: - call align_exponents_A_B - call sub24_A_minus_B - call is_A_mant_zero - jp z, fp_store_zero_A - call normalize_A_mant - call fp_packA - ret + call align_exponents_A_B + call sub24_A_minus_B + call is_A_mant_zero + jp z,fp_store_zero_A + call normalize_A_mant + call fp_packA + ret - ; ============================================================ - ; Unpack / Pack helpers - ; ============================================================ - ; Unpack A from (HL) +; ============================================================ +; Unpack / Pack helpers +; ============================================================ +; Unpack A from (HL) fp_unpackA: - ld a, (hl) - ld (A_exp), a - or a - jp z, fp_unpackA_zeroA - inc hl - ld a, (hl) - ld b, a - ; sign bit -> A_sign (0/1) - and 0x80 - jp z, fp_unpackA_sa0 - ld a, 1 - jr fp_unpackA_sa1 - + ld a,(hl) + ld (A_exp),a + or a + jp z,fp_unpackA_zeroA + inc hl + ld a,(hl) + ld b,a + ; sign bit -> A_sign (0/1) + and 0x80 + jp z,fp_unpackA_sa0 + ld a,1 + jr fp_unpackA_sa1 fp_unpackA_sa0: - xor a - + xor a fp_unpackA_sa1: - ld (A_sign), a - - ; mantissa bytes with hidden 1 inserted - ld a, b - and 0x7F - or 0x80 - ld (A_m2), a - inc hl - ld a, (hl) - ld (A_m1), a - inc hl - ld a, (hl) - ld (A_m0), a - ret + ld (A_sign),a + ; mantissa bytes with hidden 1 inserted + ld a,b + and 0x7F + or 0x80 + ld (A_m2),a + inc hl + ld a,(hl) + ld (A_m1),a + inc hl + ld a,(hl) + ld (A_m0),a + ret fp_unpackA_zeroA: - xor a - ld (A_sign), a - ld (A_m2), a - ld (A_m1), a - ld (A_m0), a - ret + xor a + ld (A_sign),a + ld (A_m2),a + ld (A_m1),a + ld (A_m0),a + ret - ; Unpack B from (DE) +; Unpack B from (DE) fp_unpackB: - ld a, (de) - ld (B_exp), a - or a - jp z, fp_unpackB_zeroB - inc de - ld a, (de) - ld b, a - and 0x80 - jp z, fp_unpackB_sb0 - ld a, 1 - jr fp_unpackB_sb1 - + ld a,(de) + ld (B_exp),a + or a + jp z,fp_unpackB_zeroB + inc de + ld a,(de) + ld b,a + and 0x80 + jp z,fp_unpackB_sb0 + ld a,1 + jr fp_unpackB_sb1 fp_unpackB_sb0: - xor a - + xor a fp_unpackB_sb1: - ld (B_sign), a - - ld a, b - and 0x7F - or 0x80 - ld (B_m2), a - inc de - ld a, (de) - ld (B_m1), a - inc de - ld a, (de) - ld (B_m0), a - ret + ld (B_sign),a + ld a,b + and 0x7F + or 0x80 + ld (B_m2),a + inc de + ld a,(de) + ld (B_m1),a + inc de + ld a,(de) + ld (B_m0),a + ret fp_unpackB_zeroB: - xor a - ld (B_sign), a - ld (B_m2), a - ld (B_m1), a - ld (B_m0), a - ret + xor a + ld (B_sign),a + ld (B_m2),a + ld (B_m1),a + ld (B_m0),a + ret - ; Pack unpacked A back into memory at (HL) +; Pack unpacked A back into memory at (HL) fp_packA: - ld a, (A_exp) - or a - jr nz, fp_packA_packNZ - ; store 0 - ld (hl), 0 - inc hl - ld (hl), 0 - inc hl - ld (hl), 0 - inc hl - ld (hl), 0 - ret + ld a,(A_exp) + or a + jr nz,fp_packA_packNZ + ; store 0 + ld (hl),0 + inc hl + ld (hl),0 + inc hl + ld (hl),0 + inc hl + ld (hl),0 + ret fp_packA_packNZ: - ld a, (A_exp) - ld (hl), a - inc hl + ld a,(A_exp) + ld (hl),a + inc hl - ; remove hidden 1 - ld a, (A_m2) - and 0x7F - ld b, a - - ; apply sign bit7 - ld a, (A_sign) - or a - jp z, fp_packA_sign0 - ld a, b - or 0x80 - jr fp_packA_storeB1 + ; remove hidden 1 + ld a,(A_m2) + and 0x7F + ld b,a + ; apply sign bit7 + ld a,(A_sign) + or a + jp z,fp_packA_sign0 + ld a,b + or 0x80 + jr fp_packA_storeB1 fp_packA_sign0: - ld a, b - + ld a,b fp_packA_storeB1: - ld (hl), a - inc hl - ld a, (A_m1) - ld (hl), a - inc hl - ld a, (A_m0) - ld (hl), a - ret + ld (hl),a + inc hl + ld a,(A_m1) + ld (hl),a + inc hl + ld a,(A_m0) + ld (hl),a + ret - ; Pack from unpacked B into memory A (HL points to A destination) +; Pack from unpacked B into memory A (HL points to A destination) fp_pack_from_B_into_A: - ld a, (B_exp) - ld (hl), a - inc hl - ld a, (B_m2) - and 0x7F - ld b, a - ld a, (B_sign) - or a - jp z, fp_pack_from_B_bs0 - ld a, b - or 0x80 - jr fp_pack_from_B_bs1 - + ld a,(B_exp) + ld (hl),a + inc hl + ld a,(B_m2) + and 0x7F + ld b,a + ld a,(B_sign) + or a + jp z,fp_pack_from_B_bs0 + ld a,b + or 0x80 + jr fp_pack_from_B_bs1 fp_pack_from_B_bs0: - ld a, b - + ld a,b fp_pack_from_B_bs1: - ld (hl), a - inc hl - ld a, (B_m1) - ld (hl), a - inc hl - ld a, (B_m0) - ld (hl), a - ret + ld (hl),a + inc hl + ld a,(B_m1) + ld (hl),a + inc hl + ld a,(B_m0) + ld (hl),a + ret + fp_store_zero_A: - xor a - ld (A_exp), a - ld (A_sign), a - ld (A_m2), a - ld (A_m1), a - ld (A_m0), a - jp fp_packA + xor a + ld (A_exp),a + ld (A_sign),a + ld (A_m2),a + ld (A_m1),a + ld (A_m0),a + jp fp_packA - ; ============================================================ - ; Exponent alignment / compare / swap - ; ============================================================ - ; Ensure A_exp >= B_exp; shift smaller mantissa right by diff +; ============================================================ +; Exponent alignment / compare / swap +; ============================================================ +; Ensure A_exp >= B_exp; shift smaller mantissa right by diff align_exponents_A_B: - ld a, (A_exp) - ld b, a - ld a, (B_exp) - cp b - jr z, align_exponents_A_B_done - jr c, align_exponents_A_B_bigger_exp; B_exp < A_exp - call swap_A_B_unpacked; make A the larger exponent - + ld a,(A_exp) + ld b,a + ld a,(B_exp) + cp b + jr z,align_exponents_A_B_done + jr c,align_exponents_A_B_bigger_exp ; B_exp < A_exp + call swap_A_B_unpacked ; make A the larger exponent align_exponents_A_B_bigger_exp: - ld a, (A_exp) - ld b, a - ld a, (B_exp) - ld c, a - ld a, b - sub c; A = diff - call shr24_B_by_A - ld a, (A_exp) - ld (B_exp), a - + ld a,(A_exp) + ld b,a + ld a,(B_exp) + ld c,a + ld a,b + sub c ; A = diff + call shr24_B_by_A + ld a,(A_exp) + ld (B_exp),a align_exponents_A_B_done: - ret + ret - ; Carry set if |A| >= |B|, else carry clear +; Carry set if |A| >= |B|, else carry clear compare_mag_A_B: - ld a, (A_exp) - ld b, a - ld a, (B_exp) - cp b - jr z, compare_mag_A_B_cmpMant - jr c, compare_mag_A_B_ge - or a - ret - + ld a,(A_exp) + ld b,a + ld a,(B_exp) + cp b + jr z,compare_mag_A_B_cmpMant + jr c,compare_mag_A_B_ge + or a + ret compare_mag_A_B_ge: - scf - ret - + scf + ret compare_mag_A_B_cmpMant: - ld a, (A_m2) - ld b, a - ld a, (B_m2) - cp b - jr z, compare_mag_A_B_m1 - jr c, compare_mag_A_B_ge2 - or a - ret - + ld a,(A_m2) + ld b,a + ld a,(B_m2) + cp b + jr z,compare_mag_A_B_m1 + jr c,compare_mag_A_B_ge2 + or a + ret compare_mag_A_B_ge2: - scf - ret - + scf + ret compare_mag_A_B_m1: - ld a, (A_m1) - ld b, a - ld a, (B_m1) - cp b - jr z, compare_mag_A_B_m0 - jr c, compare_mag_A_B_ge3 - or a - ret - + ld a,(A_m1) + ld b,a + ld a,(B_m1) + cp b + jr z,compare_mag_A_B_m0 + jr c,compare_mag_A_B_ge3 + or a + ret compare_mag_A_B_ge3: - scf - ret - + scf + ret compare_mag_A_B_m0: - ld a, (A_m0) - ld b, a - ld a, (B_m0) - cp b - jr c, compare_mag_A_B_ge4 - scf - ret - + ld a,(A_m0) + ld b,a + ld a,(B_m0) + cp b + jr c,compare_mag_A_B_ge4 + scf + ret compare_mag_A_B_ge4: - scf - ret + scf + ret + swap_A_B_unpacked: - ld a, (A_exp) - ld b, a - ld a, (B_exp) - ld (A_exp), a - ld a, b - ld (B_exp), a - ld a, (A_sign) - ld b, a - ld a, (B_sign) - ld (A_sign), a - ld a, b - ld (B_sign), a - ld a, (A_m2) - ld b, a - ld a, (B_m2) - ld (A_m2), a - ld a, b - ld (B_m2), a - ld a, (A_m1) - ld b, a - ld a, (B_m1) - ld (A_m1), a - ld a, b - ld (B_m1), a - ld a, (A_m0) - ld b, a - ld a, (B_m0) - ld (A_m0), a - ld a, b - ld (B_m0), a - ret + ld a,(A_exp) + ld b,a + ld a,(B_exp) + ld (A_exp),a + ld a,b + ld (B_exp),a + ld a,(A_sign) + ld b,a + ld a,(B_sign) + ld (A_sign),a + ld a,b + ld (B_sign),a + ld a,(A_m2) + ld b,a + ld a,(B_m2) + ld (A_m2),a + ld a,b + ld (B_m2),a + ld a,(A_m1) + ld b,a + ld a,(B_m1) + ld (A_m1),a + ld a,b + ld (B_m1),a + ld a,(A_m0) + ld b,a + ld a,(B_m0) + ld (A_m0),a + ld a,b + ld (B_m0),a + ret - ; ============================================================ - ; 24-bit mantissa ops - ; ============================================================ + +; ============================================================ +; 24-bit mantissa ops +; ============================================================ add24_A_plus_B: - ld a, (B_m0) - ld b, a - ld a, (A_m0) - add a, b - ld (A_m0), a - ld a, (B_m1) - ld b, a - ld a, (A_m1) - adc a, b - ld (A_m1), a - ld a, (B_m2) - ld b, a - ld a, (A_m2) - adc a, b - ld (A_m2), a - ret ; carry meaningful + ld a,(B_m0) + ld b,a + ld a,(A_m0) + add a,b + ld (A_m0),a + ld a,(B_m1) + ld b,a + ld a,(A_m1) + adc a,b + ld (A_m1),a + ld a,(B_m2) + ld b,a + ld a,(A_m2) + adc a,b + ld (A_m2),a + ret ; carry meaningful + sub24_A_minus_B: - ld a, (B_m0) - ld b, a - ld a, (A_m0) - sub b - ld (A_m0), a - ld a, (B_m1) - ld b, a - ld a, (A_m1) - sbc a, b - ld (A_m1), a - ld a, (B_m2) - ld b, a - ld a, (A_m2) - sbc a, b - ld (A_m2), a - ret + ld a,(B_m0) + ld b,a + ld a,(A_m0) + sub b + ld (A_m0),a + ld a,(B_m1) + ld b,a + ld a,(A_m1) + sbc a,b + ld (A_m1),a + ld a,(B_m2) + ld b,a + ld a,(A_m2) + sbc a,b + ld (A_m2),a + ret + is_A_mant_zero: - ld a, (A_m2) - ld b, a - ld a, (A_m1) - or b - ld b, a - ld a, (A_m0) - or b - ret + ld a,(A_m2) + ld b,a + ld a,(A_m1) + or b + ld b,a + ld a,(A_m0) + or b + ret + shr24_A_1: - ld a, (A_m2) - srl a - ld (A_m2), a - ld a, (A_m1) - rr a - ld (A_m1), a - ld a, (A_m0) - rr a - ld (A_m0), a - ret + ld a,(A_m2) + srl a + ld (A_m2),a + ld a,(A_m1) + rr a + ld (A_m1),a + ld a,(A_m0) + rr a + ld (A_m0),a + ret + shl24_A_1: - ld a, (A_m0) - add a, a - ld (A_m0), a - ld a, (A_m1) - adc a, a - ld (A_m1), a - ld a, (A_m2) - adc a, a - ld (A_m2), a - ret + ld a,(A_m0) + add a,a + ld (A_m0),a + ld a,(A_m1) + adc a,a + ld (A_m1),a + ld a,(A_m2) + adc a,a + ld (A_m2),a + ret - ; Shift B mantissa right by A bits (A=0..255) +; Shift B mantissa right by A bits (A=0..255) shr24_B_by_A: - ld (SHCNT), a - ld a, (SHCNT) - cp 24 - jr c, shr24_B_by_A_ok - xor a - ld (B_m2), a - ld (B_m1), a - ld (B_m0), a - ret - + ld (SHCNT),a + ld a,(SHCNT) + cp 24 + jr c,shr24_B_by_A_ok + xor a + ld (B_m2),a + ld (B_m1),a + ld (B_m0),a + ret shr24_B_by_A_ok: - ld a, (SHCNT) - or a - ret z - + ld a,(SHCNT) + or a + ret z shr24_B_by_A_loop: - ld a, (B_m2) - srl a - ld (B_m2), a - ld a, (B_m1) - rr a - ld (B_m1), a - ld a, (B_m0) - rr a - ld (B_m0), a - ld a, (SHCNT) - dec a - ld (SHCNT), a - jr nz, shr24_B_by_A_loop - ret + ld a,(B_m2) + srl a + ld (B_m2),a + ld a,(B_m1) + rr a + ld (B_m1),a + ld a,(B_m0) + rr a + ld (B_m0),a + ld a,(SHCNT) + dec a + ld (SHCNT),a + jr nz,shr24_B_by_A_loop + ret + normalize_A_mant: - call is_A_mant_zero - jr nz, normalize_A_mant_nz - xor a - ld (A_exp), a - ret - + call is_A_mant_zero + jr nz,normalize_A_mant_nz + xor a + ld (A_exp),a + ret normalize_A_mant_nz: - ld a, (A_m2) - bit 7, a - ret nz - + ld a,(A_m2) + bit 7,a + ret nz normalize_A_mant_left_loop: - ld a, (A_m0) - add a, a - ld (A_m0), a - ld a, (A_m1) - adc a, a - ld (A_m1), a - ld a, (A_m2) - adc a, a - ld (A_m2), a - ld a, (A_exp) - dec a - ld (A_exp), a - ld a, (A_m2) - bit 7, a - jr z, normalize_A_mant_left_loop - ret + ld a,(A_m0) + add a,a + ld (A_m0),a + ld a,(A_m1) + adc a,a + ld (A_m1),a + ld a,(A_m2) + adc a,a + ld (A_m2),a + ld a,(A_exp) + dec a + ld (A_exp),a + ld a,(A_m2) + bit 7,a + jr z,normalize_A_mant_left_loop + ret - ; ============================================================ - ; 8x8 -> 16 multiply (unsigned), shift-add - ; in: A = multiplicand, C = multiplier - ; out: HL = 16-bit product - ; ============================================================ +; ============================================================ +; 8x8 -> 16 multiply (unsigned), shift-add +; in: A = multiplicand, C = multiplier +; out: HL = 16-bit product +; ============================================================ mul8u: - ld h, 0 - ld l, 0 - ld b, 8 - + ld h,0 + ld l,0 + ld b,8 mul8u_m8: - srl c - jr nc, mul8u_noadd - ld e, a - ld d, 0 - add hl, de - + srl c + jr nc,mul8u_noadd + ld e,a + ld d,0 + add hl,de mul8u_noadd: - add a, a - djnz mul8u_m8 - ret + add a,a + djnz mul8u_m8 + ret - ; ============================================================ - ; 24x24 schoolbook multiply into P0..P5 (P0 LSB) - ; ============================================================ +; ============================================================ +; 24x24 schoolbook multiply into P0..P5 (P0 LSB) +; ============================================================ mul24x24_schoolbook: - xor a - ld (P0), a - ld (P1), a - ld (P2), a - ld (P3), a - ld (P4), a - ld (P5), a + xor a + ld (P0),a + ld (P1),a + ld (P2),a + ld (P3),a + ld (P4),a + ld (P5),a - ; (0, 0) offset 0 - ld a, (B_m0) - ld c, a - ld a, (A_m0) - call mul8u - call add16_to_P_at0 + ; (0,0) offset 0 + ld a,(B_m0) + ld c,a + ld a,(A_m0) + call mul8u + call add16_to_P_at0 - ; (0, 1) offset 1 - ld a, (B_m1) - ld c, a - ld a, (A_m0) - call mul8u - call add16_to_P_at1 + ; (0,1) offset 1 + ld a,(B_m1) + ld c,a + ld a,(A_m0) + call mul8u + call add16_to_P_at1 - ; (0, 2) offset 2 - ld a, (B_m2) - ld c, a - ld a, (A_m0) - call mul8u - call add16_to_P_at2 + ; (0,2) offset 2 + ld a,(B_m2) + ld c,a + ld a,(A_m0) + call mul8u + call add16_to_P_at2 - ; (1, 0) offset 1 - ld a, (B_m0) - ld c, a - ld a, (A_m1) - call mul8u - call add16_to_P_at1 + ; (1,0) offset 1 + ld a,(B_m0) + ld c,a + ld a,(A_m1) + call mul8u + call add16_to_P_at1 - ; (1, 1) offset 2 - ld a, (B_m1) - ld c, a - ld a, (A_m1) - call mul8u - call add16_to_P_at2 + ; (1,1) offset 2 + ld a,(B_m1) + ld c,a + ld a,(A_m1) + call mul8u + call add16_to_P_at2 - ; (1, 2) offset 3 - ld a, (B_m2) - ld c, a - ld a, (A_m1) - call mul8u - call add16_to_P_at3 + ; (1,2) offset 3 + ld a,(B_m2) + ld c,a + ld a,(A_m1) + call mul8u + call add16_to_P_at3 - ; (2, 0) offset 2 - ld a, (B_m0) - ld c, a - ld a, (A_m2) - call mul8u - call add16_to_P_at2 + ; (2,0) offset 2 + ld a,(B_m0) + ld c,a + ld a,(A_m2) + call mul8u + call add16_to_P_at2 - ; (2, 1) offset 3 - ld a, (B_m1) - ld c, a - ld a, (A_m2) - call mul8u - call add16_to_P_at3 + ; (2,1) offset 3 + ld a,(B_m1) + ld c,a + ld a,(A_m2) + call mul8u + call add16_to_P_at3 - ; (2, 2) offset 4 - ld a, (B_m2) - ld c, a - ld a, (A_m2) - call mul8u - call add16_to_P_at4 + ; (2,2) offset 4 + ld a,(B_m2) + ld c,a + ld a,(A_m2) + call mul8u + call add16_to_P_at4 + + ret - ret add16_to_P_at0: - ld a, (P0) - add a, l - ld (P0), a - ld a, (P1) - adc a, h - ld (P1), a - ret - + ld a,(P0) + add a,l + ld (P0),a + ld a,(P1) + adc a,h + ld (P1),a + ret add16_to_P_at1: - ld a, (P1) - add a, l - ld (P1), a - ld a, (P2) - adc a, h - ld (P2), a - ret - + ld a,(P1) + add a,l + ld (P1),a + ld a,(P2) + adc a,h + ld (P2),a + ret add16_to_P_at2: - ld a, (P2) - add a, l - ld (P2), a - ld a, (P3) - adc a, h - ld (P3), a - ret - + ld a,(P2) + add a,l + ld (P2),a + ld a,(P3) + adc a,h + ld (P3),a + ret add16_to_P_at3: - ld a, (P3) - add a, l - ld (P3), a - ld a, (P4) - adc a, h - ld (P4), a - ret - + ld a,(P3) + add a,l + ld (P3),a + ld a,(P4) + adc a,h + ld (P4),a + ret add16_to_P_at4: - ld a, (P4) - add a, l - ld (P4), a - ld a, (P5) - adc a, h - ld (P5), a - ret + ld a,(P4) + add a,l + ld (P4),a + ld a,(P5) + adc a,h + ld (P5),a + ret - ; ============================================================ - ; Normalize product P into A mantissa - ; P is 48-bit, P0 LSB .. P5 MSB - ; ============================================================ +; ============================================================ +; Normalize product P into A mantissa +; P is 48-bit, P0 LSB .. P5 MSB +; ============================================================ norm_product_to_A: - ld a, (P5) - bit 7, a - jr z, norm_product_shift23 - ld a, 24 - call shr48_P_by_A - ld a, (A_exp) - inc a - ld (A_exp), a - jr norm_product_take - + ld a,(P5) + bit 7,a + jr z,norm_product_shift23 + ld a,24 + call shr48_P_by_A + ld a,(A_exp) + inc a + ld (A_exp),a + jr norm_product_take norm_product_shift23: - ld a, 23 - call shr48_P_by_A - + ld a,23 + call shr48_P_by_A norm_product_take: - ld a, (P2) - ld (A_m2), a - ld a, (P1) - ld (A_m1), a - ld a, (P0) - ld (A_m0), a - ret + ld a,(P2) + ld (A_m2),a + ld a,(P1) + ld (A_m1),a + ld a,(P0) + ld (A_m0),a + ret + shr48_P_by_A: - ld (SHCNT), a - ld a, (SHCNT) - or a - ret z - + ld (SHCNT),a + ld a,(SHCNT) + or a + ret z shr48_P_by_A_loop: - ld a, (P5) - srl a - ld (P5), a - ld a, (P4) - rr a - ld (P4), a - ld a, (P3) - rr a - ld (P3), a - ld a, (P2) - rr a - ld (P2), a - ld a, (P1) - rr a - ld (P1), a - ld a, (P0) - rr a - ld (P0), a - ld a, (SHCNT) - dec a - ld (SHCNT), a - jr nz, shr48_P_by_A_loop - ret + ld a,(P5) + srl a + ld (P5),a + ld a,(P4) + rr a + ld (P4),a + ld a,(P3) + rr a + ld (P3),a + ld a,(P2) + rr a + ld (P2),a + ld a,(P1) + rr a + ld (P1),a + ld a,(P0) + rr a + ld (P0),a + ld a,(SHCNT) + dec a + ld (SHCNT),a + jr nz,shr48_P_by_A_loop + ret - ; ============================================================ - ; Mantissa division (restoring-style) - ; A_m = (A_m << 23) / B_m - ; ============================================================ +; ============================================================ +; Mantissa division (restoring-style) +; A_m = (A_m << 23) / B_m +; ============================================================ div_mantissas_to_A: - ; P = A_m as 48-bit, then shift left 23 - xor a - ld (P3), a - ld (P4), a - ld (P5), a - ld a, (A_m0) - ld (P0), a - ld a, (A_m1) - ld (P1), a - ld a, (A_m2) - ld (P2), a + ; P = A_m as 48-bit, then shift left 23 + xor a + ld (P3),a + ld (P4),a + ld (P5),a + ld a,(A_m0) + ld (P0),a + ld a,(A_m1) + ld (P1),a + ld a,(A_m2) + ld (P2),a - ld a, 23 - call shl48_P_by_A + ld a,23 + call shl48_P_by_A - ; clear quotient - xor a - ld (A_m2), a - ld (A_m1), a - ld (A_m0), a - - ld b, 24 + ; clear quotient + xor a + ld (A_m2),a + ld (A_m1),a + ld (A_m0),a + ld b,24 div_mantissas_loop: - call shl24_A_1 - call shl48_P_1 - - ; subtract divisor from high 24 bits of P (P5..P3) - call sub24_Phigh_minus_B - jr c, div_mantissas_restore - ; success => set quotient LSB = 1 - ld a, (A_m0) - or 0x1 - ld (A_m0), a - jr div_mantissas_next + call shl24_A_1 + call shl48_P_1 + ; subtract divisor from high 24 bits of P (P5..P3) + call sub24_Phigh_minus_B + jr c,div_mantissas_restore + ; success => set quotient LSB = 1 + ld a,(A_m0) + or 0x1 + ld (A_m0),a + jr div_mantissas_next div_mantissas_restore: - call add24_Phigh_plus_B - + call add24_Phigh_plus_B div_mantissas_next: - djnz div_mantissas_loop - ret + djnz div_mantissas_loop + ret + shl48_P_by_A: - ld (SHCNT), a - ld a, (SHCNT) - or a - ret z - + ld (SHCNT),a + ld a,(SHCNT) + or a + ret z shl48_P_by_A_loop: - call shl48_P_1 - ld a, (SHCNT) - dec a - ld (SHCNT), a - jr nz, shl48_P_by_A_loop - ret + call shl48_P_1 + ld a,(SHCNT) + dec a + ld (SHCNT),a + jr nz,shl48_P_by_A_loop + ret + shl48_P_1: - ld a, (P0) - add a, a - ld (P0), a - ld a, (P1) - adc a, a - ld (P1), a - ld a, (P2) - adc a, a - ld (P2), a - ld a, (P3) - adc a, a - ld (P3), a - ld a, (P4) - adc a, a - ld (P4), a - ld a, (P5) - adc a, a - ld (P5), a - ret + ld a,(P0) + add a,a + ld (P0),a + ld a,(P1) + adc a,a + ld (P1),a + ld a,(P2) + adc a,a + ld (P2),a + ld a,(P3) + adc a,a + ld (P3),a + ld a,(P4) + adc a,a + ld (P4),a + ld a,(P5) + adc a,a + ld (P5),a + ret + sub24_Phigh_minus_B: - ld a, (B_m0) - ld b, a - ld a, (P3) - sub b - ld (P3), a - ld a, (B_m1) - ld b, a - ld a, (P4) - sbc a, b - ld (P4), a - ld a, (B_m2) - ld b, a - ld a, (P5) - sbc a, b - ld (P5), a - ret ; carry set indicates borrow + ld a,(B_m0) + ld b,a + ld a,(P3) + sub b + ld (P3),a + ld a,(B_m1) + ld b,a + ld a,(P4) + sbc a,b + ld (P4),a + ld a,(B_m2) + ld b,a + ld a,(P5) + sbc a,b + ld (P5),a + ret ; carry set indicates borrow + add24_Phigh_plus_B: - ld a, (B_m0) - ld b, a - ld a, (P3) - add a, b - ld (P3), a - ld a, (B_m1) - ld b, a - ld a, (P4) - adc a, b - ld (P4), a - ld a, (B_m2) - ld b, a - ld a, (P5) - adc a, b - ld (P5), a - ret + ld a,(B_m0) + ld b,a + ld a,(P3) + add a,b + ld (P3),a + ld a,(B_m1) + ld b,a + ld a,(P4) + adc a,b + ld (P4),a + ld a,(B_m2) + ld b,a + ld a,(P5) + adc a,b + ld (P5),a + ret - ; ============================================================ - ; fp_print: fixed format printing - ; Prints: [-]I.FFFFFF (FRAC_DIGITS digits) - ; Uses os_print_vec (A=char) - ; ============================================================ +; ============================================================ +; fp_print: fixed format printing +; Prints: [-]I.FFFFFF (FRAC_DIGITS digits) +; Uses os_print_vec (A=char) +; ============================================================ fp_print: - ; zero? - ld a, (hl) - or a - jr nz, fp_print_nz - ld a, '0' - call os_print_vec - ld a, '.' - call os_print_vec - ld b, FRAC_DIGITS - + ; zero? + ld a,(hl) + or a + jr nz,fp_print_nz + ld a,'0' + call os_print_vec + ld a,'.' + call os_print_vec + ld b,FRAC_DIGITS fp_print_zf: - ld a, '0' - call os_print_vec - djnz fp_print_zf - ret + ld a,'0' + call os_print_vec + djnz fp_print_zf + ret fp_print_nz: - ; EXP -> PR_E (unbiased) - ld a, (hl) - sub FP_BIAS - ld (PR_E), a - inc hl - - ; sign + top fraction - ld a, (hl) - ld b, a - and 0x80 - jp z, fp_print_ps0 - ld a, 1 - jr fp_print_ps1 + ; EXP -> PR_E (unbiased) + ld a,(hl) + sub FP_BIAS + ld (PR_E),a + inc hl + ; sign + top fraction + ld a,(hl) + ld b,a + and 0x80 + jp z,fp_print_ps0 + ld a,1 + jr fp_print_ps1 fp_print_ps0: - xor a - + xor a fp_print_ps1: - ld (PR_SIGN), a + ld (PR_SIGN),a - ; mantissa with hidden 1 inserted - ld a, b - and 0x7F - or 0x80 - ld (PR_M2), a - inc hl - ld a, (hl) - ld (PR_M1), a - inc hl - ld a, (hl) - ld (PR_M0), a - - ; print '-' - ld a, (PR_SIGN) - or a - jp z, fp_print_mag - ld a, '-' - call os_print_vec + ; mantissa with hidden 1 inserted + ld a,b + and 0x7F + or 0x80 + ld (PR_M2),a + inc hl + ld a,(hl) + ld (PR_M1),a + inc hl + ld a,(hl) + ld (PR_M0),a + ; print '-' + ld a,(PR_SIGN) + or a + jp z,fp_print_mag + ld a,'-' + call os_print_vec fp_print_mag: - ; S = (E - 23) - ld a, (PR_E) - sub 23 + ; S = (E - 23) + ld a,(PR_E) + sub 23 - ; clear int and remainder helpers - xor a - ld (PR_INT0), a - ld (PR_INT1), a - ld (PR_INT2), a - ld (PR_INT3), a - ld (PR_R3), a + ; clear int and remainder helpers + xor a + ld (PR_INT0),a + ld (PR_INT1),a + ld (PR_INT2),a + ld (PR_INT3),a + ld (PR_R3),a - bit 7, a - jp z, fp_print_S_nonneg + bit 7,a + jp z,fp_print_S_nonneg - ; S negative: INT = [M2][M1][M0][00] (i.e., M << 8), then shift right by -S - neg - ld b, a; B = shift count + ; S negative: INT = [M2][M1][M0][00] (i.e., M << 8), then shift right by -S + neg + ld b,a ; B = shift count - xor a - ld (PR_INT0), a - ld a, (PR_M0) - ld (PR_INT1), a - ld a, (PR_M1) - ld (PR_INT2), a - ld a, (PR_M2) - ld (PR_INT3), a + xor a + ld (PR_INT0),a + ld a,(PR_M0) + ld (PR_INT1),a + ld a,(PR_M1) + ld (PR_INT2),a + ld a,(PR_M2) + ld (PR_INT3),a - call shr32_INT_to_INT_with_remainder - jr fp_print_print_int_and_frac + call shr32_INT_to_INT_with_remainder + jr fp_print_print_int_and_frac fp_print_S_nonneg: - ; S non-negative: INT = M (24-bit) then shift left S (cap at 31) - cp 32 - jr c, fp_print_doShl - ld a, 31 - + ; S non-negative: INT = M (24-bit) then shift left S (cap at 31) + cp 32 + jr c,fp_print_doShl + ld a,31 fp_print_doShl: - ld b, a - ld a, (PR_M0) - ld (PR_INT0), a - ld a, (PR_M1) - ld (PR_INT1), a - ld a, (PR_M2) - ld (PR_INT2), a - xor a - ld (PR_INT3), a - call shl32_INT_by_B + ld b,a + ld a,(PR_M0) + ld (PR_INT0),a + ld a,(PR_M1) + ld (PR_INT1),a + ld a,(PR_M2) + ld (PR_INT2),a + xor a + ld (PR_INT3),a + call shl32_INT_by_B fp_print_print_int_and_frac: - call print_u32_dec - ld a, '.' - call os_print_vec - ld b, FRAC_DIGITS - + call print_u32_dec + ld a,'.' + call os_print_vec + ld b,FRAC_DIGITS fp_print_fr: - call mul_remainder_by_10 - ld a, (PR_R3) - add a, '0' - call os_print_vec - xor a - ld (PR_R3), a - djnz fp_print_fr - ret + call mul_remainder_by_10 + ld a,(PR_R3) + add a,'0' + call os_print_vec + xor a + ld (PR_R3),a + djnz fp_print_fr + ret - ; Shift-right PR_INT by B, collect shifted-out bits into PR_R3 (simplified) +; Shift-right PR_INT by B, collect shifted-out bits into PR_R3 (simplified) shr32_INT_to_INT_with_remainder: - xor a - ld (PR_R3), a - ld a, b - or a - ret z - + xor a + ld (PR_R3),a + ld a,b + or a + ret z shr32_INT_to_INT_with_remainder_loop: - ld a, (PR_INT3) - srl a - ld (PR_INT3), a - ld a, (PR_INT2) - rr a - ld (PR_INT2), a - ld a, (PR_INT1) - rr a - ld (PR_INT1), a - ld a, (PR_INT0) - rr a - ld (PR_INT0), a - ; carry has shifted-out bit; accumulate into PR_R3 - ld a, (PR_R3) - add a, a - adc a, 0 - ld (PR_R3), a - djnz shr32_INT_to_INT_with_remainder_loop - ret + ld a,(PR_INT3) + srl a + ld (PR_INT3),a + ld a,(PR_INT2) + rr a + ld (PR_INT2),a + ld a,(PR_INT1) + rr a + ld (PR_INT1),a + ld a,(PR_INT0) + rr a + ld (PR_INT0),a + ; carry has shifted-out bit; accumulate into PR_R3 + ld a,(PR_R3) + add a,a + adc a,0 + ld (PR_R3),a + djnz shr32_INT_to_INT_with_remainder_loop + ret + shl32_INT_by_B: - ld a, b - or a - ret z - + ld a,b + or a + ret z shl32_INT_by_B_loop: - ld a, (PR_INT0) - add a, a - ld (PR_INT0), a - ld a, (PR_INT1) - adc a, a - ld (PR_INT1), a - ld a, (PR_INT2) - adc a, a - ld (PR_INT2), a - ld a, (PR_INT3) - adc a, a - ld (PR_INT3), a - djnz shl32_INT_by_B_loop - ret + ld a,(PR_INT0) + add a,a + ld (PR_INT0),a + ld a,(PR_INT1) + adc a,a + ld (PR_INT1),a + ld a,(PR_INT2) + adc a,a + ld (PR_INT2),a + ld a,(PR_INT3) + adc a,a + ld (PR_INT3),a + djnz shl32_INT_by_B_loop + ret + mul_remainder_by_10: - ld a, (PR_R3) - ld b, a - add a, a; *2 - add a, a; *4 - add a, a; *8 - add a, b; *9 - add a, b; *10 - ld (PR_R3), a - ret + ld a,(PR_R3) + ld b,a + add a,a ; *2 + add a,a ; *4 + add a,a ; *8 + add a,b ; *9 + add a,b ; *10 + ld (PR_R3),a + ret - ; Print PR_INT (u32) as decimal +; Print PR_INT (u32) as decimal print_u32_dec: - ld a, (PR_INT0) - ld b, a - ld a, (PR_INT1) - or b - ld b, a - ld a, (PR_INT2) - or b - ld b, a - ld a, (PR_INT3) - or b - jr nz, print_u32_dec_nz - ld a, '0' - call os_print_vec - ret - + ld a,(PR_INT0) + ld b,a + ld a,(PR_INT1) + or b + ld b,a + ld a,(PR_INT2) + or b + ld b,a + ld a,(PR_INT3) + or b + jr nz,print_u32_dec_nz + ld a,'0' + call os_print_vec + ret print_u32_dec_nz: - xor a - ld (DIGLEN), a - + xor a + ld (DIGLEN),a print_u32_dec_dloop: - call u32_div10_inplace; remainder in A, quotient back in PR_INT - ld hl, DIGBUF - ld b, 0 - ld a, (DIGLEN) - ld c, a - add hl, bc - add a, '0' - ld (hl), a - ld a, (DIGLEN) - inc a - ld (DIGLEN), a - ld a, (PR_INT0) - ld b, a - ld a, (PR_INT1) - or b - ld b, a - ld a, (PR_INT2) - or b - ld b, a - ld a, (PR_INT3) - or b - jr nz, print_u32_dec_dloop - - ; print in reverse - ld a, (DIGLEN) - ld b, a + call u32_div10_inplace ; remainder in A, quotient back in PR_INT + ld hl,DIGBUF + ld b,0 + ld a,(DIGLEN) + ld c,a + add hl,bc + add a,'0' + ld (hl),a + ld a,(DIGLEN) + inc a + ld (DIGLEN),a + ld a,(PR_INT0) + ld b,a + ld a,(PR_INT1) + or b + ld b,a + ld a,(PR_INT2) + or b + ld b,a + ld a,(PR_INT3) + or b + jr nz,print_u32_dec_dloop + ; print in reverse + ld a,(DIGLEN) + ld b,a print_u32_dec_pr: - dec b - ld hl, DIGBUF - ld c, b - ld b, 0 - add hl, bc - ld a, (hl) - call os_print_vec - ld a, c - or a - jr nz, print_u32_dec_pr - ret + dec b + ld hl,DIGBUF + ld c,b + ld b,0 + add hl,bc + ld a,(hl) + call os_print_vec + ld a,c + or a + jr nz,print_u32_dec_pr + ret - ; Divide PR_INT (u32) by 10, return remainder in A (0..9) +; Divide PR_INT (u32) by 10, return remainder in A (0..9) u32_div10_inplace: - ld b, 0; remainder - ld hl, PR_INT3 - call u32_div10_step - inc hl - call u32_div10_step - inc hl - call u32_div10_step - inc hl - call u32_div10_step - ld a, b - ret - + ld b,0 ; remainder + ld hl,PR_INT3 + call u32_div10_step + inc hl + call u32_div10_step + inc hl + call u32_div10_step + inc hl + call u32_div10_step + ld a,b + ret u32_div10_step: - ; DE = remainder*256 + byte - ld a, b - ld d, a - ld e, (hl) - ld c, 0; quotient byte - + ; DE = remainder*256 + byte + ld a,b + ld d,a + ld e,(hl) + ld c,0 ; quotient byte u32_div10_div: - ld a, d - or a - jr nz, u32_div10_sub - ld a, e - cp 10 - jr c, u32_div10_done - + ld a,d + or a + jr nz,u32_div10_sub + ld a,e + cp 10 + jr c,u32_div10_done u32_div10_sub: - ld a, e - sub 10 - ld e, a - ld a, d - sbc a, 0 - ld d, a - inc c - jr u32_div10_div - + ld a,e + sub 10 + ld e,a + ld a,d + sbc a,0 + ld d,a + inc c + jr u32_div10_div u32_div10_done: - ld (hl), c - ld b, e - ret + ld (hl),c + ld b,e + ret - ; ============================================================ - ; fp_parse: parse decimal string -> float - ; DE -> "[-]ddd[.ddd]\0" - ; HL -> output float - ; ============================================================ +; ============================================================ +; fp_parse: parse decimal string -> float +; DE -> "[-]ddd[.ddd]\0" +; HL -> output float +; ============================================================ fp_parse: - xor a - ld (P_SIGN), a - ld (P_FRACN), a - ld (P_S0), a - ld (P_S1), a - ld (P_S2), a - ld (P_S3), a - - ; optional sign - ld a, (de) - cp '-' - jr nz, fp_parse_chkplus - ld a, 1 - ld (P_SIGN), a - inc de - jr fp_parse_intpart + xor a + ld (P_SIGN),a + ld (P_FRACN),a + ld (P_S0),a + ld (P_S1),a + ld (P_S2),a + ld (P_S3),a + ; optional sign + ld a,(de) + cp '-' + jr nz,fp_parse_chkplus + ld a,1 + ld (P_SIGN),a + inc de + jr fp_parse_intpart fp_parse_chkplus: - ld a, (de) - cp '+' - jr nz, fp_parse_intpart - inc de + ld a,(de) + cp '+' + jr nz,fp_parse_intpart + inc de fp_parse_intpart: - ld a, (de) - call is_digit - jr nc, fp_parse_maybe_dot - + ld a,(de) + call is_digit + jr nc,fp_parse_maybe_dot fp_parse_il: - ld a, (de) - sub '0' - ld c, a - call u32_mul10_scaled - call u32_add8_scaled - inc de - ld a, (de) - call is_digit - jr c, fp_parse_il + ld a,(de) + sub '0' + ld c,a + call u32_mul10_scaled + call u32_add8_scaled + inc de + ld a,(de) + call is_digit + jr c,fp_parse_il fp_parse_maybe_dot: - ld a, (de) - cp '.' - jr nz, fp_parse_finish_scaled - inc de - - ld b, MAX_FRAC + ld a,(de) + cp '.' + jr nz,fp_parse_finish_scaled + inc de + ld b,MAX_FRAC fp_parse_fl: - ld a, (de) - call is_digit - jr nc, fp_parse_finish_scaled - ld a, (de) - sub '0' - ld c, a - call u32_mul10_scaled - call u32_add8_scaled - ld a, (P_FRACN) - inc a - ld (P_FRACN), a - inc de - djnz fp_parse_fl + ld a,(de) + call is_digit + jr nc,fp_parse_finish_scaled + ld a,(de) + sub '0' + ld c,a + call u32_mul10_scaled + call u32_add8_scaled + ld a,(P_FRACN) + inc a + ld (P_FRACN),a + inc de + djnz fp_parse_fl fp_parse_finish_scaled: - ; convert scaled u32 to float into (HL) - call fp_from_u32_scaled_to_A + ; convert scaled u32 to float into (HL) + call fp_from_u32_scaled_to_A - ; divide by 10^k if needed - ld a, (P_FRACN) - or a - jp z, fp_parse_apply_sign + ; divide by 10^k if needed + ld a,(P_FRACN) + or a + jp z,fp_parse_apply_sign - ; DE = &pow10_table[k] - push hl - ld e, a - ld d, 0 - ld hl, pow10_table - add hl, de - add hl, de - add hl, de - add hl, de - ex de, hl - pop hl - call fp_div + ; DE = &pow10_table[k] + push hl + ld e,a + ld d,0 + ld hl,pow10_table + add hl,de + add hl,de + add hl,de + add hl,de + ex de,hl + pop hl + call fp_div fp_parse_apply_sign: - ld a, (P_SIGN) - or a - ret z - inc hl - ld a, (hl) - xor 0x80 - ld (hl), a - ret + ld a,(P_SIGN) + or a + ret z + inc hl + ld a,(hl) + xor 0x80 + ld (hl),a + ret + is_digit: - cp '0' - jr c, is_digit_no - cp '9'+1 - jr nc, is_digit_no - scf - ret - + cp '0' + jr c,is_digit_no + cp '9'+1 + jr nc,is_digit_no + scf + ret is_digit_no: - or a - ret + or a + ret - ; P_S = P_S*10 (uses PR_INT and PR_R0..3 as scratch) +; P_S = P_S*10 (uses PR_INT and PR_R0..3 as scratch) u32_mul10_scaled: - ; PR_INT = P - ld a, (P_S0) - ld (PR_INT0), a - ld a, (P_S1) - ld (PR_INT1), a - ld a, (P_S2) - ld (PR_INT2), a - ld a, (P_S3) - ld (PR_INT3), a - ; PR_INT *=2 - ld b, 1 - call shl32_INT_by_B + ; PR_INT = P + ld a,(P_S0) + ld (PR_INT0),a + ld a,(P_S1) + ld (PR_INT1),a + ld a,(P_S2) + ld (PR_INT2),a + ld a,(P_S3) + ld (PR_INT3),a + ; PR_INT *=2 + ld b,1 + call shl32_INT_by_B - ; PR_R = P - ld a, (P_S0) - ld (PR_R0), a - ld a, (P_S1) - ld (PR_R1), a - ld a, (P_S2) - ld (PR_R2), a - ld a, (P_S3) - ld (PR_R3), a - ; PR_R *=8 (shift left 3) - ld b, 3 - call shl32_R_by_B + ; PR_R = P + ld a,(P_S0) + ld (PR_R0),a + ld a,(P_S1) + ld (PR_R1),a + ld a,(P_S2) + ld (PR_R2),a + ld a,(P_S3) + ld (PR_R3),a + ; PR_R *=8 (shift left 3) + ld b,3 + call shl32_R_by_B + + ; P = PR_INT + PR_R + ld a,(PR_R0) + ld b,a + ld a,(PR_INT0) + add a,b + ld (P_S0),a + ld a,(PR_R1) + ld b,a + ld a,(PR_INT1) + adc a,b + ld (P_S1),a + ld a,(PR_R2) + ld b,a + ld a,(PR_INT2) + adc a,b + ld (P_S2),a + ld a,(PR_R3) + ld b,a + ld a,(PR_INT3) + adc a,b + ld (P_S3),a + ret - ; P = PR_INT + PR_R - ld a, (PR_R0) - ld b, a - ld a, (PR_INT0) - add a, b - ld (P_S0), a - ld a, (PR_R1) - ld b, a - ld a, (PR_INT1) - adc a, b - ld (P_S1), a - ld a, (PR_R2) - ld b, a - ld a, (PR_INT2) - adc a, b - ld (P_S2), a - ld a, (PR_R3) - ld b, a - ld a, (PR_INT3) - adc a, b - ld (P_S3), a - ret shl32_R_by_B: - ld a, b - or a - ret z - + ld a,b + or a + ret z shl32_R_by_B_loop: - ld a, (PR_R0) - add a, a - ld (PR_R0), a - ld a, (PR_R1) - adc a, a - ld (PR_R1), a - ld a, (PR_R2) - adc a, a - ld (PR_R2), a - ld a, (PR_R3) - adc a, a - ld (PR_R3), a - djnz shl32_R_by_B_loop - ret + ld a,(PR_R0) + add a,a + ld (PR_R0),a + ld a,(PR_R1) + adc a,a + ld (PR_R1),a + ld a,(PR_R2) + adc a,a + ld (PR_R2),a + ld a,(PR_R3) + adc a,a + ld (PR_R3),a + djnz shl32_R_by_B_loop + ret - ; P_S += C (0..9) +; P_S += C (0..9) u32_add8_scaled: - ld a, (P_S0) - add a, c - ld (P_S0), a - ld a, (P_S1) - adc a, 0 - ld (P_S1), a - ld a, (P_S2) - adc a, 0 - ld (P_S2), a - ld a, (P_S3) - adc a, 0 - ld (P_S3), a - ret + ld a,(P_S0) + add a,c + ld (P_S0),a + ld a,(P_S1) + adc a,0 + ld (P_S1),a + ld a,(P_S2) + adc a,0 + ld (P_S2),a + ld a,(P_S3) + adc a,0 + ld (P_S3),a + ret - ; Convert P_S (u32) to float at (HL). Positive only; sign handled by caller. +; Convert P_S (u32) to float at (HL). Positive only; sign handled by caller. fp_from_u32_scaled_to_A: - ld a, (P_S0) - ld b, a - ld a, (P_S1) - or b - ld b, a - ld a, (P_S2) - or b - ld b, a - ld a, (P_S3) - or b - jr nz, fp_from_u32_scaled_to_A_nz - ld (hl), 0 - inc hl - ld (hl), 0 - inc hl - ld (hl), 0 - inc hl - ld (hl), 0 - ret + ld a,(P_S0) + ld b,a + ld a,(P_S1) + or b + ld b,a + ld a,(P_S2) + or b + ld b,a + ld a,(P_S3) + or b + jr nz,fp_from_u32_scaled_to_A_nz + ld (hl),0 + inc hl + ld (hl),0 + inc hl + ld (hl),0 + inc hl + ld (hl),0 + ret fp_from_u32_scaled_to_A_nz: - ; find MSB index in B (0..31) - ld b, 31 - ld a, (P_S3) - ld c, a - or a - jr nz, fp_from_u32_scaled_to_A_scan - ld b, 23 - ld a, (P_S2) - ld c, a - or a - jr nz, fp_from_u32_scaled_to_A_scan - ld b, 15 - ld a, (P_S1) - ld c, a - or a - jr nz, fp_from_u32_scaled_to_A_scan - ld b, 7 - ld a, (P_S0) - ld c, a - + ; find MSB index in B (0..31) + ld b,31 + ld a,(P_S3) + ld c,a + or a + jr nz,fp_from_u32_scaled_to_A_scan + ld b,23 + ld a,(P_S2) + ld c,a + or a + jr nz,fp_from_u32_scaled_to_A_scan + ld b,15 + ld a,(P_S1) + ld c,a + or a + jr nz,fp_from_u32_scaled_to_A_scan + ld b,7 + ld a,(P_S0) + ld c,a fp_from_u32_scaled_to_A_scan: fp_from_u32_scaled_to_A_find: - bit 7, c - jr nz, fp_from_u32_scaled_to_A_found - ld a, c - add a, a - ld c, a - dec b - jr fp_from_u32_scaled_to_A_find - + bit 7,c + jr nz,fp_from_u32_scaled_to_A_found + ld a,c + add a,a + ld c,a + dec b + jr fp_from_u32_scaled_to_A_find fp_from_u32_scaled_to_A_found: - ; EXP = FP_BIAS + B - ld a, b - add a, FP_BIAS - ld (hl), a - inc hl + ; EXP = FP_BIAS + B + ld a,b + add a,FP_BIAS + ld (hl),a + inc hl - ; shift value left by (23-B), take top 24 bits - ld a, 23 - sub b - ld b, a + ; shift value left by (23-B), take top 24 bits + ld a,23 + sub b + ld b,a - ; PR_INT = P_S - ld a, (P_S0) - ld (PR_INT0), a - ld a, (P_S1) - ld (PR_INT1), a - ld a, (P_S2) - ld (PR_INT2), a - ld a, (P_S3) - ld (PR_INT3), a - call shl32_INT_by_B + ; PR_INT = P_S + ld a,(P_S0) + ld (PR_INT0),a + ld a,(P_S1) + ld (PR_INT1),a + ld a,(P_S2) + ld (PR_INT2),a + ld a,(P_S3) + ld (PR_INT3),a + call shl32_INT_by_B - ; store sign=0, fraction = top 23 bits of mantissa (hidden 1 removed) - ld a, (PR_INT3) - and 0x7F - ld (hl), a - inc hl - ld a, (PR_INT2) - ld (hl), a - inc hl - ld a, (PR_INT1) - ld (hl), a - ret + ; store sign=0, fraction = top 23 bits of mantissa (hidden 1 removed) + ld a,(PR_INT3) + and 0x7F + ld (hl),a + inc hl + ld a,(PR_INT2) + ld (hl),a + inc hl + ld a,(PR_INT1) + ld (hl),a + ret - ; ============================================================ - ; BSS / WORKSPACE - ; ============================================================ - .balign 16 - .bss - ; Unpacked A - A_exp: .space 1 - A_sign: .space 1 - A_m2: .space 1 - A_m1: .space 1 - A_m0: .space 1 +; ============================================================ +; BSS / WORKSPACE +; ============================================================ +.balign 16 +.bss - ; Unpacked B - B_exp: .space 1 - B_sign: .space 1 - B_m2: .space 1 - B_m1: .space 1 - B_m0: .space 1 +; Unpacked A +A_exp: .space 1 +A_sign: .space 1 +A_m2: .space 1 +A_m1: .space 1 +A_m0: .space 1 - ; 48-bit workspace (P0 LSB .. P5 MSB) +; Unpacked B +B_exp: .space 1 +B_sign: .space 1 +B_m2: .space 1 +B_m1: .space 1 +B_m0: .space 1 -P0: - .space 1 +; 48-bit workspace (P0 LSB .. P5 MSB) +P0: .space 1 +P1: .space 1 +P2: .space 1 +P3: .space 1 +P4: .space 1 +P5: .space 1 -P1: - .space 1 +SHCNT: .space 1 -P2: - .space 1 +; Print temps +PR_SIGN: .space 1 +PR_E: .space 1 +PR_M2: .space 1 +PR_M1: .space 1 +PR_M0: .space 1 +PR_INT0: .space 1 +PR_INT1: .space 1 +PR_INT2: .space 1 +PR_INT3: .space 1 +PR_R0: .space 1 +PR_R1: .space 1 +PR_R2: .space 1 +PR_R3: .space 1 -P3: - .space 1 +; Parse temps +P_SIGN: .space 1 +P_FRACN: .space 1 +P_S0: .space 1 +P_S1: .space 1 +P_S2: .space 1 +P_S3: .space 1 -P4: - .space 1 - -P5: - .space 1 - -SHCNT: - .space 1 - - ; Print temps - PR_SIGN: .space 1 - PR_E: .space 1 - PR_M2: .space 1 - PR_M1: .space 1 - PR_M0: .space 1 - PR_INT0: .space 1 - PR_INT1: .space 1 - PR_INT2: .space 1 - PR_INT3: .space 1 - PR_R0: .space 1 - PR_R1: .space 1 - PR_R2: .space 1 - PR_R3: .space 1 - - ; Parse temps - P_SIGN: .space 1 - P_FRACN: .space 1 - P_S0: .space 1 - P_S1: .space 1 - P_S2: .space 1 - P_S3: .space 1 - - ; Digit buffer - -DIGBUF: - .space 1 - -DIGLEN: - .space 1 - - ; ============================================================ - ; pow10_table: 10^k constants (k=0..6) in THIS float encoding - ; Verified: - ; 1.0 = 127 00 00 00 - ; 10.0 = 130 20 00 00 - ; 100.0 = 133 48 00 00 - ; 1000.0 = 136 7A 00 00 - ; 10000.0 = 140 1C 40 00 - ; 100000.0 = 143 43 50 00 - ; 1000000.0= 146 74 24 00 - ; ============================================================ - .section "zone", "acrx" +; Digit buffer +DIGBUF: .space 1 +DIGLEN: .space 1 +; ============================================================ +; pow10_table: 10^k constants (k=0..6) in THIS float encoding +; Verified: +; 1.0 = 127 00 00 00 +; 10.0 = 130 20 00 00 +; 100.0 = 133 48 00 00 +; 1000.0 = 136 7A 00 00 +; 10000.0 = 140 1C 40 00 +; 100000.0 = 143 43 50 00 +; 1000000.0= 146 74 24 00 +; ============================================================ +.section "float","acrx" pow10_table: - .byte 127, 0x00, 0x00, 0x00; 10^0 = 1 - .byte 130, 0x20, 0x00, 0x00; 10^1 = 10 - .byte 133, 0x48, 0x00, 0x00; 10^2 = 100 - .byte 136, 0x7A, 0x00, 0x00; 10^3 = 1000 - .byte 140, 0x1C, 0x40, 0x00; 10^4 = 10000 - .byte 143, 0x43, 0x50, 0x00; 10^5 = 100000 - .byte 146, 0x74, 0x24, 0x00; 10^6 = 1000000 + .byte 127, 0x00, 0x00, 0x00 ; 10^0 = 1 + .byte 130, 0x20, 0x00, 0x00 ; 10^1 = 10 + .byte 133, 0x48, 0x00, 0x00 ; 10^2 = 100 + .byte 136, 0x7A, 0x00, 0x00 ; 10^3 = 1000 + .byte 140, 0x1C, 0x40, 0x00 ; 10^4 = 10000 + .byte 143, 0x43, 0x50, 0x00 ; 10^5 = 100000 + .byte 146, 0x74, 0x24, 0x00 ; 10^6 = 1000000 diff --git a/linker.cmd b/linker.cmd index 7d33d9d..787408d 100644 --- a/linker.cmd +++ b/linker.cmd @@ -2,10 +2,10 @@ /* SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell */ SECTIONS { - .zone 0xA000 : { zone.o(zone) float.o(zone) *(.zone) *(zone) } + .zone 0xA000 : { *(.zone) *(zone) } .boot 0xC3C3 : { *(.boot) *(boot) } + .text 0xE000 : { *(.text) *(.float) *(float) } .vectors 0xF000 : { *(.vectors) } .data 0x7000 : { *(.data) } - __data_end = .; - .bss __data_end : { *(.bss) } + .bss : { *(.bss) } } diff --git a/zone.asm b/zone.asm index 39a949a..1935aac 100644 --- a/zone.asm +++ b/zone.asm @@ -1,135 +1,30 @@ - ; SPDX-License-Identifier: MPL-2.0 - ; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell - ; ============================================================ - ; ZONE OS main section - ; ============================================================ - .section "zone", "acrx" - .global os_print_vec - .global os_getch_vec - .global os_outbyte_vec - .global os_inbyte_vec - .global os_main_loop - .local os_print_signon +; SPDX-License-Identifier: MPL-2.0 +; SPDX-FileCopyrightText: (c) 2025 A.M. Rowsell +; ============================================================ +; ZONE OS main section +; ============================================================ +.section "zone","acrx" +.global zone_start +.global os_print_vec +.global os_getch_vec +.global os_outbyte_vec +.global os_inbyte_vec +.global os_main_loop - .set UARTSTATUS, 6 - .set UARTDATA, 7 +zone_start: + ret os_print_vec: - ; this takes a character in a and sends it to the UART - push af - push bc - push de - ld d, a - ld c, UARTSTATUS - -$1: - in a, (c); get status byte - bit 0, a; test bit 0 - jr z, $1; jump back if UART not ready - ld a, d - ld c, UARTDATA - out (c), a; send byte to uart - pop de - pop bc - pop af - ret + ret os_getch_vec: - ; this gets a character from the UART and puts in in a - push bc - ld c, UARTSTATUS - -a2: - in a, (c); get status byte - bit 1, a - jr z, a2 - ld c, UARTDATA - in a, (c) - pop bc - ret + ret os_outbyte_vec: - ; this takes a byte in a and sends it to port in hl - push af - push bc - push hl - ld c, l - out (c), a - pop hl - pop bc - pop af - ret + ret os_inbyte_vec: - ; this gets a byte from port in hl and returns it in a - push bc - push hl - ld c, l - in a, (c) - pop hl - pop bc - ret + ret os_main_loop: - call os_print_signon - -os_busy_loop: - jp os_busy_loop - - ; ====================== - ; non-vectored functions - ; ====================== - -os_print_signon: - ld hl, zoneSignon - ld a, (hl) - -$2: - call os_print_vec - inc hl - ld a, (hl) - jr nz, $2 - -$3: - ld hl, zoneCopyright - ld a, (hl) - -$4: - call os_print_vec - inc hl - ld a, (hl) - jr nz, $4 - ret - -zoneSignon: - .byte "ZONE OS ver. 0.01" - .byte 10, 13, 0 - -zoneCopyright: - .byte "(c) A.M. Rowsell, license MPLv2" - .byte 10, 13, 0 - -zonePrompt: - .string "z] " - - ; error messages - -errorSyntax: - .string "Syntax error!" - -errorTimeout: - .string "Command timed out." - -errorNotFound: - .string "Command/file not found." - -errorHardware: - .string "Hardware error!" - -errorRAMFailed: - .string "RAM test failed!" - -stringTable: - .word zoneSignon, zoneCopyright, zonePrompt - .word errorSyntax, errorTimeout, errorNotFound - .word errorHardware, errorRAMFailed + jp os_main_loop