From 856e9e955f0e5ddcd64c6d328f279e12a5973574 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 28 Aug 2020 20:01:00 +1000 Subject: [PATCH] core: Add framework for an FPU This adds the skeleton of a floating-point unit and implements the mffs and mtfsf instructions. Execute1 sends FP instructions to the FPU and receives busy, exception, FP interrupt and illegal interrupt signals from it. Signed-off-by: Paul Mackerras --- Makefile | 2 +- common.vhdl | 69 ++++++++++++++ core.vhdl | 34 +++++++ decode1.vhdl | 18 ++++ decode2.vhdl | 11 ++- decode_types.vhdl | 9 +- execute1.vhdl | 82 +++++++++++++---- fpu.vhdl | 185 ++++++++++++++++++++++++++++++++++++++ microwatt.core | 1 + scripts/fmt_log/fmt_log.c | 12 +-- writeback.vhdl | 27 +++++- 11 files changed, 417 insertions(+), 33 deletions(-) create mode 100644 fpu.vhdl diff --git a/Makefile b/Makefile index b584895..9fe2106 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \ loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \ - core.vhdl + core.vhdl fpu.vhdl soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \ wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \ diff --git a/common.vhdl b/common.vhdl index e1ba844..f91ac18 100644 --- a/common.vhdl +++ b/common.vhdl @@ -94,6 +94,38 @@ package common is end record; constant xerc_init : xer_common_t := (others => '0'); + -- FPSCR bit numbers + constant FPSCR_FX : integer := 63 - 32; + constant FPSCR_FEX : integer := 63 - 33; + constant FPSCR_VX : integer := 63 - 34; + constant FPSCR_OX : integer := 63 - 35; + constant FPSCR_UX : integer := 63 - 36; + constant FPSCR_ZX : integer := 63 - 37; + constant FPSCR_XX : integer := 63 - 38; + constant FPSCR_VXSNAN : integer := 63 - 39; + constant FPSCR_VXISI : integer := 63 - 40; + constant FPSCR_VXIDI : integer := 63 - 41; + constant FPSCR_VXZDZ : integer := 63 - 42; + constant FPSCR_VXIMZ : integer := 63 - 43; + constant FPSCR_VXVC : integer := 63 - 44; + constant FPSCR_FR : integer := 63 - 45; + constant FPSCR_FI : integer := 63 - 46; + constant FPSCR_C : integer := 63 - 47; + constant FPSCR_FL : integer := 63 - 48; + constant FPSCR_FG : integer := 63 - 49; + constant FPSCR_FE : integer := 63 - 50; + constant FPSCR_FU : integer := 63 - 51; + constant FPSCR_VXSOFT : integer := 63 - 53; + constant FPSCR_VXSQRT : integer := 63 - 54; + constant FPSCR_VXCVI : integer := 63 - 55; + constant FPSCR_VE : integer := 63 - 56; + constant FPSCR_OE : integer := 63 - 57; + constant FPSCR_UE : integer := 63 - 58; + constant FPSCR_ZE : integer := 63 - 59; + constant FPSCR_XE : integer := 63 - 60; + constant FPSCR_NI : integer := 63 - 61; + constant FPSCR_RN : integer := 63 - 63; + type irq_state_t is (WRITE_SRR0, WRITE_SRR1); -- For now, fixed 16 sources, make this either a parametric @@ -413,6 +445,43 @@ package common is write_cr_data => (others => '0'), write_reg => (others => '0'), exc_write_reg => (others => '0'), exc_write_data => (others => '0')); + type Execute1ToFPUType is record + valid : std_ulogic; + op : insn_type_t; + nia : std_ulogic_vector(63 downto 0); + insn : std_ulogic_vector(31 downto 0); + single : std_ulogic; + fe_mode : std_ulogic_vector(1 downto 0); + fra : std_ulogic_vector(63 downto 0); + frb : std_ulogic_vector(63 downto 0); + frc : std_ulogic_vector(63 downto 0); + frt : gspr_index_t; + rc : std_ulogic; + out_cr : std_ulogic; + end record; + constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'), + insn => (others => '0'), fe_mode => "00", rc => '0', + fra => (others => '0'), frb => (others => '0'), + frc => (others => '0'), frt => (others => '0'), + single => '0', out_cr => '0'); + + type FPUToExecute1Type is record + busy : std_ulogic; + exception : std_ulogic; + interrupt : std_ulogic; + illegal : std_ulogic; + end record; + + type FPUToWritebackType is record + valid : std_ulogic; + write_enable : std_ulogic; + write_reg : gspr_index_t; + write_data : std_ulogic_vector(63 downto 0); + write_cr_enable : std_ulogic; + write_cr_mask : std_ulogic_vector(7 downto 0); + write_cr_data : std_ulogic_vector(31 downto 0); + end record; + type DividerToExecute1Type is record valid: std_ulogic; write_reg_data: std_ulogic_vector(63 downto 0); diff --git a/core.vhdl b/core.vhdl index 81e11c8..b905297 100644 --- a/core.vhdl +++ b/core.vhdl @@ -80,6 +80,11 @@ architecture behave of core is signal mmu_to_dcache: MmuToDcacheType; signal dcache_to_mmu: DcacheToMmuType; + -- FPU signals + signal execute1_to_fpu: Execute1ToFPUType; + signal fpu_to_execute1: FPUToExecute1Type; + signal fpu_to_writeback: FPUToWritebackType; + -- local signals signal fetch1_stall_in : std_ulogic; signal icache_stall_out : std_ulogic; @@ -109,6 +114,7 @@ architecture behave of core is signal rst_dec1 : std_ulogic := '1'; signal rst_dec2 : std_ulogic := '1'; signal rst_ex1 : std_ulogic := '1'; + signal rst_fpu : std_ulogic := '1'; signal rst_ls1 : std_ulogic := '1'; signal rst_dbg : std_ulogic := '1'; signal alt_reset_d : std_ulogic; @@ -171,6 +177,7 @@ begin rst_dec1 <= core_rst; rst_dec2 <= core_rst; rst_ex1 <= core_rst; + rst_fpu <= core_rst; rst_ls1 <= core_rst; rst_dbg <= rst; alt_reset_d <= alt_reset; @@ -225,6 +232,7 @@ begin decode1_0: entity work.decode1 generic map( + HAS_FPU => HAS_FPU, LOG_LENGTH => LOG_LENGTH ) port map ( @@ -313,9 +321,11 @@ begin busy_out => ex1_busy_out, e_in => decode2_to_execute1, l_in => loadstore1_to_execute1, + fp_in => fpu_to_execute1, ext_irq_in => ext_irq, l_out => execute1_to_loadstore1, f_out => execute1_to_fetch1, + fp_out => execute1_to_fpu, e_out => execute1_to_writeback, icache_inval => ex1_icache_inval, dbg_msr_out => msr, @@ -326,6 +336,29 @@ begin log_wr_addr => log_wr_addr ); + with_fpu: if HAS_FPU generate + begin + fpu_0: entity work.fpu + port map ( + clk => clk, + rst => rst_fpu, + e_in => execute1_to_fpu, + e_out => fpu_to_execute1, + w_out => fpu_to_writeback + ); + end generate; + + no_fpu: if not HAS_FPU generate + begin + fpu_to_execute1.busy <= '0'; + fpu_to_execute1.exception <= '0'; + fpu_to_execute1.interrupt <= '0'; + fpu_to_execute1.illegal <= '0'; + fpu_to_writeback.valid <= '0'; + fpu_to_writeback.write_enable <= '0'; + fpu_to_writeback.write_cr_enable <= '0'; + end generate; + loadstore1_0: entity work.loadstore1 generic map ( HAS_FPU => HAS_FPU, @@ -381,6 +414,7 @@ begin clk => clk, e_in => execute1_to_writeback, l_in => loadstore1_to_writeback, + fp_in => fpu_to_writeback, w_out => writeback_to_register_file, c_out => writeback_to_cr_file, complete_out => complete diff --git a/decode1.vhdl b/decode1.vhdl index 29f0e50..afd37ef 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -8,6 +8,7 @@ use work.decode_types.all; entity decode1 is generic ( + HAS_FPU : boolean := true; -- Non-zero to enable log data collection LOG_LENGTH : natural := 0 ); @@ -55,6 +56,7 @@ architecture behaviour of decode1 is type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; + type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t; constant major_decode_rom_array : major_rom_array_t := ( -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl @@ -416,6 +418,15 @@ architecture behaviour of decode1 is others => decode_rom_init ); + -- indexed by bits 4..1 and 10..6 of instruction word + constant decode_op_63l_array : op_63_subop_array_0_t := ( + -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl + -- op in out A out in out len ext pipe + 2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family + 2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf + others => illegal_inst + ); + -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- op in out A out in out len ext pipe constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); @@ -569,6 +580,13 @@ begin when 62 => v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0)))); + when 63 => + if HAS_FPU then + -- floating point operations, general and double-precision + v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6)))); + vi.override := f_in.insn(5); + end if; + when others => end case; diff --git a/decode2.vhdl b/decode2.vhdl index 6cc74c7..8b2ab8c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -93,6 +93,12 @@ architecture behaviour of decode2 is case t is when RB => ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data); + when FRB => + if HAS_FPU then + ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data); + else + ret := ('0', (others => '0'), (others => '0')); + end if; when CONST_UI => ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64))); when CONST_SI => @@ -296,6 +302,7 @@ begin r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR else gpr_to_gspr(insn_ra(d_in.insn)); r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR + else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU else gpr_to_gspr(insn_rb(d_in.insn)); r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU @@ -321,7 +328,7 @@ begin mul_b := (others => '0'); --v.e.input_cr := d_in.decode.input_cr; - --v.e.output_cr := d_in.decode.output_cr; + v.e.output_cr := d_in.decode.output_cr; decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1, d_in.nia); @@ -412,7 +419,7 @@ begin cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); cr_bypass_avail <= '0'; - if EX1_BYPASS then + if EX1_BYPASS and d_in.decode.unit = ALU then cr_bypass_avail <= d_in.decode.output_cr; end if; diff --git a/decode_types.vhdl b/decode_types.vhdl index 8c20441..5eaef50 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -7,8 +7,9 @@ package decode_types is OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CNTZ, OP_CROP, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, - OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, - OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, + OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI, + OP_FPOP, OP_FPOP_I, + OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_LOAD, OP_STORE, OP_FPLOAD, OP_FPSTORE, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD, @@ -24,7 +25,7 @@ package decode_types is ); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, - CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR); + CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB); type input_reg_c_t is (NONE, RS, RCR, FRS); type output_reg_a_t is (NONE, RT, RA, SPR, FRT); type rc_t is (NONE, ONE, RC); @@ -48,7 +49,7 @@ package decode_types is constant TOO_OFFSET : integer := 0; - type unit_t is (NONE, ALU, LDST); + type unit_t is (NONE, ALU, LDST, FPU); type length_t is (NONE, is1B, is2B, is4B, is8B); type decode_rom_t is record diff --git a/execute1.vhdl b/execute1.vhdl index 9d9b711..29713b2 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -27,12 +27,14 @@ entity execute1 is e_in : in Decode2ToExecute1Type; l_in : in Loadstore1ToExecute1Type; + fp_in : in FPUToExecute1Type; ext_irq_in : std_ulogic; -- asynchronous l_out : out Execute1ToLoadstore1Type; f_out : out Execute1ToFetch1Type; + fp_out : out Execute1ToFPUType; e_out : out Execute1ToWritebackType; @@ -54,6 +56,7 @@ architecture behaviour of execute1 is f : Execute1ToFetch1Type; busy: std_ulogic; terminate: std_ulogic; + fp_exception_next : std_ulogic; trace_next : std_ulogic; prev_op : insn_type_t; lr_update : std_ulogic; @@ -72,7 +75,8 @@ architecture behaviour of execute1 is end record; constant reg_type_init : reg_type := (e => Execute1ToWritebackInit, f => Execute1ToFetch1Init, - busy => '0', lr_update => '0', terminate => '0', trace_next => '0', prev_op => OP_ILLEGAL, + busy => '0', lr_update => '0', terminate => '0', + fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0')); @@ -268,7 +272,7 @@ begin b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; - busy_out <= l_in.busy or r.busy; + busy_out <= l_in.busy or r.busy or fp_in.busy; valid_in <= e_in.valid and not busy_out; terminate_out <= r.terminate; @@ -334,6 +338,7 @@ begin variable spr_val : std_ulogic_vector(63 downto 0); variable addend : std_ulogic_vector(127 downto 0); variable do_trace : std_ulogic; + variable fv : Execute1ToFPUType; begin result := (others => '0'); sum_with_carry := (others => '0'); @@ -347,6 +352,7 @@ begin v.e := Execute1ToWritebackInit; lv := Execute1ToLoadstore1Init; v.f.redirect := '0'; + fv := Execute1ToFPUInit; -- XER forwarding. To avoid having to track XER hazards, we -- use the previously latched value. @@ -522,9 +528,11 @@ begin exception_nextpc := '0'; v.e.exc_write_enable := '0'; v.e.exc_write_reg := fast_spr_num(SPR_SRR0); - v.e.exc_write_data := e_in.nia; if valid_in = '1' then + v.e.exc_write_data := e_in.nia; v.last_nia := e_in.nia; + else + v.e.exc_write_data := r.last_nia; end if; v.e.mode_32bit := not ctrl.msr(MSR_SF); @@ -552,18 +560,27 @@ begin ctrl_tmp.msr(MSR_LE) <= '1'; v.e.valid := '1'; v.trace_next := '0'; + v.fp_exception_next := '0'; report "Writing SRR1: " & to_hstring(ctrl.srr1); - elsif r.trace_next = '1' and valid_in = '1' then - -- Generate a trace interrupt rather than executing the next instruction - -- or taking any asynchronous interrupt - v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64)); - ctrl_tmp.srr1(63 - 33) <= '1'; - if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or - r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then - ctrl_tmp.srr1(63 - 35) <= '1'; - elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then - ctrl_tmp.srr1(63 - 36) <= '1'; + elsif valid_in = '1' and ((HAS_FPU and r.fp_exception_next = '1') or r.trace_next = '1') then + if HAS_FPU and r.fp_exception_next = '1' then + -- This is used for FP-type program interrupts that + -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero. + v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); + ctrl_tmp.srr1(63 - 43) <= '1'; + ctrl_tmp.srr1(63 - 47) <= '1'; + else + -- Generate a trace interrupt rather than executing the next instruction + -- or taking any asynchronous interrupt + v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64)); + ctrl_tmp.srr1(63 - 33) <= '1'; + if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or + r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then + ctrl_tmp.srr1(63 - 35) <= '1'; + elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then + ctrl_tmp.srr1(63 - 36) <= '1'; + end if; end if; exception := '1'; @@ -589,7 +606,7 @@ begin illegal := '1'; elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and - (e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then + (e_in.unit = FPU or e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then -- generate a floating-point unavailable interrupt exception := '1'; v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64)); @@ -809,6 +826,10 @@ begin is_branch := '1'; taken_branch := '1'; abs_branch := '1'; + if HAS_FPU then + v.fp_exception_next := fp_in.exception and + (a_in(MSR_FE0) or a_in(MSR_FE1)); + end if; do_trace := '0'; when OP_CNTZ => @@ -980,6 +1001,10 @@ begin ctrl_tmp.msr(MSR_IR) <= '1'; ctrl_tmp.msr(MSR_DR) <= '1'; end if; + if HAS_FPU then + v.fp_exception_next := fp_in.exception and + (c_in(MSR_FE0) or c_in(MSR_FE1)); + end if; end if; when OP_MTSPR => report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & @@ -1096,6 +1121,8 @@ begin lv.valid := '1'; elsif e_in.unit = NONE then illegal := '1'; + elsif HAS_FPU and e_in.unit = FPU then + fv.valid := '1'; end if; elsif r.f.redirect = '1' then @@ -1170,7 +1197,17 @@ begin v.e.valid := '1'; end if; - if illegal = '1' then + -- Generate FP-type program interrupt. fp_in.interrupt will only + -- be set during the execution of a FP instruction. + -- The case where MSR[FE0,FE1] goes from zero to non-zero is + -- handled above by mtmsrd and rfid setting v.fp_exception_next. + if HAS_FPU and fp_in.interrupt = '1' then + v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); + ctrl_tmp.srr1(63 - 43) <= '1'; + exception := '1'; + end if; + + if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then exception := '1'; v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); -- Since we aren't doing Hypervisor emulation assist (0xe40) we @@ -1216,7 +1253,6 @@ begin end if; v.e.exc_write_enable := '1'; v.e.exc_write_reg := fast_spr_num(SPR_SRR0); - v.e.exc_write_data := r.last_nia; report "ldst exception writing srr0=" & to_hstring(r.last_nia); end if; @@ -1261,6 +1297,19 @@ begin lv.mode_32bit := not ctrl.msr(MSR_SF); lv.is_32bit := e_in.is_32bit; + -- Outputs to FPU + fv.op := e_in.insn_type; + fv.nia := e_in.nia; + fv.insn := e_in.insn; + fv.single := e_in.is_32bit; + fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1); + fv.fra := a_in; + fv.frb := b_in; + fv.frc := c_in; + fv.frt := e_in.write_reg; + fv.rc := e_in.rc; + fv.out_cr := e_in.output_cr; + -- Update registers rin <= v; @@ -1268,6 +1317,7 @@ begin f_out <= r.f; l_out <= lv; e_out <= r.e; + fp_out <= fv; flush_out <= f_out.redirect; exception_log <= exception; diff --git a/fpu.vhdl b/fpu.vhdl new file mode 100644 index 0000000..b05ec9d --- /dev/null +++ b/fpu.vhdl @@ -0,0 +1,185 @@ +-- Floating-point unit for Microwatt + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.insn_helpers.all; +use work.decode_types.all; +use work.crhelpers.all; +use work.helpers.all; +use work.common.all; + +entity fpu is + port ( + clk : in std_ulogic; + rst : in std_ulogic; + + e_in : in Execute1toFPUType; + e_out : out FPUToExecute1Type; + + w_out : out FPUToWritebackType + ); +end entity fpu; + +architecture behaviour of fpu is + + type state_t is (IDLE, + DO_MFFS, DO_MTFSF); + + type reg_type is record + state : state_t; + busy : std_ulogic; + instr_done : std_ulogic; + do_intr : std_ulogic; + op : insn_type_t; + insn : std_ulogic_vector(31 downto 0); + dest_fpr : gspr_index_t; + fe_mode : std_ulogic; + rc : std_ulogic; + is_cmp : std_ulogic; + single_prec : std_ulogic; + fpscr : std_ulogic_vector(31 downto 0); + b : std_ulogic_vector(63 downto 0); + writing_back : std_ulogic; + cr_result : std_ulogic_vector(3 downto 0); + cr_mask : std_ulogic_vector(7 downto 0); + end record; + + signal r, rin : reg_type; + + signal fp_result : std_ulogic_vector(63 downto 0); + +begin + fpu_0: process(clk) + begin + if rising_edge(clk) then + if rst = '1' then + r.state <= IDLE; + r.busy <= '0'; + r.instr_done <= '0'; + r.do_intr <= '0'; + r.fpscr <= (others => '0'); + r.writing_back <= '0'; + else + assert not (r.state /= IDLE and e_in.valid = '1') severity failure; + r <= rin; + end if; + end if; + end process; + + e_out.busy <= r.busy; + e_out.exception <= r.fpscr(FPSCR_FEX); + e_out.interrupt <= r.do_intr; + + w_out.valid <= r.instr_done and not r.do_intr; + w_out.write_enable <= r.writing_back; + w_out.write_reg <= r.dest_fpr; + w_out.write_data <= fp_result; + w_out.write_cr_enable <= r.instr_done and r.rc; + w_out.write_cr_mask <= r.cr_mask; + w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result & + r.cr_result & r.cr_result & r.cr_result & r.cr_result; + + fpu_1: process(all) + variable v : reg_type; + variable illegal : std_ulogic; + variable j, k : integer; + variable flm : std_ulogic_vector(7 downto 0); + begin + v := r; + illegal := '0'; + v.busy := '0'; + + -- capture incoming instruction + if e_in.valid = '1' then + v.insn := e_in.insn; + v.op := e_in.op; + v.fe_mode := or (e_in.fe_mode); + v.dest_fpr := e_in.frt; + v.single_prec := e_in.single; + v.rc := e_in.rc; + v.is_cmp := e_in.out_cr; + v.cr_mask := num_to_fxm(1); + v.b := e_in.frb; + end if; + + v.writing_back := '0'; + v.instr_done := '0'; + + case r.state is + when IDLE => + if e_in.valid = '1' then + case e_in.insn(5 downto 1) is + when "00111" => + if e_in.insn(8) = '0' then + v.state := DO_MFFS; + else + v.state := DO_MTFSF; + end if; + when others => + illegal := '1'; + end case; + end if; + + when DO_MFFS => + v.writing_back := '1'; + case r.insn(20 downto 16) is + when "00000" => + -- mffs + when others => + illegal := '1'; + end case; + v.instr_done := '1'; + v.state := IDLE; + + when DO_MTFSF => + if r.insn(25) = '1' then + flm := x"FF"; + elsif r.insn(16) = '1' then + flm := x"00"; + else + flm := r.insn(24 downto 17); + end if; + for i in 0 to 7 loop + k := i * 4; + if flm(i) = '1' then + v.fpscr(k + 3 downto k) := r.b(k + 3 downto k); + end if; + end loop; + v.instr_done := '1'; + v.state := IDLE; + + end case; + + -- Data path. + -- Just enough to read FPSCR for now. + fp_result <= x"00000000" & r.fpscr; + + v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or + (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI))); + v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and + v.fpscr(FPSCR_VE downto FPSCR_XE)); + if r.rc = '1' then + v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); + end if; + + if illegal = '1' then + v.instr_done := '0'; + v.do_intr := '0'; + v.writing_back := '0'; + v.busy := '0'; + v.state := IDLE; + else + v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode; + if v.state /= IDLE or v.do_intr = '1' then + v.busy := '1'; + end if; + end if; + + rin <= v; + e_out.illegal <= illegal; + end process; + +end architecture behaviour; diff --git a/microwatt.core b/microwatt.core index 3b47339..7f2068d 100644 --- a/microwatt.core +++ b/microwatt.core @@ -23,6 +23,7 @@ filesets: - cr_hazard.vhdl - control.vhdl - execute1.vhdl + - fpu.vhdl - loadstore1.vhdl - mmu.vhdl - dcache.vhdl diff --git a/scripts/fmt_log/fmt_log.c b/scripts/fmt_log/fmt_log.c index eca4bf0..c61c8a5 100644 --- a/scripts/fmt_log/fmt_log.c +++ b/scripts/fmt_log/fmt_log.c @@ -84,17 +84,17 @@ struct log_entry { #define FLGA(i, y, z) (log.i? y: z) #define PNIA(f) (full_nia[log.f] & 0xff) -const char *units[4] = { "--", "al", "ls", "?3" }; +const char *units[4] = { "--", "al", "ls", "fp" }; const char *ops[64] = { "illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ", "bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ", - "extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "fpload ", - "fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ", - "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ", - "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ", - "tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ", "?62 ", "?63 " + "extswsl", "fpop ", "fpopi ", "icbi ", "icbt ", "isel ", "isync ", "ld ", + "st ", "fpload ", "fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", + "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", + "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", + "shr ", "sync ", "tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ", }; const char *spr_names[13] = diff --git a/writeback.vhdl b/writeback.vhdl index d0230d8..95de0ec 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -12,6 +12,7 @@ entity writeback is e_in : in Execute1ToWritebackType; l_in : in Loadstore1ToWritebackType; + fp_in : in FPUToWritebackType; w_out : out WritebackToRegisterFileType; c_out : out WritebackToCrFileType; @@ -31,15 +32,21 @@ begin -- Do consistency checks only on the clock edge x(0) := e_in.valid; y(0) := l_in.valid; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; + w(0) := fp_in.valid; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + + to_integer(unsigned(w))) <= 1 severity failure; x(0) := e_in.write_enable or e_in.exc_write_enable; y(0) := l_in.write_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; + w(0) := fp_in.write_enable; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + + to_integer(unsigned(w))) <= 1 severity failure; w(0) := e_in.write_cr_enable; x(0) := (e_in.write_enable and e_in.rc); - assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure; + y(0) := fp_in.write_cr_enable; + assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + + to_integer(unsigned(y))) <= 1 severity failure; end if; end process; @@ -53,7 +60,7 @@ begin c_out <= WritebackToCrFileInit; complete_out <= '0'; - if e_in.valid = '1' or l_in.valid = '1' then + if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then complete_out <= '1'; end if; @@ -79,6 +86,18 @@ begin c_out.write_xerc_data <= e_in.xerc; end if; + if fp_in.write_enable = '1' then + w_out.write_reg <= fp_in.write_reg; + w_out.write_data <= fp_in.write_data; + w_out.write_enable <= '1'; + end if; + + if fp_in.write_cr_enable = '1' then + c_out.write_cr_enable <= '1'; + c_out.write_cr_mask <= fp_in.write_cr_mask; + c_out.write_cr_data <= fp_in.write_cr_data; + end if; + if l_in.write_enable = '1' then w_out.write_reg <= l_in.write_reg; w_out.write_data <= l_in.write_data;