diff --git a/common.vhdl b/common.vhdl index 44198b0..ea0ec1d 100644 --- a/common.vhdl +++ b/common.vhdl @@ -12,15 +12,28 @@ package common is function decode_spr_num(insn: std_ulogic_vector(31 downto 0)) return spr_num_t; + constant SPR_XER : spr_num_t := 1; constant SPR_LR : spr_num_t := 8; constant SPR_CTR : spr_num_t := 9; constant SPR_TB : spr_num_t := 268; + -- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are + -- in the CR file as a kind of CR extension (with a separate write + -- control). The rest is stored as a fast SPR. + type xer_common_t is record + ca : std_ulogic; + ca32 : std_ulogic; + ov : std_ulogic; + ov32 : std_ulogic; + so : std_ulogic; + end record; + constant xerc_init : xer_common_t := (others => '0'); + + -- This needs to die... type ctrl_t is record lr: std_ulogic_vector(63 downto 0); ctr: std_ulogic_vector(63 downto 0); tb: std_ulogic_vector(63 downto 0); - carry: std_ulogic; end record; type Fetch1ToIcacheType is record @@ -64,8 +77,10 @@ package common is read_data2: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0); cr: std_ulogic_vector(31 downto 0); + xerc: xer_common_t; lr: std_ulogic; rc: std_ulogic; + oe: std_ulogic; invert_a: std_ulogic; invert_out: std_ulogic; input_carry: carry_in_t; @@ -78,9 +93,9 @@ package common is data_len: std_ulogic_vector(3 downto 0); end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := - (valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', invert_a => '0', + (valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', - is_32bit => '0', is_signed => '0', others => (others => '0')); + is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0')); type Decode2ToMultiplyType is record valid: std_ulogic; @@ -89,8 +104,13 @@ package common is data1: std_ulogic_vector(64 downto 0); data2: 
std_ulogic_vector(64 downto 0); rc: std_ulogic; + oe: std_ulogic; + is_32bit: std_ulogic; + xerc: xer_common_t; end record; - constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0')); + constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', + oe => '0', is_32bit => '0', xerc => xerc_init, + others => (others => '0')); type Decode2ToDividerType is record valid: std_ulogic; @@ -102,8 +122,13 @@ package common is is_extended: std_ulogic; is_modulus: std_ulogic; rc: std_ulogic; + oe: std_ulogic; + xerc: xer_common_t; end record; - constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0')); + constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', + is_extended => '0', is_modulus => '0', + rc => '0', oe => '0', xerc => xerc_init, + others => (others => '0')); type Decode2ToRegisterFileType is record read1_enable : std_ulogic; @@ -126,6 +151,7 @@ package common is type CrFileToDecode2Type is record read_cr_data : std_ulogic_vector(31 downto 0); + read_xerc_data : xer_common_t; end record; type Execute1ToFetch1Type is record @@ -146,8 +172,11 @@ package common is sign_extend : std_ulogic; -- do we need to sign extend? update : std_ulogic; -- is this an update instruction? 
update_reg : std_ulogic_vector(4 downto 0); -- if so, the register to update + xerc : xer_common_t; end record; - constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0')); + constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', + sign_extend => '0', update => '0', xerc => xerc_init, + others => (others => '0')); type Loadstore1ToDcacheType is record valid : std_ulogic; @@ -161,6 +190,7 @@ package common is sign_extend : std_ulogic; update : std_ulogic; update_reg : std_ulogic_vector(4 downto 0); + xerc : xer_common_t; end record; type DcacheToWritebackType is record @@ -173,8 +203,11 @@ package common is sign_extend : std_ulogic; byte_reverse : std_ulogic; second_word : std_ulogic; + xerc : xer_common_t; end record; - constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', others => (others => '0')); + constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', + byte_reverse => '0', second_word => '0', xerc => xerc_init, + others => (others => '0')); type Execute1ToWritebackType is record valid: std_ulogic; @@ -186,9 +219,14 @@ package common is write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); + write_xerc_enable : std_ulogic; + xerc : xer_common_t; sign_extend: std_ulogic; end record; - constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0')); + constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', + write_cr_enable => '0', sign_extend => '0', + write_xerc_enable => '0', xerc => xerc_init, + 
others => (others => '0')); type MultiplyToWritebackType is record valid: std_ulogic; @@ -196,9 +234,14 @@ package common is write_reg_enable : std_ulogic; write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_data: std_ulogic_vector(63 downto 0); + write_xerc_enable : std_ulogic; + xerc : xer_common_t; rc: std_ulogic; end record; - constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); + constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', + rc => '0', write_xerc_enable => '0', + xerc => xerc_init, + others => (others => '0')); type DividerToWritebackType is record valid: std_ulogic; @@ -206,9 +249,14 @@ package common is write_reg_enable : std_ulogic; write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_data: std_ulogic_vector(63 downto 0); + write_xerc_enable : std_ulogic; + xerc : xer_common_t; rc: std_ulogic; end record; - constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); + constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', + rc => '0', write_xerc_enable => '0', + xerc => xerc_init, + others => (others => '0')); type WritebackToRegisterFileType is record write_reg : std_ulogic_vector(4 downto 0); @@ -221,9 +269,12 @@ package common is write_cr_enable : std_ulogic; write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_data : std_ulogic_vector(31 downto 0); + write_xerc_enable : std_ulogic; + write_xerc_data : xer_common_t; end record; - constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', others => (others => '0')); - + constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', write_xerc_enable => '0', + write_xerc_data => xerc_init, + others => (others => '0')); end common; package body common is diff --git a/cr_file.vhdl 
b/cr_file.vhdl index d8ce230..dcd21be 100644 --- a/cr_file.vhdl +++ b/cr_file.vhdl @@ -18,7 +18,9 @@ end entity cr_file; architecture behaviour of cr_file is signal crs : std_ulogic_vector(31 downto 0) := (others => '0'); - signal crs_updated : std_ulogic_vector(31 downto 0) := (others => '0'); + signal crs_updated : std_ulogic_vector(31 downto 0); + signal xerc : xer_common_t := xerc_init; + signal xerc_updated : xer_common_t; begin cr_create_0: process(all) variable hi, lo : integer := 0; @@ -35,6 +37,13 @@ begin end loop; crs_updated <= cr_tmp; + + if w_in.write_xerc_enable = '1' then + xerc_updated <= w_in.write_xerc_data; + else + xerc_updated <= xerc; + end if; + end process; -- synchronous writes @@ -45,6 +54,10 @@ begin report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask); crs <= crs_updated; end if; + if w_in.write_xerc_enable = '1' then + report "Writing XERC"; + xerc <= xerc_updated; + end if; end if; end process; @@ -56,5 +69,6 @@ begin report "Reading CR " & to_hstring(crs_updated); end if; d_out.read_cr_data <= crs_updated; + d_out.read_xerc_data <= xerc_updated; end process; end architecture behaviour; diff --git a/dcache.vhdl b/dcache.vhdl index 7d6e74c..df54c95 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -185,6 +185,7 @@ architecture rtl of dcache is length : std_ulogic_vector(3 downto 0); sign_extend : std_ulogic; byte_reverse : std_ulogic; + xerc : xer_common_t; end record; signal r2 : reg_stage_2_t; @@ -469,6 +470,7 @@ begin d_out.sign_extend <= r2.sign_extend; d_out.byte_reverse <= r2.byte_reverse; d_out.second_word <= '0'; + d_out.xerc <= r2.xerc; -- We have a valid load or store hit or we just completed a slow -- op such as a load miss, a NC load or a store @@ -518,6 +520,7 @@ begin d_out.sign_extend <= r1.req.sign_extend; d_out.byte_reverse <= r1.req.byte_reverse; d_out.write_len <= r1.req.length; + d_out.xerc <= r1.req.xerc; end if; -- If it's a store or a non-update load form, complete now 
@@ -539,6 +542,7 @@ begin d_out.write_len <= "1000"; d_out.sign_extend <= '0'; d_out.byte_reverse <= '0'; + d_out.xerc <= r1.req.xerc; -- If it was a load, this completes the operation (load with -- update case). diff --git a/decode2.vhdl b/decode2.vhdl index 1307e7d..e9c71ba 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -131,6 +131,22 @@ architecture behaviour of decode2 is end case; end; + -- Extract the OE bit: insn_oe(insn_in) when the decode table marks the + -- instruction as rc = RC, '0' for every other rc_t value. Using "rc" to + -- decide whether OE exists is not entirely correct architecturally: for + -- mulhd and mulhdu the OE field is reserved. It remains to be seen what an + -- actual POWER9 does if it is set on those instructions; for now the + -- multiplier oe input is only driven from here for OP_MUL_L64 (see below). + function decode_oe (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is + begin + case t is + when RC => + return insn_oe(insn_in); + when OTHERS => + return '0'; + end case; + end; + -- issue control signals signal control_valid_in : std_ulogic; signal control_valid_out : std_ulogic; @@ -255,7 +271,9 @@ begin v.e.read_data3 := decoded_reg_c.data; v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); + v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); v.e.cr := c_in.read_cr_data; + v.e.xerc := c_in.read_xerc_data; v.e.invert_a := d_in.decode.invert_a; v.e.invert_out := d_in.decode.invert_out; v.e.input_carry := d_in.decode.input_carry; @@ -274,6 +292,11 @@ begin mul_b := decoded_reg_b.data; v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.m.rc := decode_rc(d_in.decode.rc, d_in.insn); + v.m.xerc := c_in.read_xerc_data; + if v.m.insn_type = OP_MUL_L64 then + v.m.oe := decode_oe(d_in.decode.rc, d_in.insn); + end if; + v.m.is_32bit := d_in.decode.is_32bit; if d_in.decode.is_32bit = '1' then if d_in.decode.is_signed = '1' then @@ -337,6 +360,8 @@ begin end if; end if; v.d.rc := decode_rc(d_in.decode.rc, d_in.insn); + 
v.d.xerc := c_in.read_xerc_data; + v.d.oe := decode_oe(d_in.decode.rc, d_in.insn); -- load/store unit v.l.update_reg := decoded_reg_a.reg; @@ -355,6 +380,7 @@ begin v.l.byte_reverse := d_in.decode.byte_reverse; v.l.sign_extend := d_in.decode.sign_extend; v.l.update := d_in.decode.update; + v.l.xerc := c_in.read_xerc_data; -- issue control control_valid_in <= d_in.valid; diff --git a/divider.vhdl b/divider.vhdl index 20d4600..d632e90 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -36,7 +36,8 @@ architecture behaviour of divider is signal overflow : std_ulogic; signal ovf32 : std_ulogic; signal did_ovf : std_ulogic; - + signal oe : std_ulogic; + signal xerc : xer_common_t; begin divider_0: process(clk) begin @@ -62,6 +63,8 @@ begin is_32bit <= d_in.is_32bit; is_signed <= d_in.is_signed; rc <= d_in.rc; + oe <= d_in.oe; + xerc <= d_in.xerc; count <= "1111111"; running <= '1'; overflow <= '0'; @@ -147,13 +150,25 @@ begin divider_out: process(clk) begin if rising_edge(clk) then + d_out.valid <= '0'; d_out.write_reg_data <= oresult; + d_out.write_reg_enable <= '0'; + d_out.write_xerc_enable <= '0'; + d_out.xerc <= xerc; if count = "1000000" then d_out.valid <= '1'; d_out.write_reg_enable <= '1'; - else - d_out.valid <= '0'; - d_out.write_reg_enable <= '0'; + d_out.write_xerc_enable <= oe; + + -- We must test oe because the RC update code in writeback + -- will use the xerc value to set CR0:SO so we must not clobber + -- xerc if OE wasn't set. 
+ -- + if oe = '1' then + d_out.xerc.ov <= did_ovf; + d_out.xerc.ov32 <= did_ovf; + d_out.xerc.so <= xerc.so or did_ovf; + end if; end if; end if; end process; diff --git a/execute1.vhdl b/execute1.vhdl index 862c631..2391ba2 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -31,14 +31,13 @@ end entity execute1; architecture behaviour of execute1 is type reg_type is record - --f : Execute1ToFetch1Type; e : Execute1ToWritebackType; end record; signal r, rin : reg_type; - signal ctrl: ctrl_t := (carry => '0', others => (others => '0')); - signal ctrl_tmp: ctrl_t := (carry => '0', others => (others => '0')); + signal ctrl: ctrl_t := (others => (others => '0')); + signal ctrl_tmp: ctrl_t := (others => (others => '0')); signal right_shift, rot_clear_left, rot_clear_right: std_ulogic; signal rotator_result: std_ulogic_vector(63 downto 0); @@ -46,17 +45,46 @@ architecture behaviour of execute1 is signal logical_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0); - function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is + procedure set_carry(e: inout Execute1ToWritebackType; + carry32 : in std_ulogic; + carry : in std_ulogic) is begin - case carry_sel is + e.xerc.ca32 := carry32; + e.xerc.ca := carry; + e.write_xerc_enable := '1'; + end; + + procedure set_ov(e: inout Execute1ToWritebackType; + ov : in std_ulogic; + ov32 : in std_ulogic) is + begin + e.xerc.ov32 := ov32; + e.xerc.ov := ov; + if ov = '1' then + e.xerc.so := '1'; + end if; + e.write_xerc_enable := '1'; + end; + + function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic; + ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is + begin + return (ca xor msb_r) and not (msb_a xor msb_b); + end; + + function decode_input_carry(ic : carry_in_t; + xerc : xer_common_t) return std_ulogic is + begin + case ic is when ZERO => return '0'; when CA => - return ca_in; + return xerc.ca; when ONE => return '1'; end case; end; + begin rotator_0: 
entity work.rotator @@ -117,6 +145,7 @@ begin variable bf, bfa : std_ulogic_vector(2 downto 0); variable l : std_ulogic; variable next_nia : std_ulogic_vector(63 downto 0); + variable carry_32, carry_64 : std_ulogic; begin result := (others => '0'); result_with_carry := (others => '0'); @@ -125,7 +154,41 @@ begin v := r; v.e := Execute1ToWritebackInit; - --v.f := Execute1ToFetch1TypeInit; + + -- XER forwarding. To avoid having to track XER hazards, we + -- use the previously latched value. + -- + -- If the XER was modified by a multiply or a divide, those are + -- single issue, we'll get the up to date value from decode2 from + -- the register file. + -- + -- If it was modified by an instruction older than the previous + -- one in EX1, it will have also hit writeback and will be up + -- to date in decode2. + -- + -- That leaves us with the case where it was updated by the previous + -- instruction in EX1. In that case, we can forward it back here. + -- + -- This will break if we allow pipelining of multiply and divide, + -- but ideally, those should go via EX1 anyway and run as a state + -- machine from here. + -- + -- One additional hazard to beware of is an XER:SO modifying instruction + -- in EX1 followed immediately by a store conditional. Due to our + -- writeback latency, the store will go down the LSU with the previous + -- XER value, thus the stcx. will set CR0:SO using an obsolete SO value. + -- + -- We will need to handle that if we ever make stcx. not single issue. + -- + -- We always pass a valid XER value down to writeback even when + -- we aren't updating it, in order for XER:SO -> CR0:SO transfer + -- to work for RC instructions. 
+ -- + if r.e.write_xerc_enable = '1' then + v.e.xerc := r.e.xerc; + else + v.e.xerc := e_in.xerc; + end if; ctrl_tmp <= ctrl; -- FIXME: run at 512MHz not core freq @@ -163,10 +226,18 @@ begin else a_inv := not e_in.read_data1; end if; - result_with_carry := ppc_adde(a_inv, e_in.read_data2, decode_input_carry(e_in.input_carry, ctrl.carry)); + result_with_carry := ppc_adde(a_inv, e_in.read_data2, + decode_input_carry(e_in.input_carry, v.e.xerc)); result := result_with_carry(63 downto 0); - if e_in.output_carry then - ctrl_tmp.carry <= result_with_carry(64); + carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32); + carry_64 := result_with_carry(64); + if e_in.output_carry = '1' then + set_carry(v.e, carry_32, carry_64); + end if; + if e_in.oe = '1' then + set_ov(v.e, + calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)), + calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31))); end if; result_en := '1'; when OP_AND | OP_OR | OP_XOR => @@ -270,6 +341,13 @@ begin end loop; when OP_MFSPR => case decode_spr_num(e_in.insn) is + when SPR_XER => + result := ( 63-32 => v.e.xerc.so, + 63-33 => v.e.xerc.ov, + 63-34 => v.e.xerc.ca, + 63-44 => v.e.xerc.ov32, + 63-45 => v.e.xerc.ca32, + others => '0'); when SPR_CTR => result := ctrl.ctr; when SPR_LR => @@ -310,6 +388,13 @@ begin v.e.write_cr_data := e_in.read_data3(31 downto 0); when OP_MTSPR => case decode_spr_num(e_in.insn) is + when SPR_XER => + v.e.xerc.so := e_in.read_data3(63-32); + v.e.xerc.ov := e_in.read_data3(63-33); + v.e.xerc.ca := e_in.read_data3(63-34); + v.e.xerc.ov32 := e_in.read_data3(63-44); + v.e.xerc.ca32 := e_in.read_data3(63-45); + v.e.write_xerc_enable := '1'; when SPR_CTR => ctrl_tmp.ctr <= e_in.read_data3; when SPR_LR => @@ -334,7 +419,7 @@ begin when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR => result := rotator_result; if e_in.output_carry = '1' then - ctrl_tmp.carry <= rotator_carry; + set_carry(v.e, rotator_carry, rotator_carry); end if; result_en := 
'1'; when OP_SIM_CONFIG => diff --git a/insn_helpers.vhdl b/insn_helpers.vhdl index d3ddcca..f58dacd 100644 --- a/insn_helpers.vhdl +++ b/insn_helpers.vhdl @@ -16,6 +16,7 @@ package insn_helpers is function insn_lk (insn_in : std_ulogic_vector) return std_ulogic; function insn_aa (insn_in : std_ulogic_vector) return std_ulogic; function insn_rc (insn_in : std_ulogic_vector) return std_ulogic; + function insn_oe (insn_in : std_ulogic_vector) return std_ulogic; function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_bfa (insn_in : std_ulogic_vector) return std_ulogic_vector; @@ -103,6 +104,11 @@ package body insn_helpers is return insn_in(0); end; + function insn_oe (insn_in : std_ulogic_vector) return std_ulogic is + begin + return insn_in(10); + end; + function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector is begin return insn_in(15 downto 2); diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 7fa8a42..1c16c46 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -47,6 +47,7 @@ begin v.sign_extend := l_in.sign_extend; v.update := l_in.update; v.update_reg := l_in.update_reg; + v.xerc := l_in.xerc; -- XXX Temporary hack. 
Mark the op as non-cachable if the address -- is the form 0xc------- diff --git a/multiply.vhdl b/multiply.vhdl index 94fa792..23339b5 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -27,8 +27,17 @@ architecture behaviour of multiply is data : signed(129 downto 0); write_reg : std_ulogic_vector(4 downto 0); rc : std_ulogic; + oe : std_ulogic; + is_32bit : std_ulogic; + xerc : xer_common_t; end record; - constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', data => (others => '0'), others => (others => '0')); + constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', + insn_type => OP_ILLEGAL, + rc => '0', oe => '0', + is_32bit => '0', + xerc => xerc_init, + data => (others => '0'), + others => (others => '0')); type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit); @@ -51,6 +60,7 @@ begin variable v : reg_type; variable d : std_ulogic_vector(129 downto 0); variable d2 : std_ulogic_vector(63 downto 0); + variable ov : std_ulogic; begin v := r; @@ -61,16 +71,26 @@ begin v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2); v.multiply_pipeline(0).write_reg := m.write_reg; v.multiply_pipeline(0).rc := m.rc; + v.multiply_pipeline(0).oe := m.oe; + v.multiply_pipeline(0).is_32bit := m.is_32bit; + v.multiply_pipeline(0).xerc := m.xerc; loop_0: for i in 1 to PIPELINE_DEPTH-1 loop v.multiply_pipeline(i) := r.multiply_pipeline(i-1); end loop; d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); + ov := '0'; + -- TODO: Handle overflows case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is when OP_MUL_L64 => d2 := d(63 downto 0); + if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then + ov := (or d(63 downto 31)) and not (and d(63 downto 31)); + else + ov := (or d(127 downto 63)) and not (and d(127 downto 63)); + end if; 
when OP_MUL_H32 => d2 := d(63 downto 32) & d(63 downto 32); when OP_MUL_H64 => @@ -82,11 +102,24 @@ begin m_out.write_reg_data <= d2; m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg; + m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc; + -- Generate OV/OV32/SO when OE=1 if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then m_out.valid <= '1'; m_out.write_reg_enable <= '1'; m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; + m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe; + + -- We must test oe because the RC update code in writeback + -- will use the xerc value to set CR0:SO so we must not clobber + -- xerc if OE wasn't set. + -- + if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then + m_out.xerc.ov <= ov; + m_out.xerc.ov32 <= ov; + m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov; + end if; end if; rin <= v; diff --git a/writeback.vhdl b/writeback.vhdl index e2b74f8..545e931 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -62,6 +62,8 @@ begin variable w : std_ulogic_vector(0 downto 0); variable j : integer; variable k : unsigned(3 downto 0); + variable cf: std_ulogic_vector(3 downto 0); + variable xe: xer_common_t; begin x := "" & e_in.valid; y := "" & l_in.valid; @@ -81,6 +83,11 @@ begin z := "" & (d_in.valid and d_in.rc); assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + x := "" & e_in.write_xerc_enable; + y := "" & m_in.write_xerc_enable; + z := "" & d_in.write_xerc_enable; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; @@ -96,12 +103,13 @@ begin partial_write <= '0'; sign_extend <= '0'; second_word <= '0'; data_in <= e_in.write_data; + xe := e_in.xerc; if e_in.write_enable = '1' then w_out.write_reg <= e_in.write_reg; data_in <= e_in.write_data; 
w_out.write_enable <= '1'; data_len <= unsigned(e_in.write_len); sign_extend <= e_in.sign_extend; rc <= e_in.rc; @@ -113,7 +121,12 @@ begin c_out.write_cr_data <= e_in.write_cr_data; end if; - if l_in.write_enable = '1' then + if e_in.write_xerc_enable = '1' then + c_out.write_xerc_enable <= '1'; + c_out.write_xerc_data <= e_in.xerc; + end if; + + if l_in.write_enable = '1' then w_out.write_reg <= l_in.write_reg; data_in <= l_in.write_data; data_len <= unsigned(l_in.write_len); @@ -127,6 +140,7 @@ begin if l_in.valid = '0' and (data_len + byte_offset > 8) then partial_write <= '1'; end if; + xe := l_in.xerc; end if; if m_in.write_reg_enable = '1' then @@ -134,15 +148,27 @@ begin w_out.write_reg <= m_in.write_reg_nr; data_in <= m_in.write_reg_data; rc <= m_in.rc; + xe := m_in.xerc; end if; + if m_in.write_xerc_enable = '1' then + c_out.write_xerc_enable <= '1'; + c_out.write_xerc_data <= m_in.xerc; + end if; + if d_in.write_reg_enable = '1' then w_out.write_enable <= '1'; w_out.write_reg <= d_in.write_reg_nr; data_in <= d_in.write_reg_data; rc <= d_in.rc; + xe := d_in.xerc; end if; + if d_in.write_xerc_enable = '1' then + c_out.write_xerc_enable <= '1'; + c_out.write_xerc_data <= d_in.xerc; + end if; + -- shift and byte-reverse data bytes for i in 0 to 7 loop k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); @@ -193,17 +219,15 @@ begin -- deliver to regfile w_out.write_data <= data_trimmed; - -- test value against 0 and set CR0 if requested + -- Perform CR0 update for RC forms: cf = (LT, GT, EQ, SO), SO taken from xe if rc = '1' then c_out.write_cr_enable <= '1'; c_out.write_cr_mask <= num_to_fxm(0); - if negative = '1' then - c_out.write_cr_data <= x"80000000"; - elsif zero = '0' then - c_out.write_cr_data <= x"40000000"; - else - c_out.write_cr_data <= x"20000000"; - end if; + cf(3) := negative; + cf(2) := not negative and not zero; + cf(1) := zero; + cf(0) := xe.so; + c_out.write_cr_data(31 downto 28) <= cf; end if; end process; end;