diff --git a/common.vhdl b/common.vhdl index ffddb0b..84bbc47 100644 --- a/common.vhdl +++ b/common.vhdl @@ -130,12 +130,13 @@ package common is byte_reverse : std_ulogic; sign_extend : std_ulogic; -- do we need to sign extend? update : std_ulogic; -- is this an update instruction? + reserve : std_ulogic; -- set for larx/stcx end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', - is_32bit => '0', is_signed => '0', xerc => xerc_init, + is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0')); type Execute1ToMultiplyType is record @@ -206,10 +207,12 @@ package common is update : std_ulogic; -- is this an update instruction? update_reg : gpr_index_t; -- if so, the register to update xerc : xer_common_t; + reserve : std_ulogic; -- set for larx/stcx. + rc : std_ulogic; -- set for stcx. end record; constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, - others => (others => '0')); + reserve => '0', rc => '0', others => (others => '0')); type Loadstore1ToDcacheType is record valid : std_ulogic; @@ -224,6 +227,8 @@ package common is update : std_ulogic; update_reg : gpr_index_t; xerc : xer_common_t; + reserve : std_ulogic; + rc : std_ulogic; end record; type DcacheToWritebackType is record @@ -237,10 +242,12 @@ package common is byte_reverse : std_ulogic; second_word : std_ulogic; xerc : xer_common_t; + rc : std_ulogic; + store_done : std_ulogic; end record; constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', xerc => xerc_init, - others => (others => '0')); + rc => '0', store_done => '0', others => (others => '0')); type Execute1ToWritebackType is record valid: std_ulogic; diff --git a/dcache.vhdl b/dcache.vhdl index 5bf477b..75b10c7 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -171,6 +171,9 @@ architecture rtl of dcache is slow_data : std_ulogic_vector(63 downto 0); slow_valid : std_ulogic; + -- Signal to complete a failed stcx. + stcx_fail : std_ulogic; + -- Cache miss state (reload state machine) state : state_t; wb : wishbone_master_out; @@ -199,6 +202,15 @@ architecture rtl of dcache is signal r2 : reg_stage_2_t; + -- Reservation information + -- + type reservation_t is record + valid : std_ulogic; + addr : std_ulogic_vector(63 downto LINE_OFF_BITS); + end record; + + signal reservation : reservation_t; + -- Async signals on incoming request signal req_index : index_t; signal req_row : row_t; @@ -210,6 +222,10 @@ architecture rtl of dcache is signal req_laddr : std_ulogic_vector(63 downto 0); signal req_sel : std_ulogic_vector(7 downto 0); + signal cancel_store : std_ulogic; + signal set_rsrv : std_ulogic; + signal clear_rsrv : std_ulogic; + -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; signal cache_out : cache_ram_out_t; @@ -481,6 +497,41 @@ begin -- Generate stalls from stage 1 state machine stall_out <= '1' when r1.state /= IDLE else '0'; + -- Handle load-with-reservation and store-conditional instructions + reservation_comb: process(all) + begin + cancel_store <= '0'; + set_rsrv <= '0'; + clear_rsrv <= '0'; + if d_in.valid = '1' and d_in.reserve = '1' then + -- XXX generate alignment interrupt if address is not aligned + -- XXX or if d_in.nc = '1' + if d_in.load = '1' then + -- load with reservation + set_rsrv <= '1'; + else + -- store conditional + clear_rsrv <= '1'; + if reservation.valid = '0' or + d_in.addr(63 downto LINE_OFF_BITS) /= reservation.addr then + cancel_store <= '1'; + end if; + end if; + end if; + end process; + + reservation_reg: process(clk) + begin + if rising_edge(clk) then + if rst = '1' or clear_rsrv = '1' then + reservation.valid <= '0'; + elsif set_rsrv = '1' then + reservation.valid <= '1'; + reservation.addr <= d_in.addr(63 downto LINE_OFF_BITS); + end if; + end if; + end process; + -- Writeback (loads and reg updates) & completion control logic -- writeback_control: process(all) @@ -497,6 +548,8 @@ begin d_out.byte_reverse <= r2.byte_reverse; d_out.second_word <= r2.second_dword; d_out.xerc <= r2.xerc; + d_out.rc <= '0'; -- loads never have rc=1 + d_out.store_done <= '0'; -- We have a valid load or store hit or we just completed a slow -- op such as a load miss, a NC load or a store @@ -512,11 +565,14 @@ begin assert (r1.update_valid and r2.hit_load_valid) /= '1' report "unexpected hit_load_delayed collision with update_valid" severity FAILURE; - assert (r1.slow_valid and r2.hit_load_valid) /= '1' report + assert (r1.slow_valid and r1.stcx_fail) /= '1' report + "unexpected slow_valid collision with stcx_fail" + severity FAILURE; + assert ((r1.slow_valid or r1.stcx_fail) and r2.hit_load_valid) /= '1' report "unexpected hit_load_delayed collision with slow_valid" severity FAILURE; - assert (r1.slow_valid and r1.update_valid) /= '1' report - "unexpected update_valid collision with slow_valid" + assert ((r1.slow_valid or r1.stcx_fail) and r1.update_valid) /= '1' report + "unexpected update_valid collision with slow_valid or stcx_fail" severity FAILURE; -- Delayed load hit case is the standard path @@ -551,6 +607,8 @@ begin d_out.xerc <= r1.req.xerc; d_out.second_word <= r1.second_dword; end if; + d_out.rc <= r1.req.rc; + d_out.store_done <= '1'; -- If it's a store or a non-update load form, complete now -- unless we need to do another dword transfer @@ -561,6 +619,12 @@ begin end if; end if; + if r1.stcx_fail = '1' then + d_out.rc <= r1.req.rc; + d_out.store_done <= '0'; + d_out.valid <= '1'; + end if; + -- We have a register update to do. if r1.update_valid = '1' then d_out.write_enable <= '1'; @@ -657,7 +721,7 @@ begin if reloading and wishbone_in.ack = '1' and r1.store_way = i then do_write <= '1'; end if; - if req_op = OP_STORE_HIT and req_hit_way = i then + if req_op = OP_STORE_HIT and req_hit_way = i and cancel_store = '0' then assert not reloading report "Store hit while in state:" & state_t'image(r1.state) severity FAILURE; @@ -753,6 +817,7 @@ begin -- One cycle pulses reset r1.slow_valid <= '0'; r1.update_valid <= '0'; + r1.stcx_fail <= '0'; -- We cannot currently process a new request when not idle assert d_in.valid = '0' or r1.state = IDLE report "request " & @@ -832,10 +897,15 @@ begin r1.wb.sel <= req_sel; r1.wb.adr <= req_addr(r1.wb.adr'left downto 3) & "000"; r1.wb.dat <= req_data; - r1.wb.cyc <= '1'; - r1.wb.stb <= '1'; - r1.wb.we <= '1'; - r1.state <= STORE_WAIT_ACK; + if cancel_store = '0' then + r1.wb.cyc <= '1'; + r1.wb.stb <= '1'; + r1.wb.we <= '1'; + r1.state <= STORE_WAIT_ACK; + else + r1.stcx_fail <= '1'; + r1.state <= IDLE; + end if; -- OP_NONE and OP_BAD do nothing when OP_NONE => @@ -932,7 +1002,7 @@ begin r1.wb.cyc <= '0'; r1.wb.stb <= '0'; end if; - end case; + end case; end if; end if; end process; diff --git a/decode1.vhdl b/decode1.vhdl index bca7c2a..349aa7e 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -60,8 +60,8 @@ architecture behaviour of decode1 is 20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi 21 => (ALU, OP_RLC, NONE, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwinm 23 => (ALU, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwnm - 38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- stb - 39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', RC, '0', '0'), -- stbu + 38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stb + 39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu 44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth 45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu 36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw @@ -278,19 +278,19 @@ architecture behaviour of decode1 is 2#1100111000# => (ALU, OP_SHR, NONE, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- srawi 2#1000011011# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- srd 2#1000011000# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- srw - 2#1010110110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', RC, '0', '0'), -- stbcx - 2#0011110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', RC, '0', '0'), -- stbux - 2#0011010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- stbx + 2#1010110110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stbcx + 2#0011110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbux + 2#0011010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stbx 2#1010010100# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdbrx - 2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- stdcx + 2#0011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stdcx 2#0010110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stdux 2#0010010101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stdx 2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx - 2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- sthcx + 2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx 2#0110110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthux 2#0110010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthx 2#1010010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stwbrx - 2#0010010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- stwcx + 2#0010010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- stwcx 2#0010110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stwux 2#0010010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stwx 2#0000101000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subf @@ -329,7 +329,7 @@ architecture behaviour of decode1 is -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- op in out A out in out len ext pipe - constant attn_instr : decode_rom_t := (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'); + constant attn_instr : decode_rom_t := (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'); constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant sim_cfg_instr : decode_rom_t := (ALU, OP_SIM_CONFIG,NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'); diff --git a/decode2.vhdl b/decode2.vhdl index 3d6b7d8..ff773aa 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -334,6 +334,7 @@ begin v.e.byte_reverse := d_in.decode.byte_reverse; v.e.sign_extend := d_in.decode.sign_extend; v.e.update := d_in.decode.update; + v.e.reserve := d_in.decode.reserve; -- issue control control_valid_in <= d_in.valid; diff --git a/execute1.vhdl b/execute1.vhdl index c536a27..b1662b7 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -759,6 +759,8 @@ begin lv.update := e_in.update; lv.update_reg := gspr_to_gpr(e_in.read_reg1); lv.xerc := v.e.xerc; + lv.reserve := e_in.reserve; + lv.rc := e_in.rc; -- Update registers rin <= v; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 9e038e1..a0c0beb 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -51,6 +51,8 @@ begin v.update := l_in.update; v.update_reg := l_in.update_reg; v.xerc := l_in.xerc; + v.reserve := l_in.reserve; + v.rc := l_in.rc; -- XXX Temporary hack. Mark the op as non-cachable if the address -- is the form 0xc------- diff --git a/writeback.vhdl b/writeback.vhdl index b924ee0..0151561 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -63,6 +63,7 @@ begin variable xe: xer_common_t; variable zero : std_ulogic; variable sign : std_ulogic; + variable scf : std_ulogic_vector(3 downto 0); begin x(0) := e_in.valid; y(0) := l_in.valid; @@ -124,6 +125,17 @@ begin w_out.write_enable <= not partial_write or second_word; end if; + if l_in.rc = '1' then + -- st*cx. instructions + scf(3) := '0'; + scf(2) := '0'; + scf(1) := l_in.store_done; + scf(0) := xe.so; + c_out.write_cr_enable <= '1'; + c_out.write_cr_mask <= num_to_fxm(0); + c_out.write_cr_data(31 downto 28) <= scf; + end if; + -- shift and byte-reverse data bytes for i in 0 to 7 loop k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);