From 5121e0f392ee99513bedb5006d944e5d436b7d2e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 9 May 2021 19:58:59 +1000 Subject: [PATCH] core: Implement sync instructions This implements all the sync variants (sync, lwsync, ptesync, etc.) as an LSU op that gets sent down to the dcache and completes once the dcache state machine is idle. Signed-off-by: Paul Mackerras --- common.vhdl | 1 + dcache.vhdl | 23 ++++++++++++++++++++--- decode1.vhdl | 2 +- loadstore1.vhdl | 9 +++++++-- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/common.vhdl b/common.vhdl index 425bb79..c04bbe4 100644 --- a/common.vhdl +++ b/common.vhdl @@ -605,6 +605,7 @@ package common is dcbz : std_ulogic; flush : std_ulogic; touch : std_ulogic; + sync : std_ulogic; nc : std_ulogic; reserve : std_ulogic; atomic_qw : std_ulogic; -- part of a quadword atomic op diff --git a/dcache.vhdl b/dcache.vhdl index f4403e4..b921095 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -299,11 +299,13 @@ architecture rtl of dcache is op_lmiss : std_ulogic; op_store : std_ulogic; op_flush : std_ulogic; + op_sync : std_ulogic; nc : std_ulogic; valid : std_ulogic; dcbz : std_ulogic; flush : std_ulogic; touch : std_ulogic; + sync : std_ulogic; reserve : std_ulogic; first_dw : std_ulogic; last_dw : std_ulogic; @@ -404,6 +406,7 @@ architecture rtl of dcache is signal req_op_load_miss : std_ulogic; signal req_op_store : std_ulogic; signal req_op_flush : std_ulogic; + signal req_op_sync : std_ulogic; signal req_op_bad : std_ulogic; signal req_op_nop : std_ulogic; signal req_data : std_ulogic_vector(63 downto 0); @@ -1144,8 +1147,11 @@ begin req_op_store <= '0'; req_op_nop <= '0'; req_op_flush <= '0'; + req_op_sync <= '0'; if go = '1' then - if r0.req.touch = '1' then + if r0.req.sync = '1' then + req_op_sync <= '1'; + elsif r0.req.touch = '1' then if access_ok = '1' and is_hit = '0' and nc = '0' then req_op_load_miss <= '1'; elsif access_ok = '1' and is_hit = '1' and nc = '0' then @@ -1241,7 +1247,7 @@ 
begin report "completing ld/st with error"; end if; - -- Slow ops (load miss, NC, stores) + -- Slow ops (load miss, NC, stores, sync) if r1.slow_valid = '1' then report "completing store or load miss data=" & to_hstring(r1.data_out); end if; @@ -1517,12 +1523,14 @@ begin req.op_lmiss := req_op_load_miss; req.op_store := req_op_store; req.op_flush := req_op_flush; + req.op_sync := req_op_sync; req.nc := req_nc; req.valid := req_go; req.mmu_req := r0.mmu_req; req.dcbz := r0.req.dcbz; req.flush := r0.req.flush; req.touch := r0.req.touch; + req.sync := r0.req.sync; req.reserve := r0.req.reserve; req.first_dw := not r0.req.atomic_qw or r0.req.atomic_first; req.last_dw := not r0.req.atomic_qw or r0.req.atomic_last; @@ -1547,7 +1555,8 @@ begin -- Store the incoming request from r0, if it is a slow request -- Note that r1.full = 1 implies none of the req_op_* are 1 - if req_op_load_miss = '1' or req_op_store = '1' or req_op_flush = '1' then + if req_op_load_miss = '1' or req_op_store = '1' or req_op_flush = '1' or + req_op_sync = '1' then r1.req <= req; r1.full <= '1'; end if; @@ -1673,6 +1682,14 @@ begin r1.state <= FLUSH_CYCLE; end if; + if req.op_sync = '1' then + -- sync/lwsync can complete now that the state machine + -- is idle. + r1.full <= '0'; + r1.slow_valid <= '1'; + r1.ls_valid <= '1'; + end if; + when RELOAD_WAIT_ACK => -- If we are still sending requests, was one accepted ? 
if wishbone_in.stall = '0' and r1.wb.stb = '1' then diff --git a/decode1.vhdl b/decode1.vhdl index 9047cf8..7b480a3 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -377,7 +377,7 @@ architecture behaviour of decode1 is INSN_subfic => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_subfme => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), INSN_subfze => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), - INSN_sync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_sync => (LDST, NONE, OP_SYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_td => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_tdi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_tlbie => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 5e69352..485947b 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -63,6 +63,7 @@ architecture behave of loadstore1 is store : std_ulogic; flush : std_ulogic; touch : std_ulogic; + sync : std_ulogic; tlbie : std_ulogic; dcbz : std_ulogic; read_spr : std_ulogic; @@ -103,7 +104,7 @@ architecture behave of loadstore1 is incomplete : std_ulogic; end record; constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', - flush => '0', touch => '0', tlbie => '0', + flush => '0', touch => '0', sync => '0', tlbie => 
'0', dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0', instr_fault => '0', do_update => '0', mode_32bit => '0', prefixed => '0', @@ -506,6 +507,8 @@ begin end if; case l_in.op is + when OP_SYNC => + v.sync := '1'; when OP_STORE => v.store := '1'; if l_in.length = "0000" then @@ -547,7 +550,7 @@ begin v.mmu_op := '1'; when others => end case; - v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr; + v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr; v.incomplete := v.dc_req and v.two_dwords; -- Work out controls for load and store formatting @@ -994,6 +997,7 @@ begin d_out.dcbz <= stage1_req.dcbz; d_out.flush <= stage1_req.flush; d_out.touch <= stage1_req.touch; + d_out.sync <= stage1_req.sync; d_out.nc <= stage1_req.nc; d_out.reserve <= stage1_req.reserve; d_out.atomic_qw <= stage1_req.atomic_qw; @@ -1009,6 +1013,7 @@ begin d_out.dcbz <= r2.req.dcbz; d_out.flush <= r2.req.flush; d_out.touch <= r2.req.touch; + d_out.sync <= r2.req.sync; d_out.nc <= r2.req.nc; d_out.reserve <= r2.req.reserve; d_out.atomic_qw <= r2.req.atomic_qw;