From d745995207b121e6b53a104c9ddfc8b9840693a2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Sep 2021 21:50:57 +1000 Subject: [PATCH 1/3] Introduce real_addr_t and addr_to_real() This moves REAL_ADDR_BITS out of the caches and defines a real_addr_t type for a real address, along with an addr_to_real() conversion helper. It makes the VHDL a bit more readable. Signed-off-by: Benjamin Herrenschmidt --- common.vhdl | 11 +++++++++++ dcache.vhdl | 10 ++++------ icache.vhdl | 8 +++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/common.vhdl b/common.vhdl index 2d010ab..fb60ce3 100644 --- a/common.vhdl +++ b/common.vhdl @@ -156,6 +156,12 @@ package common is constant FPSCR_NI : integer := 63 - 61; constant FPSCR_RN : integer := 63 - 63; + -- Real addresses + -- REAL_ADDR_BITS is the number of real address bits that we store + constant REAL_ADDR_BITS : positive := 56; + subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t; + -- Used for tracking instruction completion and pending register writes constant TAG_COUNT : positive := 4; constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT); @@ -779,4 +785,9 @@ package body common is begin return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag; end; + + function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t is + begin + return addr(real_addr_t'range); + end; end common; diff --git a/dcache.vhdl b/dcache.vhdl index 34dbda2..489ccb5 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -67,8 +67,6 @@ architecture rtl of dcache is -- Bit fields counts in the address - -- REAL_ADDR_BITS is the number of real address bits that we store - constant REAL_ADDR_BITS : positive := 56; -- ROW_BITS is the number of bits to select a row constant ROW_BITS : natural := log2(BRAM_ROWS); -- ROW_LINEBITS is the number of bits to select a row within a line @@ -289,7 +287,7 @@ 
architecture rtl of dcache is op : op_t; valid : std_ulogic; dcbz : std_ulogic; - real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + real_addr : real_addr_t; data : std_ulogic_vector(63 downto 0); byte_sel : std_ulogic_vector(7 downto 0); hit_way : way_t; @@ -412,7 +410,7 @@ architecture rtl of dcache is signal tlb_hit : std_ulogic; signal tlb_hit_way : tlb_way_t; signal pte : tlb_pte_t; - signal ra : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + signal ra : real_addr_t; signal valid_ra : std_ulogic; signal perm_attr : perm_attr_t; signal rc_ok : std_ulogic; @@ -803,7 +801,7 @@ begin -- Cache tag RAM second read port, for snooping cache_tag_read_2 : process(clk) - variable addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + variable addr : real_addr_t; begin if rising_edge(clk) then addr := (others => '0'); @@ -830,7 +828,7 @@ begin variable s_hit : std_ulogic; variable s_tag : cache_tag_t; variable s_pte : tlb_pte_t; - variable s_ra : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + variable s_ra : real_addr_t; variable hit_set : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0); variable hit_way_set : hit_way_set_t; variable rel_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0); diff --git a/icache.vhdl b/icache.vhdl index 298ee47..ecd0c84 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -46,8 +46,6 @@ entity icache is TLB_SIZE : positive := 64; -- L1 ITLB log_2(page_size) TLB_LG_PGSZ : positive := 12; - -- Number of real address bits that we store - REAL_ADDR_BITS : positive := 56; -- Non-zero to enable log data collection LOG_LENGTH : natural := 0 ); @@ -210,7 +208,7 @@ architecture rtl of icache is signal req_laddr : std_ulogic_vector(63 downto 0); signal tlb_req_index : tlb_index_t; - signal real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + signal real_addr : real_addr_t; signal ra_valid : std_ulogic; signal priv_fault : std_ulogic; signal access_ok : std_ulogic; @@ -468,7 +466,7 @@ begin end if; eaa_priv <= pte(3); else - 
real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0); + real_addr <= addr_to_real(i_in.nia); ra_valid <= '1'; eaa_priv <= '1'; end if; @@ -627,7 +625,7 @@ begin icache_miss : process(clk) variable tagset : cache_tags_set_t; variable tag : cache_tag_t; - variable snoop_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + variable snoop_addr : real_addr_t; variable snoop_tag : cache_tag_t; variable snoop_cache_tags : cache_tags_set_t; begin From 5cfa65e836138179ca84d369a9711460411aa88e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Sep 2021 21:53:52 +1000 Subject: [PATCH 2/3] Introduce addr_to_wb() and wb_to_addr() helpers These convert addresses to/from wishbone addresses. Use them in parts of the caches to make the code a bit more readable. Along the way, rename some functions in the caches to make it a bit clearer what they operate on and fix a bug in the icache STOP_RELOAD state where the wb address wasn't properly converted. Signed-off-by: Benjamin Herrenschmidt --- dcache.vhdl | 13 ++++++------- icache.vhdl | 30 ++++++++++++++---------------- wishbone_types.vhdl | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/dcache.vhdl b/dcache.vhdl index 489ccb5..b4098f9 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -452,7 +452,7 @@ architecture rtl of dcache is end; -- Returns whether this is the last row of a line - function is_last_row_addr(addr: wishbone_addr_type; last: row_in_line_t) return boolean is + function is_last_row_wb_addr(addr: wishbone_addr_type; last: row_in_line_t) return boolean is begin return unsigned(addr(LINE_OFF_BITS - ROW_OFF_BITS - 1 downto 0)) = last; end; @@ -464,7 +464,7 @@ architecture rtl of dcache is end; -- Return the address of the next row in the current cache line - function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is + function next_row_wb_addr(addr: wishbone_addr_type) return std_ulogic_vector is variable row_idx : 
std_ulogic_vector(ROW_LINEBITS-1 downto 0); variable result : wishbone_addr_type; begin @@ -804,8 +804,7 @@ begin variable addr : real_addr_t; begin if rising_edge(clk) then - addr := (others => '0'); - addr(snoop_in.adr'left + ROW_OFF_BITS downto ROW_OFF_BITS) := snoop_in.adr; + addr := addr_to_real(wb_to_addr(snoop_in.adr)); snoop_tag_set <= cache_tags(get_index(addr)); snoop_wrtag <= get_tag(addr); snoop_index <= get_index(addr); @@ -1381,7 +1380,7 @@ begin -- Main state machine case r1.state is when IDLE => - r1.wb.adr <= req.real_addr(r1.wb.adr'left + ROW_OFF_BITS downto ROW_OFF_BITS); + r1.wb.adr <= addr_to_wb(req.real_addr); r1.wb.sel <= req.byte_sel; r1.wb.dat <= req.data; r1.dcbz <= req.dcbz; @@ -1469,12 +1468,12 @@ begin -- If we are still sending requests, was one accepted ? if wishbone_in.stall = '0' and r1.wb.stb = '1' then -- That was the last word ? We are done sending. Clear stb. - if is_last_row_addr(r1.wb.adr, r1.end_row_ix) then + if is_last_row_wb_addr(r1.wb.adr, r1.end_row_ix) then r1.wb.stb <= '0'; end if; -- Calculate the next row address - r1.wb.adr <= next_row_addr(r1.wb.adr); + r1.wb.adr <= next_row_wb_addr(r1.wb.adr); end if; -- Incoming acks processing diff --git a/icache.vhdl b/icache.vhdl index ecd0c84..e7ac528 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -235,7 +235,7 @@ architecture rtl of icache is end; -- Return the cache row index (data memory) for an address - function get_row(addr: std_ulogic_vector) return row_t is + function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is begin return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS))); end; @@ -249,9 +249,9 @@ architecture rtl of icache is end; -- Returns whether this is the last row of a line - function is_last_row_addr(addr: wishbone_addr_type; last: row_in_line_t) return boolean is + function is_last_row_wb_addr(wb_addr: wishbone_addr_type; last: row_in_line_t) return boolean is begin - return unsigned(addr(LINE_OFF_BITS - ROW_OFF_BITS - 1 
downto 0)) = last; + return unsigned(wb_addr(LINE_OFF_BITS - ROW_OFF_BITS - 1 downto 0)) = last; end; -- Returns whether this is the last row of a line @@ -261,15 +261,15 @@ architecture rtl of icache is end; -- Return the address of the next row in the current cache line - function next_row_addr(addr: wishbone_addr_type) + function next_row_wb_addr(wb_addr: wishbone_addr_type) return std_ulogic_vector is variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0); variable result : wishbone_addr_type; begin -- Is there no simpler way in VHDL to generate that 3 bits adder ? - row_idx := addr(ROW_LINEBITS - 1 downto 0); + row_idx := wb_addr(ROW_LINEBITS - 1 downto 0); row_idx := std_ulogic_vector(unsigned(row_idx) + 1); - result := addr; + result := wb_addr; result(ROW_LINEBITS - 1 downto 0) := row_idx; return result; end; @@ -299,10 +299,9 @@ architecture rtl of icache is end; -- Get the tag value from the address - function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); - endian: std_ulogic) return cache_tag_t is + function get_tag(addr: real_addr_t; endian: std_ulogic) return cache_tag_t is begin - return endian & addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS); + return endian & addr(addr'left downto SET_SIZE_BITS); end; -- Read a tag from a tag memory row @@ -523,7 +522,7 @@ begin -- used for cache miss processing if needed -- req_laddr <= (63 downto REAL_ADDR_BITS => '0') & - real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS)& + real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) & (ROW_OFF_BITS-1 downto 0 => '0'); -- Test if pending request is a hit on any way @@ -655,8 +654,7 @@ begin -- Detect snooped writes and decode address into index and tag -- Since we never write, any write should be snooped snoop_valid <= wb_snoop_in.cyc and wb_snoop_in.stb and wb_snoop_in.we; - snoop_addr := (others => '0'); - snoop_addr(wb_snoop_in.adr'left + ROW_OFF_BITS downto ROW_OFF_BITS) := wb_snoop_in.adr; + snoop_addr := addr_to_real(wb_to_addr(wb_snoop_in.adr)); 
snoop_index <= get_index(snoop_addr); snoop_cache_tags := cache_tags(get_index(snoop_addr)); snoop_tag := get_tag(snoop_addr, '0'); @@ -715,7 +713,7 @@ begin -- Prep for first wishbone read. We calculate the address of -- the start of the cache line and start the WB cycle. -- - r.wb.adr <= req_laddr(r.wb.adr'left + ROW_OFF_BITS downto ROW_OFF_BITS); + r.wb.adr <= addr_to_wb(req_laddr); r.wb.cyc <= '1'; r.wb.stb <= '1'; @@ -747,12 +745,12 @@ begin if wishbone_in.stall = '0' and r.wb.stb = '1' then -- That was the last word ? We are done sending. Clear stb. -- - if is_last_row_addr(r.wb.adr, r.end_row_ix) then + if is_last_row_wb_addr(r.wb.adr, r.end_row_ix) then r.wb.stb <= '0'; end if; -- Calculate the next row address - r.wb.adr <= next_row_addr(r.wb.adr); + r.wb.adr <= next_row_wb_addr(r.wb.adr); end if; -- Abort reload if we get an invalidation @@ -783,7 +781,7 @@ begin when STOP_RELOAD => -- Wait for all outstanding requests to be satisfied, then -- go to IDLE state. - if get_row_of_line(r.store_row) = get_row_of_line(get_row(r.wb.adr)) then + if get_row_of_line(r.store_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then r.wb.cyc <= '0'; r.state <= IDLE; end if; diff --git a/wishbone_types.vhdl b/wishbone_types.vhdl index 8cb3e9b..aea75e8 100644 --- a/wishbone_types.vhdl +++ b/wishbone_types.vhdl @@ -15,6 +15,9 @@ package wishbone_types is subtype wishbone_data_type is std_ulogic_vector(wishbone_data_bits-1 downto 0); subtype wishbone_sel_type is std_ulogic_vector(wishbone_sel_bits-1 downto 0); + function addr_to_wb(addr: std_ulogic_vector) return wishbone_addr_type; + function wb_to_addr(wb_addr: wishbone_addr_type) return std_ulogic_vector; + type wishbone_master_out is record adr : wishbone_addr_type; dat : wishbone_data_type; @@ -38,6 +41,7 @@ package wishbone_types is -- -- IO Bus to a device, 30-bit address, 32-bits data -- + type wb_io_master_out is record adr : std_ulogic_vector(29 downto 0); dat : std_ulogic_vector(31 downto 0); @@ -56,3 +60,19 
@@ package wishbone_types is end record; constant wb_io_slave_out_init : wb_io_slave_out := (ack => '0', stall => '0', others => (others => '0')); end package wishbone_types; + +package body wishbone_types is + function addr_to_wb(addr: std_ulogic_vector) return wishbone_addr_type is + begin + assert addr'length >= (wishbone_addr_type'length + wishbone_log2_width); + assert addr'right = 0; + return addr(wishbone_addr_type'left + wishbone_log2_width downto wishbone_log2_width); + end; + function wb_to_addr(wb_addr: wishbone_addr_type) return std_ulogic_vector is + variable ret : std_ulogic_vector(63 downto 0); + begin + ret := (others => '0'); + ret(wishbone_addr_type'left + wishbone_log2_width downto wishbone_log2_width) := wb_addr; + return ret; + end; +end wishbone_types; From e675eba0dfa21fa89dc6cc4f5ebec246116d4507 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Sep 2021 22:03:18 +1000 Subject: [PATCH 3/3] icache: req_laddr becomes req_raddr Uses real_addr_t and only stores the real address bits Signed-off-by: Benjamin Herrenschmidt --- icache.vhdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/icache.vhdl b/icache.vhdl index e7ac528..d4be935 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -205,7 +205,7 @@ architecture rtl of icache is signal req_tag : cache_tag_t; signal req_is_hit : std_ulogic; signal req_is_miss : std_ulogic; - signal req_laddr : std_ulogic_vector(63 downto 0); + signal req_raddr : real_addr_t; signal tlb_req_index : tlb_index_t; signal real_addr : real_addr_t; @@ -235,7 +235,7 @@ architecture rtl of icache is end; -- Return the cache row index (data memory) for an address - function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is + function get_row(addr: std_ulogic_vector) return row_t is begin return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS))); end; @@ -521,8 +521,7 @@ begin -- Calculate address of beginning of cache row, will be -- used for cache miss 
processing if needed -- - req_laddr <= (63 downto REAL_ADDR_BITS => '0') & - real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) & + req_raddr <= real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) & (ROW_OFF_BITS-1 downto 0 => '0'); -- Test if pending request is a hit on any way @@ -705,15 +704,15 @@ begin -- Keep track of our index and way for subsequent stores r.store_index <= req_index; - r.store_row <= get_row(req_laddr); + r.store_row <= get_row(req_raddr); r.store_tag <= req_tag; r.store_valid <= '1'; - r.end_row_ix <= get_row_of_line(get_row(req_laddr)) - 1; + r.end_row_ix <= get_row_of_line(get_row(req_raddr)) - 1; -- Prep for first wishbone read. We calculate the address of -- the start of the cache line and start the WB cycle. -- - r.wb.adr <= addr_to_wb(req_laddr); + r.wb.adr <= addr_to_wb(req_raddr); r.wb.cyc <= '1'; r.wb.stb <= '1';