Merge pull request #348 from paulusmack/reduce

Reduce LUT usage
fpu-constant
Michael Neuling 2 years ago committed by GitHub
commit 2b97fb0bf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -20,10 +20,11 @@ end entity bit_counter;
architecture behaviour of bit_counter is architecture behaviour of bit_counter is
-- signals for count-leading/trailing-zeroes -- signals for count-leading/trailing-zeroes
signal inp : std_ulogic_vector(63 downto 0); signal inp : std_ulogic_vector(63 downto 0);
signal inp_r : std_ulogic_vector(63 downto 0);
signal sum : std_ulogic_vector(64 downto 0); signal sum : std_ulogic_vector(64 downto 0);
signal msb_r : std_ulogic; signal sum_r : std_ulogic_vector(64 downto 0);
signal onehot : std_ulogic_vector(63 downto 0); signal onehot : std_ulogic_vector(63 downto 0);
signal onehot_r : std_ulogic_vector(63 downto 0); signal edge : std_ulogic_vector(63 downto 0);
signal bitnum : std_ulogic_vector(5 downto 0); signal bitnum : std_ulogic_vector(5 downto 0);
signal cntz : std_ulogic_vector(63 downto 0); signal cntz : std_ulogic_vector(63 downto 0);


@ -49,12 +50,13 @@ begin
countzero_r: process(clk) countzero_r: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
msb_r <= sum(64); inp_r <= inp;
onehot_r <= onehot; sum_r <= sum;
end if; end if;
end process; end process;


countzero: process(all) countzero: process(all)
variable bitnum_e, bitnum_o : std_ulogic_vector(5 downto 0);
begin begin
if is_32bit = '0' then if is_32bit = '0' then
if count_right = '0' then if count_right = '0' then
@ -72,12 +74,16 @@ begin
end if; end if;


sum <= std_ulogic_vector(unsigned('0' & not inp) + 1); sum <= std_ulogic_vector(unsigned('0' & not inp) + 1);
onehot <= sum(63 downto 0) and inp;


-- The following occurs after a clock edge -- The following occurs after a clock edge
bitnum <= bit_number(onehot_r); edge <= sum_r(63 downto 0) or inp_r;
bitnum_e := edgelocation(edge, 6);
onehot <= sum_r(63 downto 0) and inp_r;
bitnum_o := bit_number(onehot);
bitnum(5 downto 2) <= bitnum_e(5 downto 2);
bitnum(1 downto 0) <= bitnum_o(1 downto 0);


cntz <= 57x"0" & msb_r & bitnum; cntz <= 57x"0" & sum_r(64) & bitnum;
end process; end process;


popcnt_r: process(clk) popcnt_r: process(clk)

@ -89,9 +89,8 @@ begin
r_int.predicted_taken <= r_next_int.predicted_taken; r_int.predicted_taken <= r_next_int.predicted_taken;
r_int.pred_not_taken <= r_next_int.pred_not_taken; r_int.pred_not_taken <= r_next_int.pred_not_taken;
r_int.predicted_nia <= r_next_int.predicted_nia; r_int.predicted_nia <= r_next_int.predicted_nia;
r_int.rd_is_niap4 <= r_next.sequential; r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
end if; end if;
r.sequential <= r_next.sequential and advance_nia;
-- always send the up-to-date stop mark and req -- always send the up-to-date stop mark and req
r.stop_mark <= stop_in; r.stop_mark <= stop_in;
r.req <= not rst; r.req <= not rst;
@ -145,11 +144,11 @@ begin
begin begin
v := r; v := r;
v_int := r_int; v_int := r_int;
v.sequential := '0';
v.predicted := '0'; v.predicted := '0';
v.pred_ntaken := '0'; v.pred_ntaken := '0';
v_int.predicted_taken := '0'; v_int.predicted_taken := '0';
v_int.pred_not_taken := '0'; v_int.pred_not_taken := '0';
v_int.rd_is_niap4 := '0';


if rst = '1' then if rst = '1' then
if alt_reset_in = '1' then if alt_reset_in = '1' then
@ -180,7 +179,7 @@ begin
v.nia := r_int.predicted_nia; v.nia := r_int.predicted_nia;
v.predicted := '1'; v.predicted := '1';
else else
v.sequential := '1'; v_int.rd_is_niap4 := '1';
v.pred_ntaken := r_int.pred_not_taken; v.pred_ntaken := r_int.pred_not_taken;
v.nia := std_ulogic_vector(unsigned(r.nia) + 4); v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
if r_int.mode_32bit = '1' then if r_int.mode_32bit = '1' then

@ -28,7 +28,9 @@ package helpers is


function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector; function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector; function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function edgelocation(v: std_ulogic_vector; nbits: natural) return std_ulogic_vector;
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector; function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
function count_right_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
end package helpers; end package helpers;


package body helpers is package body helpers is
@ -247,16 +249,50 @@ package body helpers is
return ret; return ret;
end; end;


-- Count leading zeroes operation -- Assuming the input 'v' is a value of the form 1...10...0,
-- the output is the bit number of the rightmost 1 bit in v.
-- If v is zero, the result is zero.
-- edgelocation: find the position of the 0 -> 1 boundary in a value of
-- the form 1...10...0 (all ones above the boundary, all zeroes below).
--
--   v      the value to examine; its rightmost 2**nbits bits are used.
--          Bits are numbered positionally, with the rightmost element
--          of v being bit 0, whatever index range the actual has.
--   nbits  width of the result.
--
-- Returns an nbits-wide vector holding the bit number of the rightmost
-- 1 bit.  An all-zero or an all-one input returns zero.
--
-- Result bit i is computed by splitting v into groups of 2**(i+1) bits
-- and testing, in each group, whether the top bit is set while the top
-- bit of the lower half is clear.  For an input of the stated form that
-- holds for at most one group, and exactly when bit i of the answer is 1.
function edgelocation(v: std_ulogic_vector; nbits: natural) return std_ulogic_vector is
    -- Normalised view of v so the indexing below does not depend on the
    -- declared range or direction of the actual parameter.
    alias vn: std_ulogic_vector(v'length - 1 downto 0) is v;
    variable p: std_ulogic_vector(nbits - 1 downto 0);
    variable stride: natural;
    variable b: std_ulogic;
    variable k: natural;
begin
    stride := 2;
    for i in 0 to nbits - 1 loop
        b := '0';
        for j in 0 to (2**nbits / stride) - 1 loop
            -- k is the lowest bit number of group j
            k := j * stride;
            b := b or (vn(k + stride - 1) and not vn(k + (stride/2) - 1));
        end loop;
        p(i) := b;
        stride := stride * 2;
    end loop;
    return p;
end function;

-- Count leading zeroes operations
-- Assumes the value passed in is not zero (if it is, zero is returned) -- Assumes the value passed in is not zero (if it is, zero is returned)
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is function count_right_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
variable rev: std_ulogic_vector(val'left downto val'right);
variable sum: std_ulogic_vector(val'left downto val'right); variable sum: std_ulogic_vector(val'left downto val'right);
variable onehot: std_ulogic_vector(val'left downto val'right); variable onehot: std_ulogic_vector(val'left downto val'right);
variable edge: std_ulogic_vector(val'left downto val'right);
variable bn, bn_e, bn_o: std_ulogic_vector(5 downto 0);
begin
sum := std_ulogic_vector(- signed(val));
onehot := sum and val;
edge := sum or val;
bn_e := edgelocation(std_ulogic_vector(resize(signed(edge), 64)), 6);
bn_o := bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
bn := bn_e(5 downto 2) & bn_o(1 downto 0);
return bn;
end;

function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
variable rev: std_ulogic_vector(val'left downto val'right);
begin begin
rev := bit_reverse(val); rev := bit_reverse(val);
sum := std_ulogic_vector(- signed(rev)); return count_right_zeroes(rev);
onehot := sum and rev;
return bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
end; end;

end package body helpers; end package body helpers;

@ -212,7 +212,6 @@ architecture rtl of icache is
signal ra_valid : std_ulogic; signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic; signal priv_fault : std_ulogic;
signal access_ok : std_ulogic; signal access_ok : std_ulogic;
signal use_previous : std_ulogic;


-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
@ -397,7 +396,7 @@ begin
wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8); wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
end loop; end loop;
end if; end if;
do_read <= not (stall_in or use_previous); do_read <= not stall_in;
do_write <= '0'; do_write <= '0';
if wishbone_in.ack = '1' and replace_way = i then if wishbone_in.ack = '1' and replace_way = i then
do_write <= '1'; do_write <= '1';
@ -503,16 +502,6 @@ begin
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
variable hit_way : way_t; variable hit_way : way_t;
begin begin
-- i_in.sequential means that i_in.nia this cycle is 4 more than
-- last cycle. If we read more than 32 bits at a time, had a cache hit
-- last cycle, and we don't want the first 32-bit chunk, then we can
-- keep the data we read last cycle and just use that.
if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
use_previous <= i_in.req and i_in.sequential and r.hit_valid;
else
use_previous <= '0';
end if;

-- Extract line, row and tag from request -- Extract line, row and tag from request
req_index <= get_index(i_in.nia); req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia); req_row <= get_row(i_in.nia);
@ -542,7 +531,7 @@ begin
end loop; end loop;


-- Generate the "hit" and "miss" signals for the synchronous blocks -- Generate the "hit" and "miss" signals for the synchronous blocks
if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' and use_previous = '0' then if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then
req_is_hit <= is_hit; req_is_hit <= is_hit;
req_is_miss <= not is_hit; req_is_miss <= not is_hit;
else else
@ -576,7 +565,7 @@ begin
i_out.next_pred_ntaken <= i_in.pred_ntaken; i_out.next_pred_ntaken <= i_in.pred_ntaken;


-- Stall fetch1 if we have a miss on cache or TLB or a protection fault -- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not (is_hit and access_ok) and not use_previous; stall_out <= not (is_hit and access_ok);


-- Wishbone requests output (from the cache miss reload machine) -- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb; wishbone_out <= r.wb;
@ -588,8 +577,7 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
-- keep outputs to fetch2 unchanged on a stall -- keep outputs to fetch2 unchanged on a stall
-- except that flush or reset sets valid to 0 -- except that flush or reset sets valid to 0
-- If use_previous, keep the same data as last cycle and use the second half if stall_in = '1' then
if stall_in = '1' or use_previous = '1' then
if rst = '1' or flush_in = '1' then if rst = '1' or flush_in = '1' then
r.hit_valid <= '0'; r.hit_valid <= '0';
end if; end if;

@ -239,12 +239,20 @@ architecture behaviour of soc is
SLAVE_IO_ICP, SLAVE_IO_ICP,
SLAVE_IO_ICS, SLAVE_IO_ICS,
SLAVE_IO_UART1, SLAVE_IO_UART1,
SLAVE_IO_SPI_FLASH_REG, SLAVE_IO_SPI_FLASH,
SLAVE_IO_SPI_FLASH_MAP,
SLAVE_IO_GPIO, SLAVE_IO_GPIO,
SLAVE_IO_EXTERNAL, SLAVE_IO_EXTERNAL);
SLAVE_IO_NONE); signal current_io_decode : slave_io_type;
signal slave_io_dbg : slave_io_type;
signal io_cycle_none : std_ulogic;
signal io_cycle_syscon : std_ulogic;
signal io_cycle_uart : std_ulogic;
signal io_cycle_uart1 : std_ulogic;
signal io_cycle_icp : std_ulogic;
signal io_cycle_ics : std_ulogic;
signal io_cycle_spi_flash : std_ulogic;
signal io_cycle_gpio : std_ulogic;
signal io_cycle_external : std_ulogic;


function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is
variable wwb : wishbone_master_out; variable wwb : wishbone_master_out;
@ -465,14 +473,20 @@ begin
-- Misc -- Misc
variable has_top : boolean; variable has_top : boolean;
variable has_bot : boolean; variable has_bot : boolean;
variable do_cyc : std_ulogic;
variable end_cyc : std_ulogic;
variable slave_io : slave_io_type;
variable match : std_ulogic_vector(31 downto 12);
begin begin
if rising_edge(system_clk) then if rising_edge(system_clk) then
do_cyc := '0';
end_cyc := '0';
if (rst) then if (rst) then
state := IDLE; state := IDLE;
wb_io_out.ack <= '0'; wb_io_out.ack <= '0';
wb_io_out.stall <= '0'; wb_io_out.stall <= '0';
wb_sio_out.cyc <= '0';
wb_sio_out.stb <= '0'; wb_sio_out.stb <= '0';
end_cyc := '1';
has_top := false; has_top := false;
has_bot := false; has_bot := false;
else else
@ -488,7 +502,7 @@ begin
wb_io_out.stall <= '1'; wb_io_out.stall <= '1';


-- Start cycle downstream -- Start cycle downstream
wb_sio_out.cyc <= '1'; do_cyc := '1';
wb_sio_out.stb <= '1'; wb_sio_out.stb <= '1';


-- Copy write enable to IO out, copy address as well -- Copy write enable to IO out, copy address as well
@ -551,8 +565,8 @@ begin
-- Wait for new ack -- Wait for new ack
state := WAIT_ACK_TOP; state := WAIT_ACK_TOP;
else else
-- We are done, ack up, clear cyc downstram -- We are done, ack up, clear cyc downstream
wb_sio_out.cyc <= '0'; end_cyc := '1';


-- And ack & unstall upstream -- And ack & unstall upstream
wb_io_out.ack <= '1'; wb_io_out.ack <= '1';
@ -576,7 +590,7 @@ begin
end if; end if;


-- We are done, ack up, clear cyc downstream -- We are done, ack up, clear cyc downstream
wb_sio_out.cyc <= '0'; end_cyc := '1';


-- And ack & unstall upstream -- And ack & unstall upstream
wb_io_out.ack <= '1'; wb_io_out.ack <= '1';
@ -587,144 +601,149 @@ begin
end if; end if;
end case; end case;
end if; end if;

-- Create individual registered cycle signals for the wishbones
-- going to the various peripherals
if do_cyc = '1' or end_cyc = '1' then
io_cycle_none <= '0';
io_cycle_syscon <= '0';
io_cycle_uart <= '0';
io_cycle_uart1 <= '0';
io_cycle_icp <= '0';
io_cycle_ics <= '0';
io_cycle_spi_flash <= '0';
io_cycle_gpio <= '0';
io_cycle_external <= '0';
wb_sio_out.cyc <= '0';
wb_ext_is_dram_init <= '0';
wb_spiflash_is_map <= '0';
wb_spiflash_is_reg <= '0';
wb_ext_is_dram_csr <= '0';
wb_ext_is_eth <= '0';
wb_ext_is_sdcard <= '0';
end if;
if do_cyc = '1' then
-- Decode I/O address
-- This is real address bits 29 downto 12
match := "11" & wb_io_in.adr(26 downto 9);
slave_io := SLAVE_IO_SYSCON;
if std_match(match, x"FF---") and HAS_DRAM then
slave_io := SLAVE_IO_EXTERNAL;
io_cycle_external <= '1';
wb_ext_is_dram_init <= '1';
elsif std_match(match, x"F----") then
slave_io := SLAVE_IO_SPI_FLASH;
io_cycle_spi_flash <= '1';
wb_spiflash_is_map <= '1';
elsif std_match(match, x"C8---") then
-- Ext IO "chip selects"
if std_match(match, x"--00-") and HAS_DRAM then
slave_io := SLAVE_IO_EXTERNAL;
io_cycle_external <= '1';
wb_ext_is_dram_csr <= '1';
elsif (std_match(match, x"--02-") or std_match(match, x"--03-")) and
HAS_LITEETH then
slave_io := SLAVE_IO_EXTERNAL;
io_cycle_external <= '1';
wb_ext_is_eth <= '1';
elsif std_match(match, x"--04-") and HAS_SD_CARD then
slave_io := SLAVE_IO_EXTERNAL;
io_cycle_external <= '1';
wb_ext_is_sdcard <= '1';
else
io_cycle_none <= '1';
end if;
elsif std_match(match, x"C0000") then
slave_io := SLAVE_IO_SYSCON;
io_cycle_syscon <= '1';
elsif std_match(match, x"C0002") then
slave_io := SLAVE_IO_UART;
io_cycle_uart <= '1';
elsif std_match(match, x"C0003") then
slave_io := SLAVE_IO_UART1;
io_cycle_uart1 <= '1';
elsif std_match(match, x"C0004") then
slave_io := SLAVE_IO_ICP;
io_cycle_icp <= '1';
elsif std_match(match, x"C0005") then
slave_io := SLAVE_IO_ICS;
io_cycle_ics <= '1';
elsif std_match(match, x"C0006") then
slave_io := SLAVE_IO_SPI_FLASH;
io_cycle_spi_flash <= '1';
wb_spiflash_is_reg <= '1';
elsif std_match(match, x"C0007") then
slave_io := SLAVE_IO_GPIO;
io_cycle_gpio <= '1';
else
io_cycle_none <= '1';
end if;
current_io_decode <= slave_io;
wb_sio_out.cyc <= '1';
end if;
end if; end if;
end process; end process;
-- IO wishbone slave intercon. -- IO wishbone slave interconnect.
-- --
slave_io_intercon: process(wb_sio_out, wb_syscon_out, wb_uart0_out, wb_uart1_out, slave_io_intercon: process(all)
wb_ext_io_out, wb_xics_icp_out, wb_xics_ics_out,
wb_spiflash_out)
variable slave_io : slave_io_type;

variable match : std_ulogic_vector(31 downto 12);
variable ext_valid : boolean;
begin begin

-- Simple address decoder.
slave_io := SLAVE_IO_NONE;
match := "11" & wb_sio_out.adr(27 downto 10);
if std_match(match, x"FF---") and HAS_DRAM then
slave_io := SLAVE_IO_EXTERNAL;
elsif std_match(match, x"F----") then
slave_io := SLAVE_IO_SPI_FLASH_MAP;
elsif std_match(match, x"C0000") then
slave_io := SLAVE_IO_SYSCON;
elsif std_match(match, x"C0002") then
slave_io := SLAVE_IO_UART;
elsif std_match(match, x"C0003") then
slave_io := SLAVE_IO_UART1;
elsif std_match(match, x"C8---") then
slave_io := SLAVE_IO_EXTERNAL;
elsif std_match(match, x"C0004") then
slave_io := SLAVE_IO_ICP;
elsif std_match(match, x"C0005") then
slave_io := SLAVE_IO_ICS;
elsif std_match(match, x"C0006") then
slave_io := SLAVE_IO_SPI_FLASH_REG;
elsif std_match(match, x"C0007") then
slave_io := SLAVE_IO_GPIO;
end if;
slave_io_dbg <= slave_io;
wb_uart0_in <= wb_sio_out; wb_uart0_in <= wb_sio_out;
wb_uart0_in.cyc <= '0'; wb_uart0_in.cyc <= io_cycle_uart;
wb_uart1_in <= wb_sio_out; wb_uart1_in <= wb_sio_out;
wb_uart1_in.cyc <= '0'; wb_uart1_in.cyc <= io_cycle_uart1;

wb_spiflash_in <= wb_sio_out; wb_spiflash_in <= wb_sio_out;
wb_spiflash_in.cyc <= '0'; wb_spiflash_in.cyc <= io_cycle_spi_flash;
wb_spiflash_is_reg <= '0'; -- Clear top bits so they don't make their way to the
wb_spiflash_is_map <= '0'; -- flash chip.
wb_spiflash_in.adr(27 downto 26) <= "00";

wb_gpio_in <= wb_sio_out; wb_gpio_in <= wb_sio_out;
wb_gpio_in.cyc <= '0'; wb_gpio_in.cyc <= io_cycle_gpio;


-- Only give xics 8 bits of wb addr (for now...) -- Only give xics 8 bits of wb addr (for now...)
wb_xics_icp_in <= wb_sio_out; wb_xics_icp_in <= wb_sio_out;
wb_xics_icp_in.adr <= (others => '0'); wb_xics_icp_in.adr <= (others => '0');
wb_xics_icp_in.adr(5 downto 0) <= wb_sio_out.adr(5 downto 0); wb_xics_icp_in.adr(5 downto 0) <= wb_sio_out.adr(5 downto 0);
wb_xics_icp_in.cyc <= '0'; wb_xics_icp_in.cyc <= io_cycle_icp;
wb_xics_ics_in <= wb_sio_out; wb_xics_ics_in <= wb_sio_out;
wb_xics_ics_in.adr <= (others => '0'); wb_xics_ics_in.adr <= (others => '0');
wb_xics_ics_in.adr(9 downto 0) <= wb_sio_out.adr(9 downto 0); wb_xics_ics_in.adr(9 downto 0) <= wb_sio_out.adr(9 downto 0);
wb_xics_ics_in.cyc <= '0'; wb_xics_ics_in.cyc <= io_cycle_ics;


wb_ext_io_in <= wb_sio_out; wb_ext_io_in <= wb_sio_out;
wb_ext_io_in.cyc <= '0'; wb_ext_io_in.cyc <= io_cycle_external;


wb_syscon_in <= wb_sio_out; wb_syscon_in <= wb_sio_out;
wb_syscon_in.cyc <= '0'; wb_syscon_in.cyc <= io_cycle_syscon;

wb_ext_is_dram_csr <= '0';
wb_ext_is_dram_init <= '0';
wb_ext_is_eth <= '0';
wb_ext_is_sdcard <= '0';


-- Default response, ack & return all 1's case current_io_decode is
wb_sio_in.dat <= (others => '1');
wb_sio_in.ack <= wb_sio_out.stb and wb_sio_out.cyc;
wb_sio_in.stall <= '0';

case slave_io is
when SLAVE_IO_EXTERNAL => when SLAVE_IO_EXTERNAL =>
-- Ext IO "chip selects" wb_sio_in <= wb_ext_io_out;
--
-- DRAM init is special at 0xFF* so we just test the top
-- bit. Everything else is at 0xC8* so we test only bits
-- 23 downto 16 (21 downto 14 in the wishbone addr).
--
ext_valid := false;
if wb_sio_out.adr(27) = '1' and HAS_DRAM then -- DRAM init is special
wb_ext_is_dram_init <= '1';
ext_valid := true;
elsif wb_sio_out.adr(21 downto 14) = x"00" and HAS_DRAM then
wb_ext_is_dram_csr <= '1';
ext_valid := true;
elsif wb_sio_out.adr(21 downto 14) = x"02" and HAS_LITEETH then
wb_ext_is_eth <= '1';
ext_valid := true;
elsif wb_sio_out.adr(21 downto 14) = x"03" and HAS_LITEETH then
wb_ext_is_eth <= '1';
ext_valid := true;
elsif wb_sio_out.adr(21 downto 14) = x"04" and HAS_SD_CARD then
wb_ext_is_sdcard <= '1';
ext_valid := true;
end if;
if ext_valid then
wb_ext_io_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_ext_io_out;
end if;

when SLAVE_IO_SYSCON => when SLAVE_IO_SYSCON =>
wb_syscon_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_syscon_out; wb_sio_in <= wb_syscon_out;
when SLAVE_IO_UART => when SLAVE_IO_UART =>
wb_uart0_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_uart0_out; wb_sio_in <= wb_uart0_out;
when SLAVE_IO_ICP => when SLAVE_IO_ICP =>
wb_xics_icp_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_xics_icp_out; wb_sio_in <= wb_xics_icp_out;
when SLAVE_IO_ICS => when SLAVE_IO_ICS =>
wb_xics_ics_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_xics_ics_out; wb_sio_in <= wb_xics_ics_out;
when SLAVE_IO_UART1 => when SLAVE_IO_UART1 =>
wb_uart1_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_uart1_out; wb_sio_in <= wb_uart1_out;
when SLAVE_IO_SPI_FLASH_MAP => when SLAVE_IO_SPI_FLASH =>
-- Clear top bits so they don't make their way to the
-- fash chip.
wb_spiflash_in.adr(27 downto 26) <= "00";
wb_spiflash_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_spiflash_out; wb_sio_in <= wb_spiflash_out;
wb_spiflash_is_map <= '1';
when SLAVE_IO_SPI_FLASH_REG =>
wb_spiflash_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_spiflash_out;
wb_spiflash_is_reg <= '1';
when SLAVE_IO_GPIO => when SLAVE_IO_GPIO =>
wb_gpio_in.cyc <= wb_sio_out.cyc;
wb_sio_in <= wb_gpio_out; wb_sio_in <= wb_gpio_out;
when others =>
end case; end case;


-- Default response, ack & return all 1's
if io_cycle_none = '1' then
wb_sio_in.dat <= (others => '1');
wb_sio_in.ack <= wb_sio_out.stb and wb_sio_out.cyc;
wb_sio_in.stall <= '0';
end if;

end process; end process;


-- Syscon slave -- Syscon slave

@ -54,9 +54,6 @@ architecture behaviour of xics_icp is


signal r, r_next : reg_internal_t; signal r, r_next : reg_internal_t;


-- hardwire the hardware IRQ priority
constant HW_PRIORITY : std_ulogic_vector(7 downto 0) := x"80";

-- 8 bit offsets for each presentation -- 8 bit offsets for each presentation
constant XIRR_POLL : std_ulogic_vector(7 downto 0) := x"00"; constant XIRR_POLL : std_ulogic_vector(7 downto 0) := x"00";
constant XIRR : std_ulogic_vector(7 downto 0) := x"04"; constant XIRR : std_ulogic_vector(7 downto 0) := x"04";
@ -207,12 +204,14 @@ use ieee.numeric_std.all;


library work; library work;
use work.common.all; use work.common.all;
use work.utils.all;
use work.wishbone_types.all; use work.wishbone_types.all;
use work.helpers.all;


entity xics_ics is entity xics_ics is
generic ( generic (
SRC_NUM : integer range 1 to 256 := 16; SRC_NUM : integer range 1 to 256 := 16;
PRIO_BITS : integer range 1 to 8 := 8 PRIO_BITS : integer range 1 to 8 := 3
); );
port ( port (
clk : in std_logic; clk : in std_logic;
@ -228,12 +227,16 @@ end xics_ics;


architecture rtl of xics_ics is architecture rtl of xics_ics is


constant SRC_NUM_BITS : natural := log2(SRC_NUM);

subtype pri_t is std_ulogic_vector(PRIO_BITS-1 downto 0); subtype pri_t is std_ulogic_vector(PRIO_BITS-1 downto 0);
type xive_t is record type xive_t is record
pri : pri_t; pri : pri_t;
end record; end record;
constant pri_masked : pri_t := (others => '1'); constant pri_masked : pri_t := (others => '1');


subtype pri_vector_t is std_ulogic_vector(2**PRIO_BITS - 1 downto 0);

type xive_array_t is array(0 to SRC_NUM-1) of xive_t; type xive_array_t is array(0 to SRC_NUM-1) of xive_t;
signal xives : xive_array_t; signal xives : xive_array_t;


@ -262,8 +265,15 @@ architecture rtl of xics_ics is
end function; end function;


function prio_pack(pri8: std_ulogic_vector(7 downto 0)) return pri_t is function prio_pack(pri8: std_ulogic_vector(7 downto 0)) return pri_t is
variable masked : std_ulogic_vector(7 downto 0);
begin begin
return pri8(PRIO_BITS-1 downto 0); masked := x"00";
masked(PRIO_BITS - 1 downto 0) := (others => '1');
if pri8 >= masked then
return pri_masked;
else
return pri8(PRIO_BITS-1 downto 0);
end if;
end function; end function;


function prio_unpack(pri: pri_t) return std_ulogic_vector is function prio_unpack(pri: pri_t) return std_ulogic_vector is
@ -276,8 +286,27 @@ architecture rtl of xics_ics is
r(PRIO_BITS-1 downto 0) := pri; r(PRIO_BITS-1 downto 0) := pri;
end if; end if;
return r; return r;
end function; end function;


-- Expand a packed priority into a one-hot vector: the returned vector
-- has exactly one bit set, at the index equal to the unsigned value of
-- pri, and every other bit clear.
function prio_decode(pri: pri_t) return pri_vector_t is
    variable onehot: pri_vector_t;
    variable level: natural;
begin
    level := to_integer(unsigned(pri));
    for n in pri_vector_t'range loop
        if n = level then
            onehot(n) := '1';
        else
            onehot(n) := '0';
        end if;
    end loop;
    return onehot;
end function;

-- Assumes nbits <= 6; v is 2^nbits wide
-- Encode the position of a set bit in v as an nbits-wide number.
--
-- The top bit of a local copy of v is forced to '1' before the copy is
-- handed to count_right_zeroes, so an all-zero v yields 2**nbits - 1.
-- The low nbits bits of that function's 6-bit result are returned.
--
-- Preconditions carried over from the original: v must be exactly
-- 2**nbits elements long (the copy is a whole-vector assignment) and
-- nbits must not exceed 6 (the intermediate result is 6 bits wide).
-- NOTE(review): count_right_zeroes comes from the helpers package and is
-- presumed to return the number of trailing zero bits - confirm there.
function priority_encoder(v: std_ulogic_vector; nbits: natural) return std_ulogic_vector is
    variable forced: std_ulogic_vector(2**nbits - 1 downto 0);
    variable index: std_ulogic_vector(5 downto 0);
begin
    forced := v;
    -- Guarantee a hit on the lowest-priority (highest-numbered) bit
    forced(forced'high) := '1';
    index := count_right_zeroes(forced);
    return index(nbits - 1 downto 0);
end function;


-- Register map -- Register map
-- 0 : Config -- 0 : Config
@ -391,35 +420,33 @@ begin
end process; end process;


irq_gen: process(all) irq_gen: process(all)
variable max_idx : integer range 0 to SRC_NUM-1; variable max_idx : std_ulogic_vector(SRC_NUM_BITS - 1 downto 0);
variable max_pri : pri_t; variable max_pri : pri_t;

variable pending_pri : pri_vector_t;
-- A more favored than b ? variable pending_at_pri : std_ulogic_vector(SRC_NUM - 1 downto 0);
function a_mf_b(a: pri_t; b: pri_t) return boolean is
variable a_i : unsigned(PRIO_BITS-1 downto 0);
variable b_i : unsigned(PRIO_BITS-1 downto 0);
begin
a_i := unsigned(a);
b_i := unsigned(b);
report "a_mf_b a=" & to_hstring(a) &
" b=" & to_hstring(b) &
" r=" & boolean'image(a < b);
return a_i < b_i;
end function;
begin begin
-- XXX FIXME: Use a tree -- Work out the most-favoured (lowest) priority of the pending interrupts
max_pri := pri_masked; pending_pri := (others => '0');
max_idx := 0;
for i in 0 to SRC_NUM - 1 loop for i in 0 to SRC_NUM - 1 loop
if int_level_l(i) = '1' and a_mf_b(xives(i).pri, max_pri) then if int_level_l(i) = '1' then
max_pri := xives(i).pri; pending_pri := pending_pri or prio_decode(xives(i).pri);
max_idx := i;
end if; end if;
end loop; end loop;
max_pri := priority_encoder(pending_pri, PRIO_BITS);

-- Work out which interrupts are pending at that priority
pending_at_pri := (others => '0');
for i in 0 to SRC_NUM - 1 loop
if int_level_l(i) = '1' and xives(i).pri = max_pri then
pending_at_pri(i) := '1';
end if;
end loop;
max_idx := priority_encoder(pending_at_pri, SRC_NUM_BITS);

if max_pri /= pri_masked then if max_pri /= pri_masked then
report "MFI: " & integer'image(max_idx) & " pri=" & to_hstring(prio_unpack(max_pri)); report "MFI: " & integer'image(to_integer(unsigned(max_idx))) & " pri=" & to_hstring(prio_unpack(max_pri));
end if; end if;
icp_out_next.src <= std_ulogic_vector(to_unsigned(max_idx, 4)); icp_out_next.src <= max_idx;
icp_out_next.pri <= prio_unpack(max_pri); icp_out_next.pri <= prio_unpack(max_pri);
end process; end process;



Loading…
Cancel
Save