From 21ab36a0c0cc05501d036a160f3002d21b7cd9cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 29 Jul 2022 20:29:26 +1000 Subject: [PATCH] Pre-decode instructions when writing them to icache This splits out the decoding done in the decode0 step into a separate predecoder, used when writing instructions into the icache. The icache now holds 36 bits per instruction rather than 32. For valid instructions, those 36 bits comprise the bottom 26 bits of the instruction word, a 9-bit insn_code value (which uniquely identifies the instruction), and a zero in the MSB. For illegal instructions, the MSB is one and the full instruction word is in the bottom 32 bits. Having the full instruction word available for illegal instructions means that it can be printed in the log when simulating, or in future could be placed in the HEIR register. If we don't have an FPU, then the floating-point instructions are regarded as illegal. In that case, the insn_code values would fit into 8 bits, which could be used in future to reduce the size of decode_rom from 512 to 256 entries. 
Signed-off-by: Paul Mackerras --- Makefile | 2 +- cache_ram.vhdl | 11 +- common.vhdl | 3 +- core.vhdl | 1 + decode1.vhdl | 691 +++++-------------------------------------------- icache.vhdl | 127 ++++++--- microwatt.core | 1 + predecode.vhdl | 582 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 745 insertions(+), 673 deletions(-) create mode 100644 predecode.vhdl diff --git a/Makefile b/Makefile index 85a0fee..ebb1b79 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ all = core_tb icache_tb dcache_tb dmi_dtm_tb \ all: $(all) core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \ - utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \ + utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl predecode.vhdl \ decode1.vhdl helpers.vhdl insn_helpers.vhdl \ control.vhdl decode2.vhdl register_file.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ diff --git a/cache_ram.vhdl b/cache_ram.vhdl index 8c8c495..641917f 100644 --- a/cache_ram.vhdl +++ b/cache_ram.vhdl @@ -7,6 +7,7 @@ entity cache_ram is generic( ROW_BITS : integer := 16; WIDTH : integer := 64; + BYTEWID : integer := 8; TRACE : boolean := false; ADD_BUF : boolean := false ); @@ -16,7 +17,7 @@ entity cache_ram is rd_en : in std_logic; rd_addr : in std_logic_vector(ROW_BITS - 1 downto 0); rd_data : out std_logic_vector(WIDTH - 1 downto 0); - wr_sel : in std_logic_vector(WIDTH/8 - 1 downto 0); + wr_sel : in std_logic_vector(WIDTH/BYTEWID - 1 downto 0); wr_addr : in std_logic_vector(ROW_BITS - 1 downto 0); wr_data : in std_logic_vector(WIDTH - 1 downto 0) ); @@ -38,7 +39,7 @@ begin variable lbit : integer range 0 to WIDTH - 1; variable mbit : integer range 0 to WIDTH - 1; variable widx : integer range 0 to SIZE - 1; - constant sel0 : std_logic_vector(WIDTH/8 - 1 downto 0) + constant sel0 : std_logic_vector(WIDTH/BYTEWID - 1 downto 0) := (others => '0'); begin if rising_edge(clk) then @@ -49,9 +50,9 @@ begin " dat:" & to_hstring(wr_data); end if; end if; - for i in 0 to 
WIDTH/8-1 loop - lbit := i * 8; - mbit := lbit + 7; + for i in 0 to WIDTH/BYTEWID-1 loop + lbit := i * BYTEWID; + mbit := lbit + BYTEWID - 1; widx := to_integer(unsigned(wr_addr)); if wr_sel(i) = '1' then ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit); diff --git a/common.vhdl b/common.vhdl index a698ba9..6287be5 100644 --- a/common.vhdl +++ b/common.vhdl @@ -246,12 +246,13 @@ package common is fetch_failed: std_ulogic; nia: std_ulogic_vector(63 downto 0); insn: std_ulogic_vector(31 downto 0); + icode: insn_code; big_endian: std_ulogic; next_predicted: std_ulogic; next_pred_ntaken: std_ulogic; end record; constant IcacheToDecode1Init : IcacheToDecode1Type := - (nia => (others => '0'), insn => (others => '0'), others => '0'); + (nia => (others => '0'), insn => (others => '0'), icode => INSN_illegal, others => '0'); type IcacheEventType is record icache_miss : std_ulogic; diff --git a/core.vhdl b/core.vhdl index 764141a..0624000 100644 --- a/core.vhdl +++ b/core.vhdl @@ -246,6 +246,7 @@ begin icache_0: entity work.icache generic map( SIM => SIM, + HAS_FPU => HAS_FPU, LINE_SIZE => 64, NUM_LINES => ICACHE_NUM_LINES, NUM_WAYS => ICACHE_NUM_WAYS, diff --git a/decode1.vhdl b/decode1.vhdl index d017912..559a505 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -31,22 +31,6 @@ entity decode1 is end entity decode1; architecture behaviour of decode1 is - type dc0_t is record - f_in : IcacheToDecode1Type; - use_row : std_ulogic; - br_pred : std_ulogic; - override : std_ulogic; - ov_insn : insn_code; - spr_info : spr_id; - ram_spr : ram_spr_info; - end record; - constant dc0_t_init : dc0_t := - (f_in => IcacheToDecode1Init, ov_insn => INSN_illegal, - spr_info => spr_id_init, ram_spr => ram_spr_info_init, - others => '0'); - - signal dc0, dc0in : dc0_t; - signal r, rin : Decode1ToDecode2Type; signal f, fin : Decode1ToFetch1Type; @@ -58,437 +42,10 @@ architecture behaviour of decode1 is signal br, br_in : br_predictor_t; - signal maj_rom_addr : std_ulogic_vector(10 downto 
0); - signal row_rom_addr : std_ulogic_vector(10 downto 0); - signal major_predecode : insn_code; - signal row_predecode : insn_code; - signal decode_rom_addr : insn_code; signal decode : decode_rom_t; - signal rom_ce : std_ulogic; - - type predecoder_rom_t is array(0 to 2047) of insn_code; - - constant major_predecode_rom : predecoder_rom_t := ( - 2#001100_00000# to 2#001100_11111# => INSN_addic, - 2#001101_00000# to 2#001101_11111# => INSN_addic_dot, - 2#001110_00000# to 2#001110_11111# => INSN_addi, - 2#001111_00000# to 2#001111_11111# => INSN_addis, - 2#010011_00100# to 2#010011_00101# => INSN_addpcis, - 2#011100_00000# to 2#011100_11111# => INSN_andi_dot, - 2#011101_00000# to 2#011101_11111# => INSN_andis_dot, - 2#000000_00000# => INSN_attn, - 2#010010_00000# to 2#010010_11111# => INSN_b, - 2#010000_00000# to 2#010000_11111# => INSN_bc, - 2#001011_00000# to 2#001011_11111# => INSN_cmpi, - 2#001010_00000# to 2#001010_11111# => INSN_cmpli, - 2#100010_00000# to 2#100010_11111# => INSN_lbz, - 2#100011_00000# to 2#100011_11111# => INSN_lbzu, - 2#110010_00000# to 2#110010_11111# => INSN_lfd, - 2#110011_00000# to 2#110011_11111# => INSN_lfdu, - 2#110000_00000# to 2#110000_11111# => INSN_lfs, - 2#110001_00000# to 2#110001_11111# => INSN_lfsu, - 2#101010_00000# to 2#101010_11111# => INSN_lha, - 2#101011_00000# to 2#101011_11111# => INSN_lhau, - 2#101000_00000# to 2#101000_11111# => INSN_lhz, - 2#101001_00000# to 2#101001_11111# => INSN_lhzu, - 2#100000_00000# to 2#100000_11111# => INSN_lwz, - 2#100001_00000# to 2#100001_11111# => INSN_lwzu, - 2#000111_00000# to 2#000111_11111# => INSN_mulli, - 2#011000_00000# to 2#011000_11111# => INSN_ori, - 2#011001_00000# to 2#011001_11111# => INSN_oris, - 2#010100_00000# to 2#010100_11111# => INSN_rlwimi, - 2#010101_00000# to 2#010101_11111# => INSN_rlwinm, - 2#010111_00000# to 2#010111_11111# => INSN_rlwnm, - 2#010001_00000# to 2#010001_11111# => INSN_sc, - 2#100110_00000# to 2#100110_11111# => INSN_stb, - 2#100111_00000# to 
2#100111_11111# => INSN_stbu, - 2#110110_00000# to 2#110110_11111# => INSN_stfd, - 2#110111_00000# to 2#110111_11111# => INSN_stfdu, - 2#110100_00000# to 2#110100_11111# => INSN_stfs, - 2#110101_00000# to 2#110101_11111# => INSN_stfsu, - 2#101100_00000# to 2#101100_11111# => INSN_sth, - 2#101101_00000# to 2#101101_11111# => INSN_sthu, - 2#100100_00000# to 2#100100_11111# => INSN_stw, - 2#100101_00000# to 2#100101_11111# => INSN_stwu, - 2#001000_00000# to 2#001000_11111# => INSN_subfic, - 2#000010_00000# to 2#000010_11111# => INSN_tdi, - 2#000011_00000# to 2#000011_11111# => INSN_twi, - 2#011010_00000# to 2#011010_11111# => INSN_xori, - 2#011011_00000# to 2#011011_11111# => INSN_xoris, - -- major opcode 4 - 2#000100_10000# => INSN_maddhd, - 2#000100_10001# => INSN_maddhdu, - 2#000100_10011# => INSN_maddld, - -- major opcode 30 - 2#011110_01000# to 2#011110_01001# => INSN_rldic, - 2#011110_01010# to 2#011110_01011# => INSN_rldic, - 2#011110_00000# to 2#011110_00001# => INSN_rldicl, - 2#011110_00010# to 2#011110_00011# => INSN_rldicl, - 2#011110_00100# to 2#011110_00101# => INSN_rldicr, - 2#011110_00110# to 2#011110_00111# => INSN_rldicr, - 2#011110_01100# to 2#011110_01101# => INSN_rldimi, - 2#011110_01110# to 2#011110_01111# => INSN_rldimi, - 2#011110_10000# to 2#011110_10001# => INSN_rldcl, - 2#011110_10010# to 2#011110_10011# => INSN_rldcr, - -- major opcode 58 - 2#111010_00000# => INSN_ld, - 2#111010_00001# => INSN_ldu, - 2#111010_00010# => INSN_lwa, - 2#111010_00100# => INSN_ld, - 2#111010_00101# => INSN_ldu, - 2#111010_00110# => INSN_lwa, - 2#111010_01000# => INSN_ld, - 2#111010_01001# => INSN_ldu, - 2#111010_01010# => INSN_lwa, - 2#111010_01100# => INSN_ld, - 2#111010_01101# => INSN_ldu, - 2#111010_01110# => INSN_lwa, - 2#111010_10000# => INSN_ld, - 2#111010_10001# => INSN_ldu, - 2#111010_10010# => INSN_lwa, - 2#111010_10100# => INSN_ld, - 2#111010_10101# => INSN_ldu, - 2#111010_10110# => INSN_lwa, - 2#111010_11000# => INSN_ld, - 2#111010_11001# => INSN_ldu, - 
2#111010_11010# => INSN_lwa, - 2#111010_11100# => INSN_ld, - 2#111010_11101# => INSN_ldu, - 2#111010_11110# => INSN_lwa, - -- major opcode 59 - 2#111011_00100# to 2#111011_00101# => INSN_fdivs, - 2#111011_01000# to 2#111011_01001# => INSN_fsubs, - 2#111011_01010# to 2#111011_01011# => INSN_fadds, - 2#111011_01100# to 2#111011_01101# => INSN_fsqrts, - 2#111011_10000# to 2#111011_10001# => INSN_fres, - 2#111011_10010# to 2#111011_10011# => INSN_fmuls, - 2#111011_10100# to 2#111011_10101# => INSN_frsqrtes, - 2#111011_11000# to 2#111011_11001# => INSN_fmsubs, - 2#111011_11010# to 2#111011_11011# => INSN_fmadds, - 2#111011_11100# to 2#111011_11101# => INSN_fnmsubs, - 2#111011_11110# to 2#111011_11111# => INSN_fnmadds, - -- major opcode 62 - 2#111110_00000# => INSN_std, - 2#111110_00001# => INSN_stdu, - 2#111110_00100# => INSN_std, - 2#111110_00101# => INSN_stdu, - 2#111110_01000# => INSN_std, - 2#111110_01001# => INSN_stdu, - 2#111110_01100# => INSN_std, - 2#111110_01101# => INSN_stdu, - 2#111110_10000# => INSN_std, - 2#111110_10001# => INSN_stdu, - 2#111110_10100# => INSN_std, - 2#111110_10101# => INSN_stdu, - 2#111110_11000# => INSN_std, - 2#111110_11001# => INSN_stdu, - 2#111110_11100# => INSN_std, - 2#111110_11101# => INSN_stdu, - -- major opcode 63 - 2#111111_00100# to 2#111111_00101# => INSN_fdiv, - 2#111111_01000# to 2#111111_01001# => INSN_fsub, - 2#111111_01010# to 2#111111_01011# => INSN_fadd, - 2#111111_01100# to 2#111111_01101# => INSN_fsqrt, - 2#111111_01110# to 2#111111_01111# => INSN_fsel, - 2#111111_10000# to 2#111111_10001# => INSN_fre, - 2#111111_10010# to 2#111111_10011# => INSN_fmul, - 2#111111_10100# to 2#111111_10101# => INSN_frsqrte, - 2#111111_11000# to 2#111111_11001# => INSN_fmsub, - 2#111111_11010# to 2#111111_11011# => INSN_fmadd, - 2#111111_11100# to 2#111111_11101# => INSN_fnmsub, - 2#111111_11110# to 2#111111_11111# => INSN_fnmadd, - others => INSN_illegal - ); - constant row_predecode_rom : predecoder_rom_t := ( - -- Major opcode 31 - -- 
Address bits are 0, insn(10:1) - 2#0_01000_01010# => INSN_add, - 2#0_11000_01010# => INSN_add, -- addo - 2#0_00000_01010# => INSN_addc, - 2#0_10000_01010# => INSN_addc, -- addco - 2#0_00100_01010# => INSN_adde, - 2#0_10100_01010# => INSN_adde, -- addeo - 2#0_00101_01010# => INSN_addex, - 2#0_00010_01010# => INSN_addg6s, - 2#0_00111_01010# => INSN_addme, - 2#0_10111_01010# => INSN_addme, -- addmeo - 2#0_00110_01010# => INSN_addze, - 2#0_10110_01010# => INSN_addze, -- addzeo - 2#0_00000_11100# => INSN_and, - 2#0_00001_11100# => INSN_andc, - 2#0_00111_11100# => INSN_bperm, - 2#0_01001_11010# => INSN_cbcdtd, - 2#0_01000_11010# => INSN_cdtbcd, - 2#0_00000_00000# => INSN_cmp, - 2#0_01111_11100# => INSN_cmpb, - 2#0_00111_00000# => INSN_cmpeqb, - 2#0_00001_00000# => INSN_cmpl, - 2#0_00110_00000# => INSN_cmprb, - 2#0_00001_11010# => INSN_cntlzd, - 2#0_00000_11010# => INSN_cntlzw, - 2#0_10001_11010# => INSN_cnttzd, - 2#0_10000_11010# => INSN_cnttzw, - 2#0_10111_10011# => INSN_darn, - 2#0_00010_10110# => INSN_dcbf, - 2#0_00001_10110# => INSN_dcbst, - 2#0_01000_10110# => INSN_dcbt, - 2#0_00111_10110# => INSN_dcbtst, - 2#0_11111_10110# => INSN_dcbz, - 2#0_01100_01001# => INSN_divdeu, - 2#0_11100_01001# => INSN_divdeu, -- divdeuo - 2#0_01100_01011# => INSN_divweu, - 2#0_11100_01011# => INSN_divweu, -- divweuo - 2#0_01101_01001# => INSN_divde, - 2#0_11101_01001# => INSN_divde, -- divdeo - 2#0_01101_01011# => INSN_divwe, - 2#0_11101_01011# => INSN_divwe, -- divweo - 2#0_01110_01001# => INSN_divdu, - 2#0_11110_01001# => INSN_divdu, -- divduo - 2#0_01110_01011# => INSN_divwu, - 2#0_11110_01011# => INSN_divwu, -- divwuo - 2#0_01111_01001# => INSN_divd, - 2#0_11111_01001# => INSN_divd, -- divdo - 2#0_01111_01011# => INSN_divw, - 2#0_11111_01011# => INSN_divw, -- divwo - 2#0_11001_10110# => INSN_nop, -- dss - 2#0_01010_10110# => INSN_nop, -- dst - 2#0_01011_10110# => INSN_nop, -- dstst - 2#0_11010_10110# => INSN_eieio, - 2#0_01000_11100# => INSN_eqv, - 2#0_11101_11010# => INSN_extsb, - 
2#0_11100_11010# => INSN_extsh, - 2#0_11110_11010# => INSN_extsw, - 2#0_11011_11010# => INSN_extswsli, - 2#0_11011_11011# => INSN_extswsli, - 2#0_11110_10110# => INSN_icbi, - 2#0_00000_10110# => INSN_icbt, - 2#0_00000_01111# => INSN_isel, - 2#0_00001_01111# => INSN_isel, - 2#0_00010_01111# => INSN_isel, - 2#0_00011_01111# => INSN_isel, - 2#0_00100_01111# => INSN_isel, - 2#0_00101_01111# => INSN_isel, - 2#0_00110_01111# => INSN_isel, - 2#0_00111_01111# => INSN_isel, - 2#0_01000_01111# => INSN_isel, - 2#0_01001_01111# => INSN_isel, - 2#0_01010_01111# => INSN_isel, - 2#0_01011_01111# => INSN_isel, - 2#0_01100_01111# => INSN_isel, - 2#0_01101_01111# => INSN_isel, - 2#0_01110_01111# => INSN_isel, - 2#0_01111_01111# => INSN_isel, - 2#0_10000_01111# => INSN_isel, - 2#0_10001_01111# => INSN_isel, - 2#0_10010_01111# => INSN_isel, - 2#0_10011_01111# => INSN_isel, - 2#0_10100_01111# => INSN_isel, - 2#0_10101_01111# => INSN_isel, - 2#0_10110_01111# => INSN_isel, - 2#0_10111_01111# => INSN_isel, - 2#0_11000_01111# => INSN_isel, - 2#0_11001_01111# => INSN_isel, - 2#0_11010_01111# => INSN_isel, - 2#0_11011_01111# => INSN_isel, - 2#0_11100_01111# => INSN_isel, - 2#0_11101_01111# => INSN_isel, - 2#0_11110_01111# => INSN_isel, - 2#0_11111_01111# => INSN_isel, - 2#0_00001_10100# => INSN_lbarx, - 2#0_11010_10101# => INSN_lbzcix, - 2#0_00011_10111# => INSN_lbzux, - 2#0_00010_10111# => INSN_lbzx, - 2#0_00010_10100# => INSN_ldarx, - 2#0_10000_10100# => INSN_ldbrx, - 2#0_11011_10101# => INSN_ldcix, - 2#0_00001_10101# => INSN_ldux, - 2#0_00000_10101# => INSN_ldx, - 2#0_10010_10111# => INSN_lfdx, - 2#0_10011_10111# => INSN_lfdux, - 2#0_11010_10111# => INSN_lfiwax, - 2#0_11011_10111# => INSN_lfiwzx, - 2#0_10000_10111# => INSN_lfsx, - 2#0_10001_10111# => INSN_lfsux, - 2#0_00011_10100# => INSN_lharx, - 2#0_01011_10111# => INSN_lhaux, - 2#0_01010_10111# => INSN_lhax, - 2#0_11000_10110# => INSN_lhbrx, - 2#0_11001_10101# => INSN_lhzcix, - 2#0_01001_10111# => INSN_lhzux, - 2#0_01000_10111# => 
INSN_lhzx, - 2#0_00000_10100# => INSN_lwarx, - 2#0_01011_10101# => INSN_lwaux, - 2#0_01010_10101# => INSN_lwax, - 2#0_10000_10110# => INSN_lwbrx, - 2#0_11000_10101# => INSN_lwzcix, - 2#0_00001_10111# => INSN_lwzux, - 2#0_00000_10111# => INSN_lwzx, - 2#0_10010_00000# => INSN_mcrxrx, - 2#0_00000_10011# => INSN_mfcr, - 2#0_00010_10011# => INSN_mfmsr, - 2#0_01010_10011# => INSN_mfspr, - 2#0_01000_01001# => INSN_modud, - 2#0_01000_01011# => INSN_moduw, - 2#0_11000_01001# => INSN_modsd, - 2#0_11000_01011# => INSN_modsw, - 2#0_00100_10000# => INSN_mtcrf, - 2#0_00100_10010# => INSN_mtmsr, - 2#0_00101_10010# => INSN_mtmsrd, - 2#0_01110_10011# => INSN_mtspr, - 2#0_00010_01001# => INSN_mulhd, - 2#0_00000_01001# => INSN_mulhdu, - 2#0_00010_01011# => INSN_mulhw, - 2#0_00000_01011# => INSN_mulhwu, - -- next 4 have reserved bit set - 2#0_10010_01001# => INSN_mulhd, - 2#0_10000_01001# => INSN_mulhdu, - 2#0_10010_01011# => INSN_mulhw, - 2#0_10000_01011# => INSN_mulhwu, - 2#0_00111_01001# => INSN_mulld, - 2#0_10111_01001# => INSN_mulld, -- mulldo - 2#0_00111_01011# => INSN_mullw, - 2#0_10111_01011# => INSN_mullw, -- mullwo - 2#0_01110_11100# => INSN_nand, - 2#0_00011_01000# => INSN_neg, - 2#0_10011_01000# => INSN_neg, -- nego - -- next 8 are reserved no-op instructions - 2#0_10000_10010# => INSN_nop, - 2#0_10001_10010# => INSN_nop, - 2#0_10010_10010# => INSN_nop, - 2#0_10011_10010# => INSN_nop, - 2#0_10100_10010# => INSN_nop, - 2#0_10101_10010# => INSN_nop, - 2#0_10110_10010# => INSN_nop, - 2#0_10111_10010# => INSN_nop, - 2#0_00011_11100# => INSN_nor, - 2#0_01101_11100# => INSN_or, - 2#0_01100_11100# => INSN_orc, - 2#0_00011_11010# => INSN_popcntb, - 2#0_01111_11010# => INSN_popcntd, - 2#0_01011_11010# => INSN_popcntw, - 2#0_00101_11010# => INSN_prtyd, - 2#0_00100_11010# => INSN_prtyw, - 2#0_00100_00000# => INSN_setb, - 2#0_01111_10010# => INSN_slbia, - 2#0_00000_11011# => INSN_sld, - 2#0_00000_11000# => INSN_slw, - 2#0_11000_11010# => INSN_srad, - 2#0_11001_11010# => INSN_sradi, - 
2#0_11001_11011# => INSN_sradi, - 2#0_11000_11000# => INSN_sraw, - 2#0_11001_11000# => INSN_srawi, - 2#0_10000_11011# => INSN_srd, - 2#0_10000_11000# => INSN_srw, - 2#0_11110_10101# => INSN_stbcix, - 2#0_10101_10110# => INSN_stbcx, - 2#0_00111_10111# => INSN_stbux, - 2#0_00110_10111# => INSN_stbx, - 2#0_10100_10100# => INSN_stdbrx, - 2#0_11111_10101# => INSN_stdcix, - 2#0_00110_10110# => INSN_stdcx, - 2#0_00101_10101# => INSN_stdux, - 2#0_00100_10101# => INSN_stdx, - 2#0_10110_10111# => INSN_stfdx, - 2#0_10111_10111# => INSN_stfdux, - 2#0_11110_10111# => INSN_stfiwx, - 2#0_10100_10111# => INSN_stfsx, - 2#0_10101_10111# => INSN_stfsux, - 2#0_11100_10110# => INSN_sthbrx, - 2#0_11101_10101# => INSN_sthcix, - 2#0_10110_10110# => INSN_sthcx, - 2#0_01101_10111# => INSN_sthux, - 2#0_01100_10111# => INSN_sthx, - 2#0_10100_10110# => INSN_stwbrx, - 2#0_11100_10101# => INSN_stwcix, - 2#0_00100_10110# => INSN_stwcx, - 2#0_00101_10111# => INSN_stwux, - 2#0_00100_10111# => INSN_stwx, - 2#0_00001_01000# => INSN_subf, - 2#0_10001_01000# => INSN_subf, -- subfo - 2#0_00000_01000# => INSN_subfc, - 2#0_10000_01000# => INSN_subfc, -- subfco - 2#0_00100_01000# => INSN_subfe, - 2#0_10100_01000# => INSN_subfe, -- subfeo - 2#0_00111_01000# => INSN_subfme, - 2#0_10111_01000# => INSN_subfme, -- subfmeo - 2#0_00110_01000# => INSN_subfze, - 2#0_10110_01000# => INSN_subfze, -- subfzeo - 2#0_10010_10110# => INSN_sync, - 2#0_00010_00100# => INSN_td, - 2#0_00000_00100# => INSN_tw, - 2#0_01001_10010# => INSN_tlbie, - 2#0_01000_10010# => INSN_tlbiel, - 2#0_10001_10110# => INSN_tlbsync, - 2#0_00000_11110# => INSN_wait, - 2#0_01001_11100# => INSN_xor, - - -- Major opcode 19 - -- Columns with insn(4) = '1' are all illegal and not mapped here; to - -- fit into 2048 entries, the columns are remapped so that 16-24 are - -- stored here as 8-15; in other words the address bits are - -- 1, insn(10..6), 1, insn(5), insn(3..1) - 2#1_10000_11000# => INSN_bcctr, - 2#1_00000_11000# => INSN_bclr, - 
2#1_10001_11000# => INSN_bctar, - 2#1_01000_10001# => INSN_crand, - 2#1_00100_10001# => INSN_crandc, - 2#1_01001_10001# => INSN_creqv, - 2#1_00111_10001# => INSN_crnand, - 2#1_00001_10001# => INSN_crnor, - 2#1_01110_10001# => INSN_cror, - 2#1_01101_10001# => INSN_crorc, - 2#1_00110_10001# => INSN_crxor, - 2#1_00100_11110# => INSN_isync, - 2#1_00000_10000# => INSN_mcrf, - 2#1_00000_11010# => INSN_rfid, - - -- Major opcode 59 - -- Only column 14 is valid here; columns 16-31 are handled in the major table - -- Column 14 is mapped to column 6 of the space which is - -- mostly used for opcode 19. - 2#1_11010_10110# => INSN_fcfids, - 2#1_11110_10110# => INSN_fcfidus, - - -- Major opcode 63 - -- Columns 0-15 are mapped here; columns 16-31 are in the major table. - -- Address bits are 1, insn(10:6), 0, insn(4:1) - 2#1_00000_00000# => INSN_fcmpu, - 2#1_00001_00000# => INSN_fcmpo, - 2#1_00010_00000# => INSN_mcrfs, - 2#1_00100_00000# => INSN_ftdiv, - 2#1_00101_00000# => INSN_ftsqrt, - 2#1_00001_00110# => INSN_mtfsb, - 2#1_00010_00110# => INSN_mtfsb, - 2#1_00100_00110# => INSN_mtfsfi, - 2#1_11010_00110# => INSN_fmrgow, - 2#1_11110_00110# => INSN_fmrgew, - 2#1_10010_00111# => INSN_mffs, - 2#1_10110_00111# => INSN_mtfsf, - 2#1_00000_01000# => INSN_fcpsgn, - 2#1_00001_01000# => INSN_fneg, - 2#1_00010_01000# => INSN_fmr, - 2#1_00100_01000# => INSN_fnabs, - 2#1_01000_01000# => INSN_fabs, - 2#1_01100_01000# => INSN_frin, - 2#1_01101_01000# => INSN_friz, - 2#1_01110_01000# => INSN_frip, - 2#1_01111_01000# => INSN_frim, - 2#1_00000_01100# => INSN_frsp, - 2#1_00000_01110# => INSN_fctiw, - 2#1_00100_01110# => INSN_fctiwu, - 2#1_11001_01110# => INSN_fctid, - 2#1_11010_01110# => INSN_fcfid, - 2#1_11101_01110# => INSN_fctidu, - 2#1_11110_01110# => INSN_fcfidu, - 2#1_00000_01111# => INSN_fctiwz, - 2#1_00100_01111# => INSN_fctiwuz, - 2#1_11001_01111# => INSN_fctidz, - 2#1_11101_01111# => INSN_fctiduz, - - others => INSN_illegal - ); + signal fetch_failed : std_ulogic; -- If we have an FPU, 
then it is used for integer divisions, -- otherwise a dedicated divider in the ALU is used. @@ -871,16 +428,18 @@ architecture behaviour of decode1 is end; begin - decode0_0: process(clk) + decode1_0: process(clk) begin if rising_edge(clk) then if rst = '1' then - dc0 <= dc0_t_init; + r <= Decode1ToDecode2Init; + fetch_failed <= '0'; elsif flush_in = '1' then - dc0.f_in.valid <= '0'; - dc0.f_in.fetch_failed <= '0'; + r.valid <= '0'; + fetch_failed <= '0'; elsif stall_in = '0' then - dc0 <= dc0in; + r <= rin; + fetch_failed <= f_in.fetch_failed; end if; if rst = '1' then br.br_nia <= (others => '0'); @@ -892,139 +451,73 @@ begin end if; end process; - decode0_roms: process(clk) + busy_out <= stall_in; + + decode1_rom: process(clk) begin if rising_edge(clk) then if stall_in = '0' then - if is_X(maj_rom_addr) then - major_predecode <= INSN_illegal; - else - major_predecode <= major_predecode_rom(to_integer(unsigned(maj_rom_addr))); - end if; - if is_X(row_rom_addr) then - row_predecode <= INSN_illegal; - else - row_predecode <= row_predecode_rom(to_integer(unsigned(row_rom_addr))); - end if; + decode <= decode_rom(decode_rom_addr); end if; end if; end process; - decode0_1: process(all) - variable v : dc0_t; - variable majorop : std_ulogic_vector(5 downto 0); - variable majaddr : std_ulogic_vector(10 downto 0); - variable rowaddr : std_ulogic_vector(10 downto 0); - variable sprn : spr_num_t; + decode1_1: process(all) + variable v : Decode1ToDecode2Type; + variable vr : Decode1ToRegisterFileType; variable br_target : std_ulogic_vector(61 downto 0); variable br_offset : signed(23 downto 0); variable bv : br_predictor_t; + variable icode : insn_code; + variable sprn : spr_num_t; + variable maybe_rb : std_ulogic; begin - v := dc0_t_init; - v.f_in := f_in; - - br_offset := (others => '0'); - - majorop := f_in.insn(31 downto 26); - majaddr := majorop & f_in.insn(4 downto 0); - - -- row_predecode_rom is used for op 19, 31, 59, 63 - -- addr bit 10 is 0 for op 31, 1 for 19, 59, 
63 - rowaddr(10) := f_in.insn(31) or not f_in.insn(29); - rowaddr(9 downto 5) := f_in.insn(10 downto 6); - if f_in.insn(28) = '0' then - -- op 19 and op 59 - rowaddr(4 downto 3) := '1' & f_in.insn(5); - else - -- op 31 and 63; for 63 we only use this when f_in.insn(5) = '0' - rowaddr(4 downto 3) := f_in.insn(5 downto 4); - end if; - rowaddr(2 downto 0) := f_in.insn(3 downto 1); + v := Decode1ToDecode2Init; - maj_rom_addr <= majaddr; - row_rom_addr <= rowaddr; + v.valid := f_in.valid; + v.nia := f_in.nia; + v.insn := f_in.insn; + v.stop_mark := f_in.stop_mark; + v.big_endian := f_in.big_endian; if is_X(f_in.insn) then v.spr_info := (sel => "XXX", others => 'X'); v.ram_spr := (index => (others => 'X'), others => 'X'); else - sprn := decode_spr_num(f_in.insn); - v.spr_info := map_spr(sprn); - v.ram_spr := decode_ram_spr(sprn); - end if; - - case unsigned(majorop) is - when "000100" => -- 4 - -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*) - v.override := not f_in.insn(5); - - when "011111" => -- 31 - -- major opcode 31, lots of things - -- Use the first half of the row table for all columns - v.use_row := '1'; - - when "010000" => -- 16 - -- Predict backward branches as taken, forward as untaken - v.br_pred := f_in.insn(15); - br_offset := resize(signed(f_in.insn(15 downto 2)), 24); - - when "010010" => -- 18 - -- Unconditional branches are always taken - v.br_pred := '1'; - br_offset := signed(f_in.insn(25 downto 2)); - - when "010011" => -- 19 - -- Columns 8-15 and 24-31 don't have any valid instructions - -- (where insn(5..1) is the column number). - -- addpcis (column 2) is in the major table - -- Other valid columns are mapped to columns in the second - -- half of the row table: columns 0-1 are mapped to 16-17 - -- and 16-23 are mapped to 24-31. 
- v.override := f_in.insn(4); - v.use_row := f_in.insn(5) or (not f_in.insn(3) and not f_in.insn(2)); - - when "011000" => -- 24 - -- ori, special-case the standard NOP - if std_match(f_in.insn, "01100000000000000000000000000000") then - v.override := '1'; - v.ov_insn := INSN_nop; - end if; - - when "111011" => -- 59 - if HAS_FPU then - -- floating point operations, mostly single-precision - -- Columns 0-11 are illegal; columns 12-15 are mapped - -- to columns 20-23 in the second half of the row table, - -- and columns 16-31 are in the major table. - v.override := not f_in.insn(5) and (not f_in.insn(4) or not f_in.insn(3)); - v.use_row := not f_in.insn(5); - else - v.override := '1'; - end if; - - when "111111" => -- 63 - if HAS_FPU then - -- floating point operations, general and double-precision - -- Use columns 0-15 of the second half of the row table - -- for columns 0-15, and the major table for columns 16-31. - v.use_row := not f_in.insn(5); - else - v.override := '1'; - end if; + sprn := decode_spr_num(f_in.insn); + v.spr_info := map_spr(sprn); + v.ram_spr := decode_ram_spr(sprn); + end if; - when others => - end case; + icode := f_in.icode; if f_in.fetch_failed = '1' then - v.override := '1'; - v.ov_insn := INSN_fetch_fail; + icode := INSN_fetch_fail; -- Only send down a single OP_FETCH_FAILED - v.f_in.valid := not dc0.f_in.fetch_failed; + v.valid := not fetch_failed; + end if; + decode_rom_addr <= icode; + + if f_in.valid = '1' then + report "Decode " & insn_code'image(icode) & " " & to_hstring(f_in.insn) & + " at " & to_hstring(f_in.nia); end if; -- Branch predictor - -- Note bclr, bcctr and bctar are predicted not taken as we have no + -- Note bclr, bcctr and bctar are not predicted as we have no -- count cache or link stack. 
+ br_offset := (others => '0'); + case icode is + when INSN_b => + -- Unconditional branches are always taken + v.br_pred := '1'; + br_offset := signed(f_in.insn(25 downto 2)); + when INSN_bc => + -- Predict backward branches as taken, forward as untaken + v.br_pred := f_in.insn(15); + br_offset := resize(signed(f_in.insn(15 downto 2)), 24); + when others => + end case; bv.br_nia := f_in.nia(63 downto 2); if f_in.insn(1) = '1' then bv.br_nia := (others => '0'); @@ -1039,80 +532,16 @@ begin -- after a clock edge... br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset); - dc0in <= v; - br_in <= bv; - - f_out.redirect <= br.predict; - f_out.redirect_nia <= br_target & "00"; - flush_out <= bv.predict or br.predict; - end process; - - decode1_0: process(clk) - begin - if rising_edge(clk) then - if rst = '1' then - r <= Decode1ToDecode2Init; - elsif flush_in = '1' then - r.valid <= '0'; - elsif stall_in = '0' then - r <= rin; - end if; - end if; - end process; - - busy_out <= stall_in; - - decode1_rom: process(clk) - begin - if rising_edge(clk) then - if stall_in = '0' then - decode <= decode_rom(decode_rom_addr); - end if; - end if; - end process; - - decode1_1: process(all) - variable v : Decode1ToDecode2Type; - variable vr : Decode1ToRegisterFileType; - variable icode : insn_code; - variable sprn : spr_num_t; - variable maybe_rb : std_ulogic; - begin - v := Decode1ToDecode2Init; - - v.valid := dc0.f_in.valid; - v.nia := dc0.f_in.nia; - v.insn := dc0.f_in.insn; - v.stop_mark := dc0.f_in.stop_mark; - v.big_endian := dc0.f_in.big_endian; - v.br_pred := dc0.br_pred; - v.spr_info := dc0.spr_info; - v.ram_spr := dc0.ram_spr; - - if dc0.override = '1' then - icode := dc0.ov_insn; - elsif dc0.use_row = '0' then - icode := major_predecode; - else - icode := row_predecode; - end if; - decode_rom_addr <= icode; - - if dc0.f_in.valid = '1' then - report "Decode insn " & to_hstring(dc0.f_in.insn) & " at " & to_hstring(dc0.f_in.nia) & - " code " & insn_code'image(icode); 
- end if; - -- Work out GPR/FPR read addresses maybe_rb := '0'; - vr.reg_1_addr := '0' & insn_ra(dc0.f_in.insn); - vr.reg_2_addr := '0' & insn_rb(dc0.f_in.insn); - vr.reg_3_addr := '0' & insn_rs(dc0.f_in.insn); + vr.reg_1_addr := '0' & insn_ra(f_in.insn); + vr.reg_2_addr := '0' & insn_rb(f_in.insn); + vr.reg_3_addr := '0' & insn_rs(f_in.insn); if icode >= INSN_first_rb then maybe_rb := '1'; if icode < INSN_first_frs then if icode >= INSN_first_rc then - vr.reg_3_addr := '0' & insn_rcreg(dc0.f_in.insn); + vr.reg_3_addr := '0' & insn_rcreg(f_in.insn); end if; else -- access FRS operand @@ -1124,13 +553,13 @@ begin end if; if icode >= INSN_first_frabc then -- access FRC operand - vr.reg_3_addr := '1' & insn_rcreg(dc0.f_in.insn); + vr.reg_3_addr := '1' & insn_rcreg(f_in.insn); end if; end if; end if; - vr.read_1_enable := dc0.f_in.valid and not dc0.f_in.fetch_failed; - vr.read_2_enable := dc0.f_in.valid and not dc0.f_in.fetch_failed and maybe_rb; - vr.read_3_enable := dc0.f_in.valid and not dc0.f_in.fetch_failed; + vr.read_1_enable := f_in.valid; + vr.read_2_enable := f_in.valid and maybe_rb; + vr.read_3_enable := f_in.valid; v.reg_a := vr.reg_1_addr; v.reg_b := vr.reg_2_addr; @@ -1138,11 +567,15 @@ begin -- Update registers rin <= v; + br_in <= bv; -- Update outputs d_out <= r; d_out.decode <= decode; r_out <= vr; + f_out.redirect <= br.predict; + f_out.redirect_nia <= br_target & "00"; + flush_out <= bv.predict or br.predict; end process; d1_log: if LOG_LENGTH > 0 generate diff --git a/icache.vhdl b/icache.vhdl index 9eb08c1..63de229 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -23,6 +23,7 @@ use ieee.numeric_std.all; library work; use work.utils.all; use work.common.all; +use work.decode_types.all; use work.wishbone_types.all; -- 64 bit direct mapped icache. All instructions are 4B aligned. 
@@ -30,6 +31,7 @@ use work.wishbone_types.all; entity icache is generic ( SIM : boolean := false; + HAS_FPU : boolean := true; -- Line size in bytes LINE_SIZE : positive := 64; -- BRAM organisation: We never access more than wishbone_data_bits at @@ -122,8 +124,20 @@ architecture rtl of icache is subtype way_t is integer range 0 to NUM_WAYS-1; subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0); + -- We store a pre-decoded 10-bit insn_code along with the bottom 26 bits of + -- each instruction, giving a total of 36 bits per instruction, which + -- fits neatly into the block RAMs available on FPGAs. + -- For illegal instructions, the MSB of the 10 is one, the next 3 bits are + -- zero and the bottom 6 bits are the instruction's primary opcode, so the + -- whole instruction word is available (e.g. to put in HEIR). For others, the + -- primary opcode is not stored but could be determined from the insn_code. + constant PREDECODE_BITS : natural := 10; + constant INSN_IMAGE_BITS : natural := 26; + constant ICWORDLEN : natural := PREDECODE_BITS + INSN_IMAGE_BITS; + constant ROW_WIDTH : natural := INSN_PER_ROW * ICWORDLEN; + -- The cache data BRAM organized as described above for each way - subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0); + subtype cache_row_t is std_ulogic_vector(ROW_WIDTH-1 downto 0); -- The cache tags LUTRAM has a row per set.
Vivado is a pain and will -- not handle a clean (commented) definition of the cache tags as a 3d @@ -184,6 +198,8 @@ architecture rtl of icache is wb : wishbone_master_out; store_way : way_t; store_index : index_t; + recv_row : row_t; + recv_valid : std_ulogic; store_row : row_t; store_tag : cache_tag_t; store_valid : std_ulogic; @@ -214,7 +230,9 @@ architecture rtl of icache is -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; - signal cache_out : cache_ram_out_t; + signal cache_out : cache_ram_out_t; + signal cache_wr_data : std_ulogic_vector(ROW_WIDTH - 1 downto 0); + signal wb_rd_data : std_ulogic_vector(ROW_SIZE_BITS - 1 downto 0); -- PLRU output interface type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0); @@ -293,7 +311,7 @@ architecture rtl of icache is variable word: integer range 0 to INSN_PER_ROW-1; begin word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2))); - return data(31+word*32 downto word*32); + return data(word * ICWORDLEN + ICWORDLEN - 1 downto word * ICWORDLEN); end; -- Get the tag value from the address @@ -327,6 +345,34 @@ architecture rtl of icache is begin + -- byte-swap read data if big endian + process(all) + variable j: integer; + begin + if r.store_tag(TAG_BITS - 1) = '0' then + wb_rd_data <= wishbone_in.dat; + else + for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop + j := ((ii / 4) * 4) + (3 - (ii mod 4)); + wb_rd_data(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8); + end loop; + end if; + end process; + + predecoder_0: entity work.predecoder + generic map ( + HAS_FPU => HAS_FPU, + WIDTH => INSN_PER_ROW, + ICODE_LEN => PREDECODE_BITS, + IMAGE_LEN => INSN_IMAGE_BITS + ) + port map ( + clk => clk, + valid_in => wishbone_in.ack, + insns_in => wb_rd_data, + icodes_out => cache_wr_data + ); + assert LINE_SIZE mod ROW_SIZE = 0; assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE; assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" 
severity FAILURE; @@ -367,13 +413,13 @@ begin signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0); signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0); signal dout : cache_row_t; - signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0); - signal wr_dat : std_ulogic_vector(wishbone_in.dat'left downto 0); + signal wr_sel : std_ulogic_vector(0 downto 0); begin way: entity work.cache_ram generic map ( ROW_BITS => ROW_BITS, - WIDTH => ROW_SIZE_BITS + WIDTH => ROW_WIDTH, + BYTEWID => ROW_WIDTH ) port map ( clk => clk, @@ -382,31 +428,19 @@ begin rd_data => dout, wr_sel => wr_sel, wr_addr => wr_addr, - wr_data => wr_dat + wr_data => cache_wr_data ); process(all) - variable j: integer; begin - -- byte-swap read data if big endian - if r.store_tag(TAG_BITS - 1) = '0' then - wr_dat <= wishbone_in.dat; - else - for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop - j := ((ii / 4) * 4) + (3 - (ii mod 4)); - wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8); - end loop; - end if; do_read <= not stall_in; do_write <= '0'; - if wishbone_in.ack = '1' and replace_way = i then + if r.recv_valid = '1' and r.store_way = i then do_write <= '1'; end if; cache_out(i) <= dout; rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS)); wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS)); - for ii in 0 to ROW_SIZE-1 loop - wr_sel(ii) <= do_write; - end loop; + wr_sel(0) <= do_write; end process; end generate; @@ -515,6 +549,8 @@ begin icache_comb : process(all) variable is_hit : std_ulogic; variable hit_way : way_t; + variable insn : std_ulogic_vector(ICWORDLEN - 1 downto 0); + variable icode : insn_code; begin -- Extract line, row and tag from request if not is_X(i_in.nia) then @@ -575,11 +611,18 @@ begin -- I prefer not to do just yet as it would force fetch2 to know about -- some of the cache geometry information. 
-- + insn := (others => '0'); + icode := INSN_illegal; if r.hit_valid = '1' then - i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way)); - else - i_out.insn <= (others => '0'); + insn := read_insn_word(r.hit_nia, cache_out(r.hit_way)); + -- Currently we use only the top bit for indicating illegal + -- instructions because we know that insn_codes fit into 9 bits. + if insn(ICWORDLEN - 1) = '0' then + icode := insn_code'val(to_integer(unsigned(insn(ICWORDLEN-1 downto INSN_IMAGE_BITS)))); + end if; end if; + i_out.insn <= insn(31 downto 0); + i_out.icode <= icode; i_out.valid <= r.hit_valid; i_out.nia <= r.hit_nia; i_out.stop_mark <= r.hit_smark; @@ -640,9 +683,11 @@ begin variable snoop_addr : real_addr_t; variable snoop_tag : cache_tag_t; variable snoop_cache_tags : cache_tags_set_t; + variable replace_way : way_t; begin if rising_edge(clk) then ev.icache_miss <= '0'; + r.recv_valid <= '0'; -- On reset, clear all valid bits to force misses if rst = '1' then for i in index_t loop @@ -714,13 +759,13 @@ begin " IR:" & std_ulogic'image(i_in.virt_mode) & " SM:" & std_ulogic'image(i_in.stop_mark) & " idx:" & integer'image(req_index) & - " way:" & integer'image(replace_way) & " tag:" & to_hstring(req_tag) & " RA:" & to_hstring(real_addr); ev.icache_miss <= '1'; -- Keep track of our index and way for subsequent stores r.store_index <= req_index; + r.recv_row <= get_row(req_raddr); r.store_row <= get_row(req_raddr); r.store_tag <= req_tag; r.store_valid <= '1'; @@ -740,6 +785,7 @@ begin when CLR_TAG | WAIT_ACK => if r.state = CLR_TAG then -- Get victim way from plru + replace_way := to_integer(unsigned(plru_victim(r.store_index))); r.store_way <= replace_way; -- Force misses on that way while reloading that line @@ -757,6 +803,19 @@ begin r.state <= WAIT_ACK; end if; + -- If we are writing in this cycle, mark row valid and see if we are done + if r.recv_valid = '1' then + r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in; + if is_last_row(r.store_row, 
r.end_row_ix) then + -- Cache line is now valid + cache_valids(r.store_index)(r.store_way) <= r.store_valid and not inval_in; + -- We are done + r.state <= IDLE; + end if; + -- Store row catches up with the receive row + r.store_row <= r.recv_row; + end if; + -- If we are still sending requests, was one accepted ? if wishbone_in.stall = '0' and r.wb.stb = '1' then -- That was the last word ? We are done sending. Clear stb. @@ -777,33 +836,27 @@ begin -- Incoming acks processing if wishbone_in.ack = '1' then - r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in; -- Check for completion - if is_last_row(r.store_row, r.end_row_ix) then + if is_last_row(r.recv_row, r.end_row_ix) then -- Complete wishbone cycle r.wb.cyc <= '0'; - - -- Cache line is now valid - cache_valids(r.store_index)(replace_way) <= r.store_valid and not inval_in; - - -- We are done - r.state <= IDLE; end if; + r.recv_valid <= '1'; - -- Increment store row counter - r.store_row <= next_row(r.store_row); + -- Increment receive row counter + r.recv_row <= next_row(r.recv_row); end if; when STOP_RELOAD => -- Wait for all outstanding requests to be satisfied, then -- go to IDLE state.
- if get_row_of_line(r.store_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then + if get_row_of_line(r.recv_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then r.wb.cyc <= '0'; r.state <= IDLE; end if; if wishbone_in.ack = '1' then -- Increment store row counter - r.store_row <= next_row(r.store_row); + r.recv_row <= next_row(r.recv_row); end if; end case; end if; diff --git a/microwatt.core b/microwatt.core index 46e114e..4c8695e 100644 --- a/microwatt.core +++ b/microwatt.core @@ -9,6 +9,7 @@ filesets: - wishbone_types.vhdl - common.vhdl - fetch1.vhdl + - predecode.vhdl - decode1.vhdl - helpers.vhdl - decode2.vhdl diff --git a/predecode.vhdl b/predecode.vhdl new file mode 100644 index 0000000..1e1d85b --- /dev/null +++ b/predecode.vhdl @@ -0,0 +1,582 @@ +-- Instruction pre-decoder for microwatt +-- One cycle latency. Does 'WIDTH' instructions in parallel. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; +use work.decode_types.all; +use work.insn_helpers.all; + +entity predecoder is + generic ( + HAS_FPU : boolean := true; + WIDTH : natural := 2; + ICODE_LEN : natural := 10; + IMAGE_LEN : natural := 26 + ); + port ( + clk : in std_ulogic; + valid_in : in std_ulogic; + insns_in : in std_ulogic_vector(WIDTH * 32 - 1 downto 0); + icodes_out : out std_ulogic_vector(WIDTH * (ICODE_LEN + IMAGE_LEN) - 1 downto 0) + ); +end entity predecoder; + +architecture behaviour of predecoder is + + type predecoder_rom_t is array(0 to 2047) of insn_code; + + constant major_predecode_rom : predecoder_rom_t := ( + 2#001100_00000# to 2#001100_11111# => INSN_addic, + 2#001101_00000# to 2#001101_11111# => INSN_addic_dot, + 2#001110_00000# to 2#001110_11111# => INSN_addi, + 2#001111_00000# to 2#001111_11111# => INSN_addis, + 2#010011_00100# to 2#010011_00101# => INSN_addpcis, + 2#011100_00000# to 2#011100_11111# => INSN_andi_dot, + 2#011101_00000# to 2#011101_11111# => INSN_andis_dot, + 2#000000_00000# => 
INSN_attn, + 2#010010_00000# to 2#010010_11111# => INSN_b, + 2#010000_00000# to 2#010000_11111# => INSN_bc, + 2#001011_00000# to 2#001011_11111# => INSN_cmpi, + 2#001010_00000# to 2#001010_11111# => INSN_cmpli, + 2#100010_00000# to 2#100010_11111# => INSN_lbz, + 2#100011_00000# to 2#100011_11111# => INSN_lbzu, + 2#110010_00000# to 2#110010_11111# => INSN_lfd, + 2#110011_00000# to 2#110011_11111# => INSN_lfdu, + 2#110000_00000# to 2#110000_11111# => INSN_lfs, + 2#110001_00000# to 2#110001_11111# => INSN_lfsu, + 2#101010_00000# to 2#101010_11111# => INSN_lha, + 2#101011_00000# to 2#101011_11111# => INSN_lhau, + 2#101000_00000# to 2#101000_11111# => INSN_lhz, + 2#101001_00000# to 2#101001_11111# => INSN_lhzu, + 2#100000_00000# to 2#100000_11111# => INSN_lwz, + 2#100001_00000# to 2#100001_11111# => INSN_lwzu, + 2#000111_00000# to 2#000111_11111# => INSN_mulli, + 2#011000_00000# to 2#011000_11111# => INSN_ori, + 2#011001_00000# to 2#011001_11111# => INSN_oris, + 2#010100_00000# to 2#010100_11111# => INSN_rlwimi, + 2#010101_00000# to 2#010101_11111# => INSN_rlwinm, + 2#010111_00000# to 2#010111_11111# => INSN_rlwnm, + 2#010001_00000# to 2#010001_11111# => INSN_sc, + 2#100110_00000# to 2#100110_11111# => INSN_stb, + 2#100111_00000# to 2#100111_11111# => INSN_stbu, + 2#110110_00000# to 2#110110_11111# => INSN_stfd, + 2#110111_00000# to 2#110111_11111# => INSN_stfdu, + 2#110100_00000# to 2#110100_11111# => INSN_stfs, + 2#110101_00000# to 2#110101_11111# => INSN_stfsu, + 2#101100_00000# to 2#101100_11111# => INSN_sth, + 2#101101_00000# to 2#101101_11111# => INSN_sthu, + 2#100100_00000# to 2#100100_11111# => INSN_stw, + 2#100101_00000# to 2#100101_11111# => INSN_stwu, + 2#001000_00000# to 2#001000_11111# => INSN_subfic, + 2#000010_00000# to 2#000010_11111# => INSN_tdi, + 2#000011_00000# to 2#000011_11111# => INSN_twi, + 2#011010_00000# to 2#011010_11111# => INSN_xori, + 2#011011_00000# to 2#011011_11111# => INSN_xoris, + -- major opcode 4 + 2#000100_10000# => INSN_maddhd, + 
2#000100_10001# => INSN_maddhdu, + 2#000100_10011# => INSN_maddld, + -- major opcode 30 + 2#011110_01000# to 2#011110_01001# => INSN_rldic, + 2#011110_01010# to 2#011110_01011# => INSN_rldic, + 2#011110_00000# to 2#011110_00001# => INSN_rldicl, + 2#011110_00010# to 2#011110_00011# => INSN_rldicl, + 2#011110_00100# to 2#011110_00101# => INSN_rldicr, + 2#011110_00110# to 2#011110_00111# => INSN_rldicr, + 2#011110_01100# to 2#011110_01101# => INSN_rldimi, + 2#011110_01110# to 2#011110_01111# => INSN_rldimi, + 2#011110_10000# to 2#011110_10001# => INSN_rldcl, + 2#011110_10010# to 2#011110_10011# => INSN_rldcr, + -- major opcode 58 + 2#111010_00000# => INSN_ld, + 2#111010_00001# => INSN_ldu, + 2#111010_00010# => INSN_lwa, + 2#111010_00100# => INSN_ld, + 2#111010_00101# => INSN_ldu, + 2#111010_00110# => INSN_lwa, + 2#111010_01000# => INSN_ld, + 2#111010_01001# => INSN_ldu, + 2#111010_01010# => INSN_lwa, + 2#111010_01100# => INSN_ld, + 2#111010_01101# => INSN_ldu, + 2#111010_01110# => INSN_lwa, + 2#111010_10000# => INSN_ld, + 2#111010_10001# => INSN_ldu, + 2#111010_10010# => INSN_lwa, + 2#111010_10100# => INSN_ld, + 2#111010_10101# => INSN_ldu, + 2#111010_10110# => INSN_lwa, + 2#111010_11000# => INSN_ld, + 2#111010_11001# => INSN_ldu, + 2#111010_11010# => INSN_lwa, + 2#111010_11100# => INSN_ld, + 2#111010_11101# => INSN_ldu, + 2#111010_11110# => INSN_lwa, + -- major opcode 59 + 2#111011_00100# to 2#111011_00101# => INSN_fdivs, + 2#111011_01000# to 2#111011_01001# => INSN_fsubs, + 2#111011_01010# to 2#111011_01011# => INSN_fadds, + 2#111011_01100# to 2#111011_01101# => INSN_fsqrts, + 2#111011_10000# to 2#111011_10001# => INSN_fres, + 2#111011_10010# to 2#111011_10011# => INSN_fmuls, + 2#111011_10100# to 2#111011_10101# => INSN_frsqrtes, + 2#111011_11000# to 2#111011_11001# => INSN_fmsubs, + 2#111011_11010# to 2#111011_11011# => INSN_fmadds, + 2#111011_11100# to 2#111011_11101# => INSN_fnmsubs, + 2#111011_11110# to 2#111011_11111# => INSN_fnmadds, + -- major opcode 62 + 
2#111110_00000# => INSN_std, + 2#111110_00001# => INSN_stdu, + 2#111110_00100# => INSN_std, + 2#111110_00101# => INSN_stdu, + 2#111110_01000# => INSN_std, + 2#111110_01001# => INSN_stdu, + 2#111110_01100# => INSN_std, + 2#111110_01101# => INSN_stdu, + 2#111110_10000# => INSN_std, + 2#111110_10001# => INSN_stdu, + 2#111110_10100# => INSN_std, + 2#111110_10101# => INSN_stdu, + 2#111110_11000# => INSN_std, + 2#111110_11001# => INSN_stdu, + 2#111110_11100# => INSN_std, + 2#111110_11101# => INSN_stdu, + -- major opcode 63 + 2#111111_00100# to 2#111111_00101# => INSN_fdiv, + 2#111111_01000# to 2#111111_01001# => INSN_fsub, + 2#111111_01010# to 2#111111_01011# => INSN_fadd, + 2#111111_01100# to 2#111111_01101# => INSN_fsqrt, + 2#111111_01110# to 2#111111_01111# => INSN_fsel, + 2#111111_10000# to 2#111111_10001# => INSN_fre, + 2#111111_10010# to 2#111111_10011# => INSN_fmul, + 2#111111_10100# to 2#111111_10101# => INSN_frsqrte, + 2#111111_11000# to 2#111111_11001# => INSN_fmsub, + 2#111111_11010# to 2#111111_11011# => INSN_fmadd, + 2#111111_11100# to 2#111111_11101# => INSN_fnmsub, + 2#111111_11110# to 2#111111_11111# => INSN_fnmadd, + others => INSN_illegal + ); + + constant row_predecode_rom : predecoder_rom_t := ( + -- Major opcode 31 + -- Address bits are 0, insn(10:1) + 2#0_01000_01010# => INSN_add, + 2#0_11000_01010# => INSN_add, -- addo + 2#0_00000_01010# => INSN_addc, + 2#0_10000_01010# => INSN_addc, -- addco + 2#0_00100_01010# => INSN_adde, + 2#0_10100_01010# => INSN_adde, -- addeo + 2#0_00101_01010# => INSN_addex, + 2#0_00010_01010# => INSN_addg6s, + 2#0_00111_01010# => INSN_addme, + 2#0_10111_01010# => INSN_addme, -- addmeo + 2#0_00110_01010# => INSN_addze, + 2#0_10110_01010# => INSN_addze, -- addzeo + 2#0_00000_11100# => INSN_and, + 2#0_00001_11100# => INSN_andc, + 2#0_00111_11100# => INSN_bperm, + 2#0_01001_11010# => INSN_cbcdtd, + 2#0_01000_11010# => INSN_cdtbcd, + 2#0_00000_00000# => INSN_cmp, + 2#0_01111_11100# => INSN_cmpb, + 2#0_00111_00000# => 
INSN_cmpeqb, + 2#0_00001_00000# => INSN_cmpl, + 2#0_00110_00000# => INSN_cmprb, + 2#0_00001_11010# => INSN_cntlzd, + 2#0_00000_11010# => INSN_cntlzw, + 2#0_10001_11010# => INSN_cnttzd, + 2#0_10000_11010# => INSN_cnttzw, + 2#0_10111_10011# => INSN_darn, + 2#0_00010_10110# => INSN_dcbf, + 2#0_00001_10110# => INSN_dcbst, + 2#0_01000_10110# => INSN_dcbt, + 2#0_00111_10110# => INSN_dcbtst, + 2#0_11111_10110# => INSN_dcbz, + 2#0_01100_01001# => INSN_divdeu, + 2#0_11100_01001# => INSN_divdeu, -- divdeuo + 2#0_01100_01011# => INSN_divweu, + 2#0_11100_01011# => INSN_divweu, -- divweuo + 2#0_01101_01001# => INSN_divde, + 2#0_11101_01001# => INSN_divde, -- divdeo + 2#0_01101_01011# => INSN_divwe, + 2#0_11101_01011# => INSN_divwe, -- divweo + 2#0_01110_01001# => INSN_divdu, + 2#0_11110_01001# => INSN_divdu, -- divduo + 2#0_01110_01011# => INSN_divwu, + 2#0_11110_01011# => INSN_divwu, -- divwuo + 2#0_01111_01001# => INSN_divd, + 2#0_11111_01001# => INSN_divd, -- divdo + 2#0_01111_01011# => INSN_divw, + 2#0_11111_01011# => INSN_divw, -- divwo + 2#0_11001_10110# => INSN_nop, -- dss + 2#0_01010_10110# => INSN_nop, -- dst + 2#0_01011_10110# => INSN_nop, -- dstst + 2#0_11010_10110# => INSN_eieio, + 2#0_01000_11100# => INSN_eqv, + 2#0_11101_11010# => INSN_extsb, + 2#0_11100_11010# => INSN_extsh, + 2#0_11110_11010# => INSN_extsw, + 2#0_11011_11010# => INSN_extswsli, + 2#0_11011_11011# => INSN_extswsli, + 2#0_11110_10110# => INSN_icbi, + 2#0_00000_10110# => INSN_icbt, + 2#0_00000_01111# => INSN_isel, + 2#0_00001_01111# => INSN_isel, + 2#0_00010_01111# => INSN_isel, + 2#0_00011_01111# => INSN_isel, + 2#0_00100_01111# => INSN_isel, + 2#0_00101_01111# => INSN_isel, + 2#0_00110_01111# => INSN_isel, + 2#0_00111_01111# => INSN_isel, + 2#0_01000_01111# => INSN_isel, + 2#0_01001_01111# => INSN_isel, + 2#0_01010_01111# => INSN_isel, + 2#0_01011_01111# => INSN_isel, + 2#0_01100_01111# => INSN_isel, + 2#0_01101_01111# => INSN_isel, + 2#0_01110_01111# => INSN_isel, + 2#0_01111_01111# => INSN_isel, 
+ 2#0_10000_01111# => INSN_isel, + 2#0_10001_01111# => INSN_isel, + 2#0_10010_01111# => INSN_isel, + 2#0_10011_01111# => INSN_isel, + 2#0_10100_01111# => INSN_isel, + 2#0_10101_01111# => INSN_isel, + 2#0_10110_01111# => INSN_isel, + 2#0_10111_01111# => INSN_isel, + 2#0_11000_01111# => INSN_isel, + 2#0_11001_01111# => INSN_isel, + 2#0_11010_01111# => INSN_isel, + 2#0_11011_01111# => INSN_isel, + 2#0_11100_01111# => INSN_isel, + 2#0_11101_01111# => INSN_isel, + 2#0_11110_01111# => INSN_isel, + 2#0_11111_01111# => INSN_isel, + 2#0_00001_10100# => INSN_lbarx, + 2#0_11010_10101# => INSN_lbzcix, + 2#0_00011_10111# => INSN_lbzux, + 2#0_00010_10111# => INSN_lbzx, + 2#0_00010_10100# => INSN_ldarx, + 2#0_10000_10100# => INSN_ldbrx, + 2#0_11011_10101# => INSN_ldcix, + 2#0_00001_10101# => INSN_ldux, + 2#0_00000_10101# => INSN_ldx, + 2#0_10010_10111# => INSN_lfdx, + 2#0_10011_10111# => INSN_lfdux, + 2#0_11010_10111# => INSN_lfiwax, + 2#0_11011_10111# => INSN_lfiwzx, + 2#0_10000_10111# => INSN_lfsx, + 2#0_10001_10111# => INSN_lfsux, + 2#0_00011_10100# => INSN_lharx, + 2#0_01011_10111# => INSN_lhaux, + 2#0_01010_10111# => INSN_lhax, + 2#0_11000_10110# => INSN_lhbrx, + 2#0_11001_10101# => INSN_lhzcix, + 2#0_01001_10111# => INSN_lhzux, + 2#0_01000_10111# => INSN_lhzx, + 2#0_00000_10100# => INSN_lwarx, + 2#0_01011_10101# => INSN_lwaux, + 2#0_01010_10101# => INSN_lwax, + 2#0_10000_10110# => INSN_lwbrx, + 2#0_11000_10101# => INSN_lwzcix, + 2#0_00001_10111# => INSN_lwzux, + 2#0_00000_10111# => INSN_lwzx, + 2#0_10010_00000# => INSN_mcrxrx, + 2#0_00000_10011# => INSN_mfcr, + 2#0_00010_10011# => INSN_mfmsr, + 2#0_01010_10011# => INSN_mfspr, + 2#0_01000_01001# => INSN_modud, + 2#0_01000_01011# => INSN_moduw, + 2#0_11000_01001# => INSN_modsd, + 2#0_11000_01011# => INSN_modsw, + 2#0_00100_10000# => INSN_mtcrf, + 2#0_00100_10010# => INSN_mtmsr, + 2#0_00101_10010# => INSN_mtmsrd, + 2#0_01110_10011# => INSN_mtspr, + 2#0_00010_01001# => INSN_mulhd, + 2#0_00000_01001# => INSN_mulhdu, + 
2#0_00010_01011# => INSN_mulhw, + 2#0_00000_01011# => INSN_mulhwu, + -- next 4 have reserved bit set + 2#0_10010_01001# => INSN_mulhd, + 2#0_10000_01001# => INSN_mulhdu, + 2#0_10010_01011# => INSN_mulhw, + 2#0_10000_01011# => INSN_mulhwu, + 2#0_00111_01001# => INSN_mulld, + 2#0_10111_01001# => INSN_mulld, -- mulldo + 2#0_00111_01011# => INSN_mullw, + 2#0_10111_01011# => INSN_mullw, -- mullwo + 2#0_01110_11100# => INSN_nand, + 2#0_00011_01000# => INSN_neg, + 2#0_10011_01000# => INSN_neg, -- nego + -- next 8 are reserved no-op instructions + 2#0_10000_10010# => INSN_nop, + 2#0_10001_10010# => INSN_nop, + 2#0_10010_10010# => INSN_nop, + 2#0_10011_10010# => INSN_nop, + 2#0_10100_10010# => INSN_nop, + 2#0_10101_10010# => INSN_nop, + 2#0_10110_10010# => INSN_nop, + 2#0_10111_10010# => INSN_nop, + 2#0_00011_11100# => INSN_nor, + 2#0_01101_11100# => INSN_or, + 2#0_01100_11100# => INSN_orc, + 2#0_00011_11010# => INSN_popcntb, + 2#0_01111_11010# => INSN_popcntd, + 2#0_01011_11010# => INSN_popcntw, + 2#0_00101_11010# => INSN_prtyd, + 2#0_00100_11010# => INSN_prtyw, + 2#0_00100_00000# => INSN_setb, + 2#0_01111_10010# => INSN_slbia, + 2#0_00000_11011# => INSN_sld, + 2#0_00000_11000# => INSN_slw, + 2#0_11000_11010# => INSN_srad, + 2#0_11001_11010# => INSN_sradi, + 2#0_11001_11011# => INSN_sradi, + 2#0_11000_11000# => INSN_sraw, + 2#0_11001_11000# => INSN_srawi, + 2#0_10000_11011# => INSN_srd, + 2#0_10000_11000# => INSN_srw, + 2#0_11110_10101# => INSN_stbcix, + 2#0_10101_10110# => INSN_stbcx, + 2#0_00111_10111# => INSN_stbux, + 2#0_00110_10111# => INSN_stbx, + 2#0_10100_10100# => INSN_stdbrx, + 2#0_11111_10101# => INSN_stdcix, + 2#0_00110_10110# => INSN_stdcx, + 2#0_00101_10101# => INSN_stdux, + 2#0_00100_10101# => INSN_stdx, + 2#0_10110_10111# => INSN_stfdx, + 2#0_10111_10111# => INSN_stfdux, + 2#0_11110_10111# => INSN_stfiwx, + 2#0_10100_10111# => INSN_stfsx, + 2#0_10101_10111# => INSN_stfsux, + 2#0_11100_10110# => INSN_sthbrx, + 2#0_11101_10101# => INSN_sthcix, + 
2#0_10110_10110# => INSN_sthcx, + 2#0_01101_10111# => INSN_sthux, + 2#0_01100_10111# => INSN_sthx, + 2#0_10100_10110# => INSN_stwbrx, + 2#0_11100_10101# => INSN_stwcix, + 2#0_00100_10110# => INSN_stwcx, + 2#0_00101_10111# => INSN_stwux, + 2#0_00100_10111# => INSN_stwx, + 2#0_00001_01000# => INSN_subf, + 2#0_10001_01000# => INSN_subf, -- subfo + 2#0_00000_01000# => INSN_subfc, + 2#0_10000_01000# => INSN_subfc, -- subfco + 2#0_00100_01000# => INSN_subfe, + 2#0_10100_01000# => INSN_subfe, -- subfeo + 2#0_00111_01000# => INSN_subfme, + 2#0_10111_01000# => INSN_subfme, -- subfmeo + 2#0_00110_01000# => INSN_subfze, + 2#0_10110_01000# => INSN_subfze, -- subfzeo + 2#0_10010_10110# => INSN_sync, + 2#0_00010_00100# => INSN_td, + 2#0_00000_00100# => INSN_tw, + 2#0_01001_10010# => INSN_tlbie, + 2#0_01000_10010# => INSN_tlbiel, + 2#0_10001_10110# => INSN_tlbsync, + 2#0_00000_11110# => INSN_wait, + 2#0_01001_11100# => INSN_xor, + + -- Major opcode 19 + -- Columns with insn(4) = '1' are all illegal and not mapped here; to + -- fit into 2048 entries, the columns are remapped so that 16-23 are + -- stored here as 8-15; in other words the address bits are + -- 1, insn(10..6), 1, insn(5), insn(3..1) + 2#1_10000_11000# => INSN_bcctr, + 2#1_00000_11000# => INSN_bclr, + 2#1_10001_11000# => INSN_bctar, + 2#1_01000_10001# => INSN_crand, + 2#1_00100_10001# => INSN_crandc, + 2#1_01001_10001# => INSN_creqv, + 2#1_00111_10001# => INSN_crnand, + 2#1_00001_10001# => INSN_crnor, + 2#1_01110_10001# => INSN_cror, + 2#1_01101_10001# => INSN_crorc, + 2#1_00110_10001# => INSN_crxor, + 2#1_00100_11110# => INSN_isync, + 2#1_00000_10000# => INSN_mcrf, + 2#1_00000_11010# => INSN_rfid, + + -- Major opcode 59 + -- Only column 14 is valid here; columns 16-31 are handled in the major table + -- Column 14 is mapped to column 6 of the space which is + -- mostly used for opcode 19.
+ 2#1_11010_10110# => INSN_fcfids, + 2#1_11110_10110# => INSN_fcfidus, + + -- Major opcode 63 + -- Columns 0-15 are mapped here; columns 16-31 are in the major table. + -- Address bits are 1, insn(10:6), 0, insn(4:1) + 2#1_00000_00000# => INSN_fcmpu, + 2#1_00001_00000# => INSN_fcmpo, + 2#1_00010_00000# => INSN_mcrfs, + 2#1_00100_00000# => INSN_ftdiv, + 2#1_00101_00000# => INSN_ftsqrt, + 2#1_00001_00110# => INSN_mtfsb, + 2#1_00010_00110# => INSN_mtfsb, + 2#1_00100_00110# => INSN_mtfsfi, + 2#1_11010_00110# => INSN_fmrgow, + 2#1_11110_00110# => INSN_fmrgew, + 2#1_10010_00111# => INSN_mffs, + 2#1_10110_00111# => INSN_mtfsf, + 2#1_00000_01000# => INSN_fcpsgn, + 2#1_00001_01000# => INSN_fneg, + 2#1_00010_01000# => INSN_fmr, + 2#1_00100_01000# => INSN_fnabs, + 2#1_01000_01000# => INSN_fabs, + 2#1_01100_01000# => INSN_frin, + 2#1_01101_01000# => INSN_friz, + 2#1_01110_01000# => INSN_frip, + 2#1_01111_01000# => INSN_frim, + 2#1_00000_01100# => INSN_frsp, + 2#1_00000_01110# => INSN_fctiw, + 2#1_00100_01110# => INSN_fctiwu, + 2#1_11001_01110# => INSN_fctid, + 2#1_11010_01110# => INSN_fcfid, + 2#1_11101_01110# => INSN_fctidu, + 2#1_11110_01110# => INSN_fcfidu, + 2#1_00000_01111# => INSN_fctiwz, + 2#1_00100_01111# => INSN_fctiwuz, + 2#1_11001_01111# => INSN_fctidz, + 2#1_11101_01111# => INSN_fctiduz, + + others => INSN_illegal + ); + + constant IOUT_LEN : natural := ICODE_LEN + IMAGE_LEN; + + type predec_t is record + image : std_ulogic_vector(31 downto 0); + maj_predecode : insn_code; + row_predecode : insn_code; + end record; + + subtype index_t is integer range 0 to WIDTH-1; + type predec_array is array(index_t) of predec_t; + + signal pred : predec_array; + +begin + predecode_0: process(clk) + variable majaddr : std_ulogic_vector(10 downto 0); + variable rowaddr : std_ulogic_vector(10 downto 0); + variable iword : std_ulogic_vector(31 downto 0); + begin + if rising_edge(clk) then + for i in index_t loop + if valid_in = '1' then + iword := insns_in(i * 32 + 31 downto i * 
32); + + majaddr := iword(31 downto 26) & iword(4 downto 0); + + -- row_predecode_rom is used for op 19, 31, 59, 63 + -- addr bit 10 is 0 for op 31, 1 for 19, 59, 63 + rowaddr(10) := iword(31) or not iword(29); + rowaddr(9 downto 5) := iword(10 downto 6); + if iword(28) = '0' then + -- op 19 and op 59 + rowaddr(4 downto 3) := '1' & iword(5); + else + -- op 31 and 63; for 63 we only use this when iword(5) = '0' + rowaddr(4 downto 3) := iword(5 downto 4); + end if; + rowaddr(2 downto 0) := iword(3 downto 1); + + pred(i).image <= iword; + pred(i).maj_predecode <= major_predecode_rom(to_integer(unsigned(majaddr))); + pred(i).row_predecode <= row_predecode_rom(to_integer(unsigned(rowaddr))); + else + pred(i).image <= (others => '0'); + pred(i).maj_predecode <= INSN_illegal; + pred(i).row_predecode <= INSN_illegal; + end if; + end loop; + end if; + end process; + + predecode_1: process(all) + variable iword : std_ulogic_vector(31 downto 0); + variable use_row : std_ulogic; + variable illegal : std_ulogic; + variable ici : std_ulogic_vector(IOUT_LEN - 1 downto 0); + variable icode : insn_code; + begin + for i in index_t loop + iword := pred(i).image; + icode := pred(i).maj_predecode; + use_row := '0'; + illegal := '0'; + + case iword(31 downto 26) is + when "000100" => -- 4 + -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*) + illegal := not iword(5); + + when "010011" => -- 19 + -- Columns 8-15 and 24-31 don't have any valid instructions + -- (where insn(5..1) is the column number). + -- addpcis (column 2) is in the major table + -- Other valid columns are mapped to columns in the second + -- half of the row table: columns 0-1 are mapped to 16-17 + -- and 16-23 are mapped to 24-31. 
+ illegal := iword(4); + use_row := iword(5) or (not iword(3) and not iword(2)); + + when "011000" => -- 24 + -- ori, special-case the standard NOP + if std_match(iword, "01100000000000000000000000000000") then + icode := INSN_nop; + end if; + + when "011111" => -- 31 + -- major opcode 31, lots of things + -- Use the first half of the row table for all columns + use_row := '1'; + + when "111011" => -- 59 + -- floating point operations, mostly single-precision + -- Columns 0-11 are illegal; columns 12-15 are mapped + -- to columns 20-23 in the second half of the row table, + -- and columns 16-31 are in the major table. + illegal := not iword(5) and (not iword(4) or not iword(3)); + use_row := not iword(5); + + when "111111" => -- 63 + -- floating point operations, general and double-precision + -- Use columns 0-15 of the second half of the row table + -- for columns 0-15, and the major table for columns 16-31. + use_row := not iword(5); + + when others => + end case; + if use_row = '1' then + icode := pred(i).row_predecode; + end if; + + -- Mark FP instructions as illegal if we don't have an FPU + if not HAS_FPU and icode >= INSN_first_frs then + illegal := '1'; + end if; + + ici(31 downto 0) := iword; + ici(IOUT_LEN - 1 downto 32) := (others => '0'); + if illegal = '1' or icode = INSN_illegal then + -- Since an insn_code currently fits in 9 bits, use just + -- the most significant bit of ici to indicate illegal insns. + ici(IOUT_LEN - 1) := '1'; + else + ici(IOUT_LEN - 1 downto IMAGE_LEN) := + std_ulogic_vector(to_unsigned(insn_code'pos(icode), ICODE_LEN)); + end if; + icodes_out(i * IOUT_LEN + IOUT_LEN - 1 downto i * IOUT_LEN) <= ici; + end loop; + end process; + +end architecture behaviour;