From bba668cd93ab27ad7045745b9a3045dfe147adf5 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 16 Sep 2024 19:01:36 +0200 Subject: [PATCH] [hardware] Modify csr sync and handshake in ara --- hardware/src/ara.sv | 2 + hardware/src/ara_dispatcher.sv | 8 +- hardware/src/ara_pre_decoder.sv | 190 +++++++---------- hardware/src/ara_ring_buffer.sv | 10 +- hardware/src/ara_xif_handler.sv | 351 +++++++++++++++----------------- 5 files changed, 251 insertions(+), 310 deletions(-) diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index 80367b84a..fb29c42ad 100644 --- a/hardware/src/ara.sv +++ b/hardware/src/ara.sv @@ -112,6 +112,8 @@ module ara import ara_pkg::*; #( fpnew_pkg::roundmode_e frm; // result information writeregflags_t is_writeback; + // does the insn modify a spec CSR? + logic is_spec_csr; } instr_pack_t; typedef struct packed { diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index 812b2bf55..174d0e973 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -308,9 +308,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( instruction_ready_o = 1'b0; core_v_xif_resp_o.result_valid = 1'b0; - x_result = '{ - id : instruction_i.id, - default : '0 + x_result = '{ + id : instruction_i.id, + rd : instruction_i.instr[11:7], + we : instruction_i.is_writeback, + default : '0 }; acc_resp = '{ diff --git a/hardware/src/ara_pre_decoder.sv b/hardware/src/ara_pre_decoder.sv index b468ddf17..af64bde5f 100644 --- a/hardware/src/ara_pre_decoder.sv +++ b/hardware/src/ara_pre_decoder.sv @@ -26,16 +26,16 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( // Clock and reset input logic clk_i, input logic rst_ni, - // Interface with Ara's backend - input logic ara_req_ready_i, - input logic ara_idle_i, - // XIF - input x_req_t core_v_xif_req_i, - output x_resp_t core_v_xif_resp_o, - // Dispatcher sync - input logic sync_i, + // X issue interface + input x_issue_req_t cvxif_issue_req_i, 
+ input logic cvxif_issue_req_valid_i, + output x_issue_resp_t cvxif_issue_resp_o, + // Update the speculative CSRs + input logic csr_sync_valid_i, input csr_sync_t csr_sync_i, - output logic csr_stall_o + // The current insn is modifying a speculative CSR + output logic csr_spec_mod_o, + output logic csr_spec_mod_reg_o ); ara_resp_t ara_resp; @@ -106,18 +106,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( // State // ///////////// - // The backend can either be in normal operation, waiting for Ara to be idle before issuing new - // operations, or injecting a reshuffling uop. - // IDLE can happen, for example, once the vlmul has changed. - // RESHUFFLE can happen when an instruction writes a register with != EEW - typedef enum logic [1:0] { - NORMAL_OPERATION, - WAIT_IDLE, - RESHUFFLE, - SLDU_SEQUENCER - } state_e; - state_e state_d, state_q; - // We need to memorize the element width used to store each vector on the lanes, so that we are // able to deshuffle it when needed. rvv_pkg::vew_e [31:0] eew_d, eew_q; @@ -182,15 +170,7 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( logic is_stride_np2; logic [idx_width(idx_width(VLENB << 3)):0] sldu_popc; - // Is the stride power of two? 
-  popcount #(
-    .INPUT_WIDTH (idx_width(VLENB << 3))
-  ) i_np2_stride (
-    .data_i (ara_req_d.stride[idx_width(VLENB << 3)-1:0]),
-    .popcount_o(sldu_popc )
-  );
-
-  assign is_stride_np2 = sldu_popc > 1;
+  assign is_stride_np2 = '0;
   ///////////////
   // Decoder //
@@ -211,17 +191,11 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #(
   logic inv_accept;
-  assign core_v_xif_resp_o.issue_resp_accept = ~insn_error;
-  assign core_v_xif_resp_o.issue_resp_writeback = (is_rd || is_fd);
-  assign core_v_xif_resp_o.issue_resp_register_read[0] = (is_rs1 || is_fs1);
-  assign core_v_xif_resp_o.issue_resp_register_read[1] = (is_rs2 || is_fs2);
-  assign core_v_xif_resp_o.issue_resp_is_vfp = is_vfp;
-
-  assign core_v_xif_resp_o.error = '0;
-  assign core_v_xif_resp_o.fflags = '0;
-  assign core_v_xif_resp_o.fflags_valid = '0;
-  assign core_v_xif_resp_o.inval_valid = '0;
-  assign core_v_xif_resp_o.inval_addr = '0;
+  assign cvxif_issue_resp_o.accept = ~insn_error;
+  assign cvxif_issue_resp_o.writeback = (is_rd || is_fd);
+  assign cvxif_issue_resp_o.register_read[0] = (is_rs1 || is_fs1);
+  assign cvxif_issue_resp_o.register_read[1] = (is_rs2 || is_fs2);
+  assign cvxif_issue_resp_o.is_vfp = is_vfp;
   always_comb begin: p_decoder
     // Default values
@@ -260,7 +234,7 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #(
     inv_accept = 1'b1;
     insn_error = 1'b0;
-    csr_stall_o = '0;
+    {csr_spec_mod_o, csr_spec_mod_reg_o} = '0; // Default BOTH spec-CSR flags; csr_spec_mod_o is only ever set to 1 below and would otherwise infer a latch
     is_rs1 = 1'b0;
     is_rs2 = 1'b0;
@@ -288,19 +262,19 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #(
     ignore_zero_vl_check = 1'b0;
     if (1'b1) begin
-      if (core_v_xif_req_i.issue_valid) begin
+      if (cvxif_issue_req_valid_i) begin
         // Decoding
         is_decoding = 1'b1;
         // Decode the instructions based on their opcode
-        unique case (core_v_xif_req_i.issue_req_instr.itype.opcode)
+        unique case (cvxif_issue_req_i.instr.itype.opcode)
           //////////////////////////////////////
           // Vector Arithmetic instructions //
           //////////////////////////////////////
           riscv::OpcodeVec: begin
             //
Instruction is of one of the RVV types - automatic rvv_instruction_t insn = rvv_instruction_t'(core_v_xif_req_i.issue_req_instr.instr); + automatic rvv_instruction_t insn = rvv_instruction_t'(cvxif_issue_req_i.instr.instr); // Decode based on their func3 field unique case (insn.varith_type.func3) // Configuration instructions @@ -316,11 +290,14 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( // Update vtype if (insn.vsetvli_type.func1 == 1'b0) begin // vsetvli vtype_d = vtype_xlen(riscv::xlen_t'(insn.vsetvli_type.zimm11)); + csr_spec_mod_o = 1'b1; end else if (insn.vsetivli_type.func2 == 2'b11) begin // vsetivli vtype_d = vtype_xlen(riscv::xlen_t'(insn.vsetivli_type.zimm10)); + csr_spec_mod_o = 1'b1; end else if (insn.vsetvl_type.func7 == 7'b100_0000) begin // vsetvl - vtype_d = vtype_xlen(riscv::xlen_t'(core_v_xif_req_i.register_rs[1][7:0])); - csr_stall_o = 1'b1; + // vtype_d = vtype_xlen(riscv::xlen_t'(register_rs[1][7:0])); + csr_spec_mod_o = 1'b1; + csr_spec_mod_reg_o = 1'b1; end else begin illegal_insn = 1'b1; end @@ -331,6 +308,7 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( // LMUL >= SEW/ELEN (signed'($clog2(ELENB)) + signed'(vtype_d.vlmul) < signed'(vtype_d.vsew))) begin vtype_d = '{vill: 1'b1, default: '0}; + csr_spec_mod_o = 1'b1; end end @@ -550,7 +528,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( OPIVX: begin: opivx // These generate a request to Ara's backend - ara_req_d.scalar_op = core_v_xif_req_i.register_rs[0]; ara_req_d.use_scalar_op = 1'b1; ara_req_d.vs2 = insn.varith_type.rs2; ara_req_d.use_vs2 = 1'b1; @@ -577,7 +554,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( 6'b001011: ara_req_d.op = ara_pkg::VXOR; 6'b001110: begin ara_req_d.op = ara_pkg::VSLIDEUP; - ara_req_d.stride = core_v_xif_req_i.register_rs[0]; ara_req_d.eew_vs2 = vtype_q.vsew; // Encode vslideup/vslide1up on the use_scalar_op field ara_req_d.use_scalar_op = 1'b0; @@ -588,7 +564,6 @@ module 
ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( end 6'b001111: begin ara_req_d.op = ara_pkg::VSLIDEDOWN; - ara_req_d.stride = core_v_xif_req_i.register_rs[0]; ara_req_d.eew_vs2 = vtype_q.vsew; // Encode vslidedown/vslide1down on the use_scalar_op field ara_req_d.use_scalar_op = 1'b0; @@ -1404,7 +1379,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( is_rs1 = 1'b1; // These generate a request to Ara's backend - ara_req_d.scalar_op = core_v_xif_req_i.register_rs[0]; ara_req_d.use_scalar_op = 1'b1; ara_req_d.vs2 = insn.varith_type.rs2; ara_req_d.use_vs2 = 1'b1; @@ -1648,7 +1622,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( ara_req_d.vd = insn.varith_type.rd; ara_req_d.use_vd = 1'b1; ara_req_d.vm = insn.varith_type.vm; - ara_req_d.fp_rm = core_v_xif_req_i.frm; ara_req_valid_d = 1'b1; // Decode based on the func6 field @@ -2052,7 +2025,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( if (FPUSupport != FPUSupportNone) begin // These generate a request to Ara's backend - ara_req_d.scalar_op = core_v_xif_req_i.register_rs[0]; ara_req_d.use_scalar_op = 1'b1; ara_req_d.vs2 = insn.varith_type.rs2; ara_req_d.use_vs2 = 1'b1; @@ -2060,7 +2032,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( ara_req_d.use_vd = 1'b1; ara_req_d.vm = insn.varith_type.vm; ara_req_d.is_stride_np2 = is_stride_np2; - ara_req_d.fp_rm = core_v_xif_req_i.frm; ara_req_valid_d = 1'b1; // Decode based on the func6 field @@ -2236,12 +2207,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( default: illegal_insn = 1'b1; endcase - // Check if the FP scalar operand is NaN-boxed. If not, replace it with a NaN. 
- case (vtype_q.vsew) - EW16: if (~(&core_v_xif_req_i.register_rs[0][63:16])) ara_req_d.scalar_op = 64'h0000000000007e00; - EW32: if (~(&core_v_xif_req_i.register_rs[0][63:32])) ara_req_d.scalar_op = 64'h000000007fc00000; - endcase - // Instructions with an integer LMUL have extra constraints on the registers they // can access. The constraints can be different for the two source operands and the // destination register. @@ -2292,7 +2257,7 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( riscv::OpcodeLoadFp: begin // Instruction is of one of the RVV types - automatic rvv_instruction_t insn = rvv_instruction_t'(core_v_xif_req_i.issue_req_instr.instr); + automatic rvv_instruction_t insn = rvv_instruction_t'(cvxif_issue_req_i.instr.instr); // The instruction is a load is_vload = 1'b1; @@ -2300,7 +2265,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( ara_req_d.vd = insn.vmem_type.rd; ara_req_d.use_vd = 1'b1; ara_req_d.vm = insn.vmem_type.vm; - ara_req_d.scalar_op = core_v_xif_req_i.register_rs[0]; ara_req_valid_d = 1'b1; // Decode the element width @@ -2382,7 +2346,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( end 2'b10: begin ara_req_d.op = VLSE; - ara_req_d.stride = core_v_xif_req_i.register_rs[1]; end 2'b01, // Indexed-unordered 2'b11: begin // Indexed-ordered @@ -2444,21 +2407,8 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( // Maximum vector length. VLMAX = nf * VLEN / EW8. 
ara_req_d.vtype.vsew = EW8; unique case (insn.vmem_type.nf) - 3'd0: begin - ara_req_d.vl = VLENB << 0; - ara_req_d.emul = LMUL_1; - end - 3'd1: begin - ara_req_d.vl = VLENB << 1; - ara_req_d.emul = LMUL_2; - end - 3'd3: begin - ara_req_d.vl = VLENB << 2; - ara_req_d.emul = LMUL_4; - end - 3'd7: begin - ara_req_d.vl = VLENB << 3; - ara_req_d.emul = LMUL_8; + 3'd0, 3'd1, 3'd3, 3'd7: begin + /* Not illegal */ end default: begin // Trigger an error for the reserved simm values @@ -2466,14 +2416,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( end endcase end - - // Wait until the back-end answers to acknowledge those instructions - if (1'b1) begin - ara_req_valid_d = 1'b0; - // In case of error, modify vstart - if (ara_resp.error) - vstart_d = ara_resp.error_vl; - end end ///////////////////// @@ -2488,7 +2430,7 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( riscv::OpcodeStoreFp: begin // Instruction is of one of the RVV types - automatic rvv_instruction_t insn = rvv_instruction_t'(core_v_xif_req_i.issue_req_instr.instr); + automatic rvv_instruction_t insn = rvv_instruction_t'(cvxif_issue_req_i.instr.instr); // The instruction is a store is_vstore = 1'b1; @@ -2503,7 +2445,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( ara_req_d.use_vs1 = 1'b1; ara_req_d.eew_vs1 = eew_q[insn.vmem_type.rd]; // This is the vs1 EEW ara_req_d.vm = insn.vmem_type.vm; - ara_req_d.scalar_op = core_v_xif_req_i.register_rs[0]; ara_req_valid_d = 1'b1; // Decode the element width @@ -2580,7 +2521,6 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( end 2'b10: begin ara_req_d.op = VSSE; - ara_req_d.stride = core_v_xif_req_i.register_rs[1]; end 2'b01, // Indexed-unordered 2'b11: begin // Indexed-orderd @@ -2657,21 +2597,22 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( //////////////////////////// riscv::OpcodeSystem: begin - automatic rvv_instruction_t insn = 
rvv_instruction_t'(core_v_xif_req_i.issue_req_instr.instr); + automatic rvv_instruction_t insn = rvv_instruction_t'(cvxif_issue_req_i.instr.instr); // These always respond at the same cycle is_config = 1'b1; - unique case (core_v_xif_req_i.issue_req_instr.itype.funct3) + unique case (cvxif_issue_req_i.instr.itype.funct3) 3'b001: begin // csrrw is_rs1 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rs2 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rd = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); // Decode the CSR. - case (riscv::csr_addr_t'(core_v_xif_req_i.issue_req_instr.itype.imm)) + case (riscv::csr_addr_t'(cvxif_issue_req_i.instr.itype.imm)) // Only vstart can be written with CSR instructions. riscv::CSR_VSTART: begin - vstart_d = core_v_xif_req_i.register_rs[0]; - csr_stall_o = 1'b1; + //vstart_d = register_rs[0]; + csr_spec_mod_o = 1'b1; + csr_spec_mod_reg_o = 1'b1; end riscv::CSR_VXRM: begin /* Not illegal */ @@ -2689,26 +2630,27 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( is_rs2 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rd = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); // Decode the CSR. 
- case (riscv::csr_addr_t'(core_v_xif_req_i.issue_req_instr.itype.imm)) + case (riscv::csr_addr_t'(cvxif_issue_req_i.instr.itype.imm)) riscv::CSR_VSTART: begin - vstart_d = vstart_q | vlen_t'(core_v_xif_req_i.register_rs[0]); - csr_stall_o = 1'b1; + //vstart_d = vstart_q | vlen_t'(register_rs[0]); + csr_spec_mod_o = 1'b1; + csr_spec_mod_reg_o = 1'b1; end riscv::CSR_VTYPE: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VL: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VLENB: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end @@ -2728,26 +2670,27 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( is_rs2 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rd = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); // Decode the CSR. 
- case (riscv::csr_addr_t'(core_v_xif_req_i.issue_req_instr.itype.imm)) + case (riscv::csr_addr_t'(cvxif_issue_req_i.instr.itype.imm)) riscv::CSR_VSTART: begin - vstart_d = vstart_q & ~vlen_t'(core_v_xif_req_i.register_rs[0]); - csr_stall_o = 1'b1; + //vstart_d = vstart_q & ~vlen_t'(register_rs[0]); + csr_spec_mod_o = 1'b1; + csr_spec_mod_reg_o = 1'b1; end riscv::CSR_VTYPE: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VL: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VLENB: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end @@ -2764,10 +2707,11 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( is_rs2 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rd = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); // Decode the CSR. - case (riscv::csr_addr_t'(core_v_xif_req_i.issue_req_instr.itype.imm)) + case (riscv::csr_addr_t'(cvxif_issue_req_i.instr.itype.imm)) // Only vstart can be written with CSR instructions. riscv::CSR_VSTART: begin - vstart_d = vlen_t'(core_v_xif_req_i.issue_req_instr.itype.rs1); + vstart_d = vlen_t'(cvxif_issue_req_i.instr.itype.rs1); + csr_spec_mod_o = 1'b1; end riscv::CSR_VXRM: begin /* Not illegal */ @@ -2785,25 +2729,26 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( is_rs2 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rd = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); // Decode the CSR. 
- case (riscv::csr_addr_t'(core_v_xif_req_i.issue_req_instr.itype.imm)) + case (riscv::csr_addr_t'(cvxif_issue_req_i.instr.itype.imm)) riscv::CSR_VSTART: begin - vstart_d = vstart_q | vlen_t'(core_v_xif_req_i.issue_req_instr.itype.rs1); + vstart_d = vstart_q | vlen_t'(cvxif_issue_req_i.instr.itype.rs1); + csr_spec_mod_o = 1'b1; end riscv::CSR_VTYPE: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VL: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VLENB: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end @@ -2820,25 +2765,26 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( is_rs2 = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); is_rd = is_vector_csr(riscv::csr_reg_t'(insn.i_type.imm)); // Decode the CSR. 
- unique case (riscv::csr_addr_t'(core_v_xif_req_i.issue_req_instr.itype.imm)) + unique case (riscv::csr_addr_t'(cvxif_issue_req_i.instr.itype.imm)) riscv::CSR_VSTART: begin - vstart_d = vstart_q & ~vlen_t'(core_v_xif_req_i.issue_req_instr.itype.rs1); + vstart_d = vstart_q & ~vlen_t'(cvxif_issue_req_i.instr.itype.rs1); + csr_spec_mod_o = 1'b1; end riscv::CSR_VTYPE: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VL: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end riscv::CSR_VLENB: begin // Only reads are allowed - if (!(core_v_xif_req_i.issue_req_instr.itype.rs1 == '0)) begin + if (!(cvxif_issue_req_i.instr.itype.rs1 == '0)) begin illegal_insn = 1'b1; end end @@ -2858,7 +2804,7 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( end riscv::OpcodeAmo: begin - automatic rvv_instruction_t insn = rvv_instruction_t'(core_v_xif_req_i.issue_req_instr.instr); + automatic rvv_instruction_t insn = rvv_instruction_t'(cvxif_issue_req_i.instr.instr); case (insn.vamo_type.width) 3'b000, //VAMO*EI8.V 3'b101, //VAMO*EI16.V @@ -2893,9 +2839,9 @@ module ara_pre_decoder import ara_pkg::*; import rvv_pkg::*; #( end // Sync the speculative CSRs with their real values - if (sync_i) begin - vstart_d = csr_sync_i.vstart; - vtype_d = csr_sync_i.vtype; + if (csr_sync_valid_i) begin + vstart_d = csr_sync_i.vstart; + vtype_d = csr_sync_i.vtype; end end: p_decoder endmodule : ara_pre_decoder diff --git a/hardware/src/ara_ring_buffer.sv b/hardware/src/ara_ring_buffer.sv index a08e25c9e..628dc28fd 100644 --- a/hardware/src/ara_ring_buffer.sv +++ b/hardware/src/ara_ring_buffer.sv @@ -39,7 +39,8 @@ module ara_ring_buffer #( input readregflags_t rs_valid_i, input fpnew_pkg::roundmode_e frm_i, // Instruction 
information - output dtype data_o + output dtype data_o, + output logic keep_spec_csr_insn_o ); // Clock gating control logic gate_clock; @@ -56,6 +57,13 @@ module ara_ring_buffer #( logic [ADDR_DEPTH:0] usage; logic [ADDR_DEPTH-1:0] usage_cap; + // Does the buffer contain any instruction that modifies one of the speculative CSRs? + always_comb begin + keep_spec_csr_insn_o = '0; + for (int unsigned i = 0; i < DEPTH; i++) begin + keep_spec_csr_insn_o |= mem_q[i].is_spec_csr; + end + end // assign full_o = (tail_q+1'b1 == head_q) ? 1'b1 : 1'b0; // assign empty_o = (tail_q == head_q) ? 1'b1 : 1'b0; diff --git a/hardware/src/ara_xif_handler.sv b/hardware/src/ara_xif_handler.sv index 3ecdbf1bc..39ea2bbdf 100644 --- a/hardware/src/ara_xif_handler.sv +++ b/hardware/src/ara_xif_handler.sv @@ -7,212 +7,195 @@ // Handler to take care of the XIF signals module ara_xif_handler #( - parameter int unsigned NrLanes = 0, - parameter int unsigned HARTID_WIDTH = ariane_pkg::NR_RGPR_PORTS, - parameter int unsigned ID_WIDTH = ariane_pkg::TRANS_ID_BITS, - parameter type readregflags_t = logic, - parameter type writeregflags_t = logic, - parameter type x_req_t = logic, - parameter type x_resp_t = logic, - parameter type x_issue_req_t = logic, - parameter type x_issue_resp_t = logic, - parameter type x_result_t = logic, - parameter type x_acc_resp_t = logic, - parameter type csr_sync_t = logic, - parameter type instr_pack_t = logic - ) ( - // Clock and Reset - input logic clk_i, - input logic rst_ni, - // XIF - input x_req_t core_v_xif_req_i, - output x_resp_t core_v_xif_resp_o, - // <-> Ara Dispatcher - output instr_pack_t instruction_o, - output logic instruction_valid_o, - input logic instruction_ready_i, - input csr_sync_t csr_sync_i, - input x_resp_t core_v_xif_resp_i, - // Temp - input logic ara_idle, - input logic [NrLanes-1:0] vxsat_flag, - input logic [NrLanes-1:0][4:0] fflags_ex, - input logic [NrLanes-1:0] fflags_ex_valid, - input logic load_complete, - input logic 
store_complete,
-    input logic store_pending
-  );
-
-  logic csr_stall;
-  logic csr_block;
-  logic csr_stall_d, csr_stall_q;
-  logic [ID_WIDTH-1:0] csr_instr_id_d, csr_instr_id_q;
+  parameter int unsigned NrLanes = 0,
+  parameter int unsigned HARTID_WIDTH = ariane_pkg::NR_RGPR_PORTS,
+  parameter int unsigned ID_WIDTH = ariane_pkg::TRANS_ID_BITS,
+  parameter type readregflags_t = logic,
+  parameter type writeregflags_t = logic,
+  parameter type x_req_t = logic,
+  parameter type x_resp_t = logic,
+  parameter type x_issue_req_t = logic,
+  parameter type x_issue_resp_t = logic,
+  parameter type x_result_t = logic,
+  parameter type x_acc_resp_t = logic,
+  parameter type csr_sync_t = logic,
+  parameter type instr_pack_t = logic
+  ) (
+  // Clock and Reset
+  input logic clk_i,
+  input logic rst_ni,
+  // XIF
+  input x_req_t core_v_xif_req_i,
+  output x_resp_t core_v_xif_resp_o,
+  // <-> Ara Dispatcher
+  output instr_pack_t instruction_o,
+  output logic instruction_valid_o,
+  input logic instruction_ready_i,
+  input csr_sync_t csr_sync_i,
+  input x_resp_t core_v_xif_resp_i,
+  // Temp
+  input logic ara_idle,
+  input logic [NrLanes-1:0] vxsat_flag,
+  input logic [NrLanes-1:0][4:0] fflags_ex,
+  input logic [NrLanes-1:0] fflags_ex_valid,
+  input logic load_complete,
+  input logic store_complete,
+  input logic store_pending
+);
+
+  localparam int unsigned XIF_BUF_DEPTH = 4;
+
+  // Helpers
+  logic commit_ok, commit_kill;
+  logic xif_issue_decode_ok; // Declared under the name that is actually assigned/used (was xif_issue_decode_error, leaving _ok an implicit net)
+  logic push_instr_buffer;
+  logic buffer_full, buffer_empty;
+
+  // XIF issue decoder to pre-decode and check for errors
+  x_issue_req_t predec_xif_issue_req;
+  logic predec_xif_issue_req_valid;
+  x_issue_resp_t predec_xif_issue_resp;
+
+  instr_pack_t instr_to_buffer;
+
+  // Speculative CSR sync logic
+  logic csr_sync_valid, predec_csr_spec_mod_reg, predec_csr_spec_mod, buf_has_spec_csr_insn;
+  enum logic {NORMAL_OP, WAIT_EMPTY_BUF} csr_sync_state_d, csr_sync_state_q;
   // Effective commit or kill on the XIF commit intf
-  logic
commit_ok, commit_kill; assign commit_ok = core_v_xif_req_i.commit_valid & !core_v_xif_req_i.commit_commit_kill; assign commit_kill = core_v_xif_req_i.commit_valid & core_v_xif_req_i.commit_commit_kill; // No errors at decode time for this RVV vector insn - logic xif_issue_decode_error; - assign xif_issue_decode_ok = core_v_xif_resp_decoder2.issue_resp_accept; + assign xif_issue_decode_ok = predec_xif_issue_resp.accept; // Push a new instruction from the XIF issue intf into the speculative XIF buffer - logic new_instr, pop_next_instr; - assign new_instr_valid = core_v_xif_req_i.issue_valid & core_v_xif_resp_o.issue_ready & xif_issue_decode_ok & ~csr_stall_q; + assign push_instr_buffer = core_v_xif_req_i.issue_valid & core_v_xif_resp_o.issue_ready & + xif_issue_decode_ok & ~commit_kill; + + // Pass through the pre-decoder + // Pre-decode the instruction if the buffer is not full + assign predec_xif_issue_req_valid = core_v_xif_req_i.issue_valid & ~buffer_full; + assign predec_xif_issue_req.instr = core_v_xif_req_i.issue_req_instr; + assign predec_xif_issue_req.hartid = core_v_xif_req_i.issue_req_hartid; + assign predec_xif_issue_req.id = core_v_xif_req_i.issue_req_id; + + // Input to the XIF buffer + assign instr_to_buffer.instr = core_v_xif_req_i.issue_req_instr; + assign instr_to_buffer.hartid = core_v_xif_req_i.issue_req_hartid; + assign instr_to_buffer.id = core_v_xif_req_i.issue_req_id; + assign instr_to_buffer.is_writeback = predec_xif_issue_resp.writeback; + assign instr_to_buffer.register_read = predec_xif_issue_resp.register_read; + assign instr_to_buffer.rs1 = '0; + assign instr_to_buffer.rs2 = '0; + assign instr_to_buffer.rs_valid = '0; + assign instr_to_buffer.frm = fpnew_pkg::RNE; + assign instr_to_buffer.is_spec_csr = predec_csr_spec_mod; - // XIF issue buffer full - logic buffer_full; + always_comb begin + // Pass through + core_v_xif_resp_o = core_v_xif_resp_i; + + // Issue ready if buffer has space, and we are not waiting for it to be empty + 
core_v_xif_resp_o.issue_ready = ~buffer_full & (csr_sync_state_q != WAIT_EMPTY_BUF); + core_v_xif_resp_o.issue_resp_writeback = predec_xif_issue_resp.writeback; + core_v_xif_resp_o.issue_resp_register_read = predec_xif_issue_resp.register_read; + core_v_xif_resp_o.issue_resp_is_vfp = predec_xif_issue_resp.is_vfp; + + // Always ready for buffering the registers + core_v_xif_resp_o.register_ready = 1'b1; + end - // XIF issue decoder to pre-decode and check for errors - x_resp_t core_v_xif_resp_decoder2; - x_req_t core_v_xif_req_decoder2; ara_pre_decoder #( - .NrLanes(NrLanes), - .x_req_t (x_req_t), - .x_resp_t (x_resp_t), - .x_issue_req_t(x_issue_req_t), - .x_issue_resp_t(x_issue_resp_t), - .x_acc_resp_t(x_acc_resp_t), - .csr_sync_t (csr_sync_t) + .NrLanes (NrLanes ), + .x_issue_req_t (x_issue_req_t ), + .x_issue_resp_t (x_issue_resp_t), + .csr_sync_t (csr_sync_t ) ) i_xif_issue_pre_decoder ( - .clk_i (clk_i ), - .rst_ni (rst_ni), - // Interface with the sequencer - .ara_req_ready_i (core_v_xif_req_decoder2.issue_valid), - .ara_idle_i (ara_idle ), - // XIF - .core_v_xif_req_i (core_v_xif_req_decoder2 ), - .core_v_xif_resp_o (core_v_xif_resp_decoder2), - // CSR sync - .sync_i (commit_kill), - .csr_sync_i (csr_sync_i ), - .csr_stall_o (csr_stall ) + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .cvxif_issue_req_i (predec_xif_issue_req ), + .cvxif_issue_req_valid_i(predec_xif_issue_req_valid), + .cvxif_issue_resp_o (predec_xif_issue_resp ), + .csr_sync_valid_i (csr_sync_valid ), + .csr_sync_i (csr_sync_i ), + .csr_spec_mod_o (predec_csr_spec_mod ), + .csr_spec_mod_reg_o (predec_csr_spec_mod_reg ) ); - // Issued instruction - instr_pack_t instr_to_buffer; - - assign instr_to_buffer.instr = core_v_xif_req_i.issue_req_instr; - assign instr_to_buffer.hartid = core_v_xif_req_i.issue_req_hartid; - assign instr_to_buffer.id = core_v_xif_req_i.issue_req_id; - assign instr_to_buffer.register_read = core_v_xif_resp_o.issue_resp_register_read; - assign instr_to_buffer.rs1 = '0; - assign 
instr_to_buffer.rs2 = '0; - assign instr_to_buffer.rs_valid = '0; - assign instr_to_buffer.frm = fpnew_pkg::RNE; - assign instr_to_buffer.is_writeback = core_v_xif_resp_o.issue_resp_writeback; - - // to keep track of the returned instructions - logic [ID_WIDTH-1:0] result_id; - ara_ring_buffer #( - .ID_WIDTH(ID_WIDTH), - .DEPTH(4), - .readregflags_t(readregflags_t), - .dtype(instr_pack_t) - ) i_xif_buffer ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .full_o (buffer_full ), - .empty_o (/* unused */ ), - .push_i (new_instr ), - .commit_i (commit_ok ), - .register_valid_i (core_v_xif_req_i.register_valid ), - .flush_i (commit_kill ), - .ready_i (instruction_ready_i ), - .valid_o (instruction_valid_o ), - .commit_id_i (core_v_xif_req_i.commit_id ), - .reg_id_i (core_v_xif_req_i.register_id ), - .id_i (instr_to_buffer.id ), - .data_i (instr_to_buffer ), - .rs1_i (core_v_xif_req_i.register_rs[0] ), - .rs2_i (core_v_xif_req_i.register_rs[1] ), - .rs_valid_i (core_v_xif_req_i.register_rs_valid ), - .frm_i (core_v_xif_req_i.frm ), - .data_o (instruction_o ) - ); + .ID_WIDTH (ID_WIDTH ), + .DEPTH (XIF_BUF_DEPTH ), + .readregflags_t (readregflags_t ), + .dtype (instr_pack_t ) + ) i_xif_buffer ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .full_o (buffer_full ), + .empty_o (buffer_empty ), + .push_i (push_instr_buffer ), + .commit_i (commit_ok ), + .register_valid_i (core_v_xif_req_i.register_valid ), + .flush_i (commit_kill ), + .ready_i (instruction_ready_i ), + .valid_o (instruction_valid_o ), + .commit_id_i (core_v_xif_req_i.commit_id ), + .reg_id_i (core_v_xif_req_i.register_id ), + .id_i (instr_to_buffer.id ), + .data_i (instr_to_buffer ), + .rs1_i (core_v_xif_req_i.register_rs[0] ), + .rs2_i (core_v_xif_req_i.register_rs[1] ), + .rs_valid_i (core_v_xif_req_i.register_rs_valid ), + .frm_i (core_v_xif_req_i.frm ), + .data_o (instruction_o ), + .keep_spec_csr_insn_o (buf_has_spec_csr_insn ) + ); + // Speculative CSR sync FSM: + // 1) Upon a commit kill, the speculative CSRs 
have to be rolled back. + // During the roll back, the pre-decoder cannot decode! + // a) If the xif buffer contains instructions that modified the spec CSRs, wait + // until the xif buffer is empty and then sync. + // b) If the buffer does not contain them, just sync immediately (optimization). + // 2) Upon an instruction that modifies the spec CSRs through a register value, + // we accept it and wait for it to modify the current state of the real CSRs. + // Then, we sync. This operation is slow, but rare. always_comb begin - // Set default - core_v_xif_req_decoder2 = core_v_xif_req_i; - core_v_xif_resp_o = core_v_xif_resp_i; - core_v_xif_resp_o.issue_ready = 1'b0; - csr_stall_d = csr_stall_q; - csr_instr_id_d = csr_instr_id_q; - - // Zero everything but the issue if - core_v_xif_req_decoder2.register_valid = '0; - core_v_xif_req_decoder2.register_hartid = '0; - core_v_xif_req_decoder2.register_id = '0; - core_v_xif_req_decoder2.register_rs[0] = '0; - core_v_xif_req_decoder2.register_rs[1] = '0; - core_v_xif_req_decoder2.register_rs_valid = '0; - core_v_xif_req_decoder2.commit_valid = '0; - core_v_xif_req_decoder2.commit_id = '0; - core_v_xif_req_decoder2.commit_hartid = '0; - core_v_xif_req_decoder2.commit_commit_kill = '0; - core_v_xif_req_decoder2.result_ready = '0; - // core_v_xif_req_decoder2.frm = '0; - core_v_xif_req_decoder2.store_pending = '0; - core_v_xif_req_decoder2.acc_cons_en = '0; - core_v_xif_req_decoder2.inval_ready = '0; - - // Construct relevant inputs for pre decoder - core_v_xif_req_decoder2.register_valid = core_v_xif_req_i.issue_valid; - core_v_xif_req_decoder2.result_ready = core_v_xif_req_i.issue_valid; - core_v_xif_req_decoder2.issue_valid = core_v_xif_req_i.issue_valid && !buffer_full; - - // Issue ready if buffer has space and we are not flushing it - if (core_v_xif_req_i.issue_valid && !buffer_full && !commit_kill) begin - core_v_xif_resp_o.issue_ready = 1'b1; - end - core_v_xif_resp_o.issue_resp_writeback = 
core_v_xif_resp_decoder2.issue_resp_writeback; - core_v_xif_resp_o.issue_resp_register_read = core_v_xif_resp_decoder2.issue_resp_register_read; - core_v_xif_resp_o.issue_resp_is_vfp = core_v_xif_resp_decoder2.issue_resp_is_vfp; - - // If we predecode a vsetvl instruction we need to stall to wait for its response to correctly compute the CSR's in the predecoder - if (csr_stall) begin - csr_stall_d = 1'b1; - csr_instr_id_d = core_v_xif_req_i.issue_req_id; - end - - // If we are waiting for a vsetvl to complete we need to mask the outpu of the pre decoder - if (csr_stall_q) begin - core_v_xif_resp_o.issue_ready = 1'b0; - // If the registers for the stalling instruction are passed we can resolve the stall of the pre decoder - if (core_v_xif_req_i.register_id == csr_instr_id_q && core_v_xif_req_i.register_valid) begin - core_v_xif_req_decoder2.register_valid = 1'b1; - core_v_xif_req_decoder2.result_ready = 1'b1; - core_v_xif_req_decoder2.issue_valid = 1'b1; - core_v_xif_req_decoder2.register_rs[0] = core_v_xif_req_i.register_rs[0]; - core_v_xif_req_decoder2.register_rs[1] = core_v_xif_req_i.register_rs[1]; - // Mask decoded instruction - core_v_xif_req_decoder2.issue_req_instr = instruction_o.instr; - // Stop stalling the pre decoder - csr_stall_d = 1'b0; + csr_sync_state_d = csr_sync_state_q; + csr_sync_valid = 1'b0; + + unique case(csr_sync_state_q) + WAIT_EMPTY_BUF: begin + // Stall the issue intf here and wait for empty xif buffer + if (!instruction_valid_o) begin + // Sync and unstall the issue intf + csr_sync_valid = 1'b1; + csr_sync_state_d = NORMAL_OP; + end end - end - - if (commit_kill) begin - csr_stall_d = 1'b0; - end - - // register - core_v_xif_resp_o.register_ready = core_v_xif_req_i.register_valid; - - // result - core_v_xif_resp_o.result_rd = instruction_o.instr[11:7]; - core_v_xif_resp_o.result_we = instruction_o.is_writeback; + default: begin + if (commit_kill) begin + // We are not accepting the instruction here + if (buf_has_spec_csr_insn) begin + 
// Stall the intf and wait until we can safely sync
+            csr_sync_state_d = WAIT_EMPTY_BUF;
+          end else begin
+            // Sync without stalling the issue intf
+            csr_sync_valid = 1'b1;
+          end
+        end else begin
+          // Accept the instruction and stall in the next cycle
+          if (predec_csr_spec_mod_reg) csr_sync_state_d = WAIT_EMPTY_BUF;
+        end
+      end
+    endcase
   end
-  always_ff @(posedge clk_i or negedge rst_ni) begin : csr_stall_fsm
+  always_ff @(posedge clk_i or negedge rst_ni) begin
     if(~rst_ni) begin
-      csr_stall_q <= 0;
-      csr_instr_id_q <= 0;
+      csr_sync_state_q <= NORMAL_OP; // Non-blocking: state register, async active-low reset value
     end else begin
-      csr_stall_q <= csr_stall_d;
-      csr_instr_id_q <= csr_instr_id_d;
+      csr_sync_state_q <= csr_sync_state_d; // Non-blocking: avoids sim races with readers of csr_sync_state_q
     end
   end
 endmodule : ara_xif_handler