From 565706e9e1a8ba7304e1f1882bb19a27e107756a Mon Sep 17 00:00:00 2001 From: 250HandsomeLiang <2502481961@qq.com> Date: Sat, 21 May 2022 20:12:37 +0800 Subject: [PATCH 01/41] feat: updated AXI to cache interface --- src/vsrc/AXI/README.md | 64 +++++++++------ src/vsrc/AXI/axi_master.sv | 158 +++++++++++++++++++++---------------- 2 files changed, 129 insertions(+), 93 deletions(-) diff --git a/src/vsrc/AXI/README.md b/src/vsrc/AXI/README.md index 3705999..e115b10 100644 --- a/src/vsrc/AXI/README.md +++ b/src/vsrc/AXI/README.md @@ -21,30 +21,24 @@ 来自cpu和输出到cpu的信号,其中只有inst_cpu_data_o,data_cpu_data_o和inst_stallreq,data_stallreq是输出的。 //icache/IF input wire [`ADDR]inst_cpu_addr_i, - input wire inst_cpu_ce_i, input wire [`Data]inst_cpu_data_i, - input wire inst_cpu_we_i , - input wire [3:0]inst_cpu_sel_i, - input wire inst_stall_i, input wire inst_flush_i, output reg [`Data]inst_cpu_data_o, - output wire inst_stallreq, input wire [3:0]inst_id,//决定是读数据还是取指令,默认4’b0000 //icache 读请求的类型,3’b100表示一次性读取1个cache行(一个cache行默认4*32bit的数据,inst[addr],inst[addr+4],inst[addr+8],inst[addr+12]);其他值表示一次读取1*32bit数据 input wire [2:0]icache_rd_type_i, - + input wire icache_rd_req_i,//读请求使能信号,高位有效 + output reg icache_rd_rdy_o,//读请求可被接受 + output reg icache_ret_valid_o,//读数据有效 + output reg [1:0]icache_ret_last_o,//最后一个读数据 //dcache/MEM input wire [`ADDR]data_cpu_addr_i, - input wire data_cpu_ce_i, input wire [`Data]data_cpu_data_i, - input wire data_cpu_we_i , input wire [3:0]data_cpu_sel_i, - input wire data_stall_i, input wire data_flush_i, output reg [`Data]data_cpu_data_o, - output wire data_stallreq, input wire [3:0]data_id,//决定是读数据还是取指令,默认4'b0001 // 同icache_rd_type_i input wire [2:0]dcache_rd_type_i, @@ -52,6 +46,15 @@ input wire [2:0]dcache_wr_type_i,//decache write type //4*32bit的写入数据,如果只想写一个数据,只需要保证31:0是正确的写入数据即可 input wire [`BurstData]dcache_wr_data,//data from dcache + input wire [2:0]dcache_rd_type_i, + input wire dcache_rd_req_i, + output reg dcache_rd_rdy_o, + output reg 
dcache_ret_valid_o, + output reg [1:0]dcache_ret_last_o, + input wire dcache_wr_req_i,//写使能,高电平有效 + output reg dcache_wr_rdy,//写请求可被接受 + + AXI标准信号接口,输出到从机或从从机输入,无需关心内部逻辑,照着接线就好,s是前缀。 //Slave @@ -103,16 +106,16 @@ output reg s_bready ``` +# 本版本为面向cache的版本,面向CPU的版本,请见5.9号提交的版本。AXI对cache有特定要求务必要仔细阅读说明。 + ## 使用说明 1. 把axi_Master主机接口放到cpuTop中实例化 - * 实现仲裁的axi接口(不支持同种请求的连续发送和突发传输,支持同时送取指和取数),取值id接口`4’b0000`,取数id接口`4'b0001` + * 实现仲裁的axi接口(不支持同种请求的连续发送,支持同时送取指和取数和突发传输),取指id接口`4'b0000`,取数id接口`4'b0001` * 支持写操作,读指令和读数据。若同时发出取指和取数,会并行执行(指同时发送两种请求,若先取指后取数或先取数后取指都无法并行) * 如果连续发送两次读请求,则会等待第一个读请求结束在处理第二个读请求 * 先写后读,写请求结束后才会处理读请求 * 所有的请求在请求结束前,都需要保证来自cpu的输入信号不变 * dcache/icache_rd/wr_type_i表示一次性读或写的数据量。`3'b100`表示一次性读/写连续四个地址的数据;其他值表示只读/写一个数据,推荐直接写0 - * 即使没有cache。icache和dcache开头的信号都要接 - * 下面的说明,不适用突发传输,icache/dcache_rd/wr_type_i照着抄就好,不需要改。需要注意的是dcache_wr_data需要是128bit的数据,如果只想写一个的话,需要再前面添加96个0,例如写data[31:0],则dcache_wr_data({{96{1'b0}}},data[31:0]) ``` wire aresetn=~rst; @@ -128,27 +131,36 @@ .aresetn(aresetn), //low is valid //icache/IF .inst_cpu_addr_i(inst_pc), - .inst_cpu_ce_i(inst_chip_enable), - .inst_cpu_we_i(0) , .inst_cpu_sel_i(4'b1111), .inst_flush_i(0), .inst_cpu_data_o(inst_data_from_axi), - .inst_stallreq(stallreq_from_if), .inst_id(4'b0000),//决定是读数据还是取指令 - .icache_rd_type_i(0),//3'b100开启连续读4个数据;0只读一个数据 - + .icache_rd_type_i(3'b100),//3'b100开启连续读4个数据;0只读一个数据 + .icache_rd_req_i(),//接读使能 + .icache_rd_rdy_o(),//接读请求握手信号 + .icache_ret_valid_o(),//接读有效信号 + .icache_ret_last_o(),//接最后一个读数据信号 + + //dacache/MEM .data_cpu_addr_i(data_pc), - .data_cpu_ce_i(data_chip_enable), - .data_cpu_we_i(data_we) , + .data_cpu_data_i(data), .data_cpu_sel_i(4'b1111), .data_flush_i(0), .data_cpu_data_o(mem_data_from_axi), - .data_stallreq(stallreq_from_mem), .data_id(4'b0001),//决定是读数据还是取指令 - .dcache_rd_type_i(0),//同icache + .dcache_rd_type_i(),//同icache + 
.dcache_wr_type_i(),//写的数据量,3'b100表示连续写四个数据至相邻的地址;0表示只写一个数据 + .dcache_wr_data(),//128bit的写入数据,如果只想写一个那么只需要保证31:0正确 + .dcache_rd_req_i(), + .dcache_rd_rdy_o(), + .dcache_ret_valid_o(), + .dcache_ret_last_o(), + .dcache_wr_data(),//data from dcache + .dcache_wr_req_i(),//write enable signal + .dcache_wr_rdy(),//write can receive + + //ar .s_arid(i_arid), //arbitration .s_araddr(i_araddr), @@ -203,4 +215,6 @@ 3. xxx_cpu_sel_i为字节选通使能,用来实现store类型。 -4. stallreq_if和stallreq_mem为暂停请求,因为AXI直接面向CPU,所以,在AXI进行读写数据时,CPU必须暂停,等到AXI完成读写数据的操作。 +4. 关于cache给AXI的信号。在AXI完成写或读请求前,cache的信号必须要持续的拉高。对于读指令,当ret_last拉高时,才能更新输出给AXI的信号(req,type,addr);对于写,和wr_rdy正常握手就好,输出给AXI的信号只需要保存一个时钟周期。 + +5. cache何时给信号。cache只要发出读或写请求就立刻给出所有信号(req,type,addr,data)。`重点`addr,data,type,req是同时给到AXI,而不是等到cache与rdy握手后才给addr和data,这样AXI就无法接受数据。握手是指握手后接收方立刻把数据存到寄存器里。 \ No newline at end of file diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index a8711c1..8519231 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -1,28 +1,32 @@ `include "AXI/axi_defines.sv" module axi_master ( input wire aclk, - input wire aresetn, //low is valid + input wire aresetn, //low is valid + //inst input wire [`ADDR] inst_cpu_addr_i, - input wire inst_cpu_ce_i, - input wire inst_cpu_we_i, - input wire [3:0] inst_cpu_sel_i, output reg [`Data] inst_cpu_data_o, - output wire inst_stallreq, input wire [3:0] inst_id, //决定是读数据还是取指令 input wire [2:0] icache_rd_type_i, //icahce read type + input wire icache_rd_req_i, //read enable signal + output reg icache_rd_rdy_o, //read can receive + output reg icache_ret_valid_o, //read data is valid + output reg [1:0] icache_ret_last_o, // read is over //data input wire [`ADDR] data_cpu_addr_i, - input wire data_cpu_ce_i, - input wire data_cpu_we_i, input wire [3:0] data_cpu_sel_i, output reg [`Data] data_cpu_data_o, - output wire data_stallreq, input wire [3:0] data_id, //决定是读数据还是取指令 input wire [2:0] dcache_rd_type_i, // dacache read type + input wire 
dcache_rd_req_i, + output reg dcache_rd_rdy_o, + output reg dcache_ret_valid_o, + output reg [1:0] dcache_ret_last_o, input wire [2:0] dcache_wr_type_i, //decache write type input wire [`BurstData] dcache_wr_data, //data from dcache + input wire dcache_wr_req_i, //write enable signal + output reg dcache_wr_rdy, //write can receive //Slave @@ -74,23 +78,16 @@ module axi_master ( ); reg write_wait_enable; - //read instruction stall - reg inst_stall_req_r; - assign inst_stallreq = inst_stall_req_r; reg [31:0] inst_buffer; //read and write data stall - reg stall_req_w; - reg data_stall_req_r; reg [31:0] data_buffer; - assign data_stallreq = data_stall_req_r || stall_req_w; reg [3:0] inst_r_state; reg [3:0] data_r_state; //fetch instruction before fetch data reg is_fetching_inst; - reg is_fetch_inst_OK; //read instruction signal to slave reg [`ID] inst_s_arid; //arbitration @@ -112,48 +109,62 @@ module axi_master ( //改变输出 always @(*) begin if (!aresetn) begin - inst_stall_req_r = 0; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 0; + icache_rd_rdy_o = 1; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end else begin case (inst_r_state) `R_FREE: begin - if (inst_cpu_ce_i && inst_cpu_we_i == 0) begin - inst_stall_req_r = 1; - inst_cpu_data_o = 0; - //is_fetching_inst=1; + if (icache_rd_req_i) begin + inst_cpu_data_o = 0; is_fetching_inst = 0; + + icache_rd_rdy_o = 1; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end else begin - inst_stall_req_r = 0; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 0; + + icache_rd_rdy_o = 1; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end end `R_ADDR: begin - inst_stall_req_r = 1; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 1; + + icache_rd_rdy_o = 0; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end `R_DATA: begin - //use id to judge the s_rdata type if (s_rvalid && s_rlast && s_rid[0] == 0) begin - inst_stall_req_r = 0; - inst_cpu_data_o = s_rdata; + 
inst_cpu_data_o = s_rdata; is_fetching_inst = 0; + + icache_rd_rdy_o = 1; + icache_ret_valid_o = 1; + icache_ret_last_o = 1; end else if (s_rvalid && s_rready && s_rid[0] == 0) begin - inst_stall_req_r = 1; - inst_cpu_data_o = s_rdata; + inst_cpu_data_o = s_rdata; is_fetching_inst = 1; + + icache_rd_rdy_o = 0; + icache_ret_valid_o = 1; + icache_ret_last_o = 0; end else begin - inst_stall_req_r = 1; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 1; + icache_rd_rdy_o = 0; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end end default: begin - inst_stall_req_r = 0; - inst_cpu_data_o = 0; - is_fetching_inst = 0; end endcase end @@ -177,7 +188,7 @@ module axi_master ( `R_FREE: begin if (write_wait_enable == 0) begin - if((inst_cpu_ce_i&&(inst_cpu_we_i==0))&&(!(data_cpu_ce_i&&(data_cpu_we_i==0))))//fetch inst but don't fetch data + if((icache_rd_req_i)&&(!(dcache_rd_req_i)))//fetch inst but don't fetch data begin inst_r_state <= `R_ADDR; inst_s_arid <= inst_id; @@ -190,7 +201,7 @@ module axi_master ( inst_s_arvalid <= 1; end - else if((inst_cpu_ce_i&&(inst_cpu_we_i==0))&&(data_cpu_ce_i&&(data_cpu_we_i==0)))//fetch inst and fetch data + else if((icache_rd_req_i)&&(dcache_rd_req_i))//fetch inst and fetch data begin //wait for fetch data request run into R_DATA state if (data_r_state == `R_DATA) begin @@ -271,7 +282,6 @@ module axi_master ( /** R **/ `R_DATA: begin if (s_rvalid && s_rlast && s_rid[0] == 0) begin - inst_r_state <= `R_FREE; inst_buffer <= s_rdata; inst_s_rready <= 0; @@ -290,7 +300,6 @@ module axi_master ( inst_s_arsize <= inst_s_arsize; inst_s_arlen <= 0; end - end default: begin @@ -319,33 +328,47 @@ module axi_master ( //改变输出 always @(*) begin if (!aresetn) begin - data_stall_req_r = 0; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end else begin case (data_r_state) `R_FREE: begin - if (data_cpu_ce_i && data_cpu_we_i == 0) begin - data_stall_req_r = 1; - 
data_cpu_data_o = 0; + if (dcache_rd_req_i) begin + data_cpu_data_o = 0; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end else begin - data_stall_req_r = 0; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end end `R_ADDR: begin - data_stall_req_r = 1; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 0; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end `R_DATA: begin if (s_rvalid && s_rlast && s_rid[0] == 1) begin - data_stall_req_r = 0; - data_cpu_data_o = s_rdata; + data_cpu_data_o = s_rdata; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 1; + dcache_ret_last_o = 1; end else if (s_rvalid && s_rready && s_rid[0] == 1) begin - data_stall_req_r = 1; - data_cpu_data_o = s_rdata; + data_cpu_data_o = s_rdata; + dcache_rd_rdy_o = 0; + dcache_ret_valid_o = 1; + dcache_ret_last_o = 0; end else begin - data_stall_req_r = 1; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 0; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end end default: begin @@ -372,7 +395,7 @@ module axi_master ( `R_FREE: begin - if(data_cpu_ce_i&&(data_cpu_we_i==0)&&(is_fetching_inst==0)&&(write_wait_enable==0)) + if(dcache_rd_req_i&&(is_fetching_inst==0)&&(write_wait_enable==0)) begin data_r_state <= `R_ADDR; data_s_arid <= data_id; @@ -446,7 +469,6 @@ module axi_master ( data_s_arsize <= 0; data_s_arlen <= 0; end - end default: begin @@ -490,33 +512,35 @@ module axi_master ( //改变输出 always @(*) begin if (!aresetn) begin - stall_req_w = 0; write_wait_enable = 0; + dcache_wr_rdy = 1; end else begin case (w_state) `W_FREE: begin - if (data_cpu_ce_i && (data_cpu_we_i)) begin - stall_req_w = 1; + if (dcache_wr_req_i) begin write_wait_enable = 1; + dcache_wr_rdy = 1; end else begin - stall_req_w = 0; write_wait_enable = 0; + dcache_wr_rdy = 1; end end `W_ADDR, `W_DATA: begin - stall_req_w = 1; write_wait_enable = 1; + dcache_wr_rdy = 0; end `W_RESP: begin if (s_bvalid 
&& s_bready) begin - stall_req_w = 0; write_wait_enable = 0; + dcache_wr_rdy = 1; end else begin - stall_req_w = 1; write_wait_enable = 1; + dcache_wr_rdy = 0; end end default: begin + write_wait_enable = 0; + dcache_wr_rdy = 0; end endcase end @@ -540,7 +564,7 @@ module axi_master ( `W_FREE: begin - if (data_cpu_ce_i && (data_cpu_we_i)) begin + if (dcache_wr_req_i) begin w_state <= `W_ADDR; s_awaddr <= data_cpu_addr_i; s_awsize <= 3'b010; @@ -593,7 +617,6 @@ module axi_master ( s_wlast <= s_wlast; end end - /** W **/ `W_DATA: begin @@ -618,7 +641,7 @@ module axi_master ( w_state <= w_state; s_wdata <= s_wdata; s_wvalid <= s_wvalid; - s_wlast = s_wlast; + s_wlast <= s_wlast; end end @@ -652,8 +675,6 @@ module axi_master ( assign s_awprot = 0; assign s_wid = 0; assign s_wstrb = data_cpu_sel_i; - // assign s_wlast=1; - //set axi signal assign s_arid = inst_s_arid | data_s_arid; @@ -661,4 +682,5 @@ module axi_master ( assign s_arsize = inst_s_arsize | data_s_arsize; assign s_arvalid = inst_s_arvalid | data_s_arvalid; assign s_rready = inst_s_rready | data_s_rready; + endmodule From b9c5f01186de4c01e902a79dae491598dc4a72d7 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sat, 21 May 2022 20:24:04 +0800 Subject: [PATCH 02/41] fix: rm legacy wires in cpu_top --- src/vsrc/cpu_top.sv | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 64ff98f..75ce590 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -118,19 +118,13 @@ module cpu_top ( // <-> ICache .inst_cpu_addr_i(axi_addr), - .inst_cpu_ce_i(axi_addr != 0), // FIXME: ce should not be used as valid? - .inst_cpu_sel_i(4'b1111), .inst_cpu_data_o(axi_data), - .inst_stallreq(axi_busy), - .inst_id(4'b0000), // Read Instruction only, TODO: move this from AXI to cache + .inst_id(4'b0000), // Read Instruction only // <-> MEM Stage .data_cpu_addr_i(data_axi_addr), - .data_cpu_ce_i(data_axi_addr != 0), // FIXME: ce should not be used as valid? 
- .data_cpu_we_i(data_axi_we), // FIXME: Write enable .data_cpu_sel_i(data_axi_sel), .data_cpu_data_o(axi_mem_data), - .data_stallreq(data_axi_busy), .data_id(4'b0001), .dcache_rd_type_i(3'b000), // For [31:0] .dcache_wr_type_i(3'b000), From 2d0121a931900d6e5cc497cc27610ae4f0e311fb Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sat, 21 May 2022 20:33:56 +0800 Subject: [PATCH 03/41] feat: change AXI to 128b --- src/vsrc/AXI/axi_defines.sv | 2 +- src/vsrc/cpu_top.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vsrc/AXI/axi_defines.sv b/src/vsrc/AXI/axi_defines.sv index 1642a42..fd4de6b 100644 --- a/src/vsrc/AXI/axi_defines.sv +++ b/src/vsrc/AXI/axi_defines.sv @@ -7,7 +7,7 @@ `define Lock 1:0 `define Cache 3:0 `define Prot 2:0 -`define Data 31:0 +`define Data 127:0 `define Resp 1:0 `define BurstData 127:0 diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 75ce590..17afe4f 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -39,7 +39,7 @@ module cpu_top ( input arready, // read back input [ 3:0] rid, - input [31:0] rdata, + input [127:0] rdata, input [ 1:0] rresp, input rlast, input rvalid, @@ -57,7 +57,7 @@ module cpu_top ( input awready, // write data output [ 3:0] wid, - output [31:0] wdata, + output [127:0] wdata, output [ 3:0] wstrb, output wlast, output wvalid, From 78500adfbcca79cc73e82ea36f736958b7d670e0 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sat, 21 May 2022 22:13:55 +0800 Subject: [PATCH 04/41] feat: implemented icache --- src/vsrc/cpu_top.sv | 77 ++++++++++-------- src/vsrc/dummy_icache.sv | 165 --------------------------------------- src/vsrc/icache.sv | 143 +++++++++++++++++++++++++++++++-- 3 files changed, 181 insertions(+), 204 deletions(-) delete mode 100644 src/vsrc/dummy_icache.sv diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 17afe4f..7f96ea6 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -8,7 +8,7 @@ `include "AXI/axi_master.sv" `include "frontend/frontend.sv" 
`include "instr_buffer.sv" -`include "dummy_icache.sv" +`include "icache.sv" `include "ctrl.sv" `include "pipeline_defines.sv" `include "pipeline/1_decode/id.sv" @@ -92,9 +92,10 @@ module cpu_top ( assign rst = ~rst_n; // ICache <-> AXI Controller - logic axi_busy; - logic [`RegBus] axi_data; - logic [`RegBus] axi_addr; + logic icache_axi_rreq; + logic axi_icache_rdy, axi_icache_rvalid; + logic [127:0] axi_icache_data; // 128b + logic [`RegBus] icache_axi_addr; // MEM <-> AXI Controller // TODO: replace with DCache @@ -117,9 +118,14 @@ module cpu_top ( .aresetn(aresetn), // <-> ICache - .inst_cpu_addr_i(axi_addr), - .inst_cpu_data_o(axi_data), + .inst_cpu_addr_i(icache_axi_addr), + .inst_cpu_data_o(axi_icache_data), .inst_id(4'b0000), // Read Instruction only + .icache_rd_type_i(3'b000), // Read 128b for 1 time + .icache_rd_req_i(icache_axi_rreq), + .icache_rd_rdy_o(axi_icache_rdy), + .icache_ret_valid_o(axi_icache_rvalid), + .icache_ret_last_o(), // <-> MEM Stage .data_cpu_addr_i(data_axi_addr), @@ -178,34 +184,39 @@ module cpu_top ( // ICache -> Frontend logic icache_frontend_stallreq; logic icache_frontend_valid[FETCH_WIDTH]; - logic [`InstAddrBus] icache_frontend_addr[FETCH_WIDTH]; + logic [`InstAddrBus] icache_frontend_addr[FETCH_WIDTH] = '{1,1}; logic [`RegBus] icache_frontend_data[FETCH_WIDTH]; - - dummy_icache #( - .ADDR_WIDTH(`RegWidth), - .DATA_WIDTH(`RegWidth) - ) u_dummy_icache ( - .clk(clk), - .rst(rst), - - // <-> Frontend - .flush(backend_flush), - .raddr_1_i (frontend_icache_addr[0]), - .raddr_2_i (frontend_icache_addr[1]), - .stallreq_o(icache_frontend_stallreq), - .rvalid_1_o(icache_frontend_valid[0]), - .rvalid_2_o(icache_frontend_valid[1]), - .raddr_1_o (icache_frontend_addr[0]), - .raddr_2_o (icache_frontend_addr[1]), - .rdata_1_o (icache_frontend_data[0]), - .rdata_2_o (icache_frontend_data[1]), - - // <-> AXI Controller - .axi_addr_o(axi_addr), - .axi_data_i(axi_data), - .axi_busy_i(axi_busy) - ); + logic [31:0] tmp_pc, tmp_pc_plus4; + assign 
tmp_pc_plus4 = tmp_pc + 4; + always_ff @( posedge clk or negedge rst_n) begin + if (!rst_n) begin + tmp_pc <= 32'h1c000000; + end else if (icache_frontend_valid[0]) begin + tmp_pc <= tmp_pc+8; + end + end + + + icache u_icache( + .clk (clk ), + .rst (rst ), + .rreq_1_i (1'b1), + .raddr_1_i (tmp_pc), + .rvalid_1_o (icache_frontend_valid[0]), + .rdata_1_o (icache_frontend_data[0]), + .rreq_2_i (1'b1), + .raddr_2_i (tmp_pc_plus4), + .rvalid_2_o (icache_frontend_valid[1]), + .rdata_2_o (icache_frontend_data[1]), + .axi_addr_o (icache_axi_addr), + .axi_rreq_o (icache_axi_rreq), + .axi_rdy_i (axi_icache_rdy), + .axi_rvalid_i (axi_icache_rvalid), + .axi_rlast_i (), + .axi_data_i (axi_icache_data) + ); + // Frontend <-> Instruction Buffer logic ib_frontend_stallreq; @@ -223,7 +234,7 @@ module cpu_top ( // <-> ICache .icache_read_addr_o(frontend_icache_addr), // -> ICache .icache_stallreq_i(icache_frontend_stallreq), // <- ICache, I$ cannot accept more addr requests - .icache_read_valid_i(icache_frontend_valid), // <- ICache + .icache_read_valid_i(), // <- ICache .icache_read_addr_i(icache_frontend_addr), // <- ICache .icache_read_data_i(icache_frontend_data), // <- ICache diff --git a/src/vsrc/dummy_icache.sv b/src/vsrc/dummy_icache.sv deleted file mode 100644 index 407cbab..0000000 --- a/src/vsrc/dummy_icache.sv +++ /dev/null @@ -1,165 +0,0 @@ -`include "defines.sv" - -/* dummy_icache -* hold output until AXI returns value -*/ -module dummy_icache #( - parameter ADDR_WIDTH = 32, - parameter DATA_WIDTH = 32 -) ( - input logic clk, - input logic rst, - - // <-> IF - // All signals are 1 cycle valid - input flush, - // all 0 means invalid - input logic [ADDR_WIDTH-1:0] raddr_1_i, - input logic [ADDR_WIDTH-1:0] raddr_2_i, - // Require IF stage not to send more instr addr - // stallreq is pull up the next clk when queue is full - // and pull down then next clk when queue can accept addr - output logic stallreq_o, - // rvalid is 1 when output is valid - output logic rvalid_1_o, 
- output logic rvalid_2_o, - // Must return the addr as well - output logic [ADDR_WIDTH-1:0] raddr_1_o, - output logic [ADDR_WIDTH-1:0] raddr_2_o, - output logic [DATA_WIDTH-1:0] rdata_1_o, - output logic [DATA_WIDTH-1:0] rdata_2_o, - - // <-> AXI Controller - output logic [ADDR_WIDTH-1:0] axi_addr_o, - - // Assume busy is pull down the same cycle when data is ready - input logic [DATA_WIDTH-1:0] axi_data_i, - input logic axi_busy_i -); - - // Reset signal - logic rst_n; - assign rst_n = ~rst; - - logic [ADDR_WIDTH-1:0] raddrs[2]; // Accept two addr - - // States - enum int unsigned { - ACCEPT_ADDR = 0, - IN_TRANSACTION_1 = 1, - IN_TRANSACTION_2 = 2 - } - state, next_state; - - always_ff @(posedge clk or negedge rst_n) begin : state_ff - if (!rst_n || flush) begin - state <= ACCEPT_ADDR; - end else begin - state <= next_state; - end - end - - always_comb begin : transition_comb - case (state) - ACCEPT_ADDR: begin - if ((raddr_1_i != 0 || raddr_2_i != 0) & ~axi_busy_i) begin - next_state = IN_TRANSACTION_1; - end else begin - next_state = ACCEPT_ADDR; - end - end - IN_TRANSACTION_1: begin - if (axi_busy_i == 0) begin - next_state = IN_TRANSACTION_2; - end else begin - next_state = IN_TRANSACTION_1; - end - end - IN_TRANSACTION_2: begin - if (axi_busy_i == 0) begin - next_state = ACCEPT_ADDR; - end else begin - next_state = IN_TRANSACTION_2; - end - end - default: begin - next_state = ACCEPT_ADDR; - end - endcase - end - - always_ff @(posedge clk or negedge rst_n) begin : raddrs_ff - if (!rst_n) begin - raddrs[0] <= 0; - raddrs[1] <= 0; - end else begin - case (state) - ACCEPT_ADDR: begin - raddrs[0] <= raddr_1_i; - raddrs[1] <= raddr_2_i; - end - IN_TRANSACTION_1, IN_TRANSACTION_2: begin - // Do nothing - end - endcase - end - end - - assign stallreq_o = ~(state == ACCEPT_ADDR) | axi_busy_i; - - always_ff @(posedge clk or negedge rst_n) begin : axi_ff - if (!rst_n) begin - axi_addr_o <= 0; - end else begin - case (state) - ACCEPT_ADDR: begin - if (raddr_1_i != 0 && 
axi_busy_i == 0) axi_addr_o <= raddr_1_i; - end - IN_TRANSACTION_1: begin - if (raddrs[1] != 0 && axi_busy_i == 0) axi_addr_o <= raddrs[1]; - end - IN_TRANSACTION_2: begin - if (next_state == ACCEPT_ADDR) begin - axi_addr_o <= 0; - end - end - default: begin - axi_addr_o <= 0; - end - endcase - end - end - - // Output logic - always_ff @(posedge clk or negedge rst_n) begin : output_ff - if (!rst_n || flush) begin - rvalid_1_o <= 0; - rvalid_2_o <= 0; - raddr_1_o <= 0; - raddr_2_o <= 0; - rdata_1_o <= 0; - rdata_2_o <= 0; - end else begin - rvalid_1_o <= 0; - rvalid_2_o <= 0; - raddr_1_o <= 0; - raddr_2_o <= 0; - rdata_1_o <= 0; - rdata_2_o <= 0; - case (state) - ACCEPT_ADDR: begin - end - IN_TRANSACTION_1: begin - rvalid_1_o <= ~axi_busy_i; - raddr_1_o <= axi_busy_i ? 0 : raddrs[0]; - rdata_1_o <= axi_busy_i ? 0 : axi_data_i; - end - IN_TRANSACTION_2: begin - rvalid_2_o <= ~axi_busy_i; - raddr_2_o <= axi_busy_i ? 0 : raddrs[1]; - rdata_2_o <= axi_busy_i ? 0 : axi_data_i; - end - endcase - end - end -endmodule diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index f72e406..3a22c82 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -26,10 +26,20 @@ module icache #( // <-> AXI Controller output logic [ADDR_WIDTH-1:0] axi_addr_o, output logic axi_rreq_o, - input logic axi_busy_i, // High effective - input logic [DATA_WIDTH-1:0] axi_data_i + input logic axi_rdy_i, + input logic axi_rvalid_i, + input logic [1:0] axi_rlast_i, + input logic [CACHELINE_WIDTH-1:0] axi_data_i ); + // Reset signal + logic rst_n; + assign rst_n = ~rst; + + + ///////////////////////////////////////////////// + // PO, query BRAM + //////////////////////////////////////////////// logic [NWAY-1:0][1:0][CACHELINE_WIDTH-1:0] data_bram_rdata; logic [NWAY-1:0][1:0][CACHELINE_WIDTH-1:0] data_bram_wdata; @@ -103,18 +113,35 @@ module icache #( end end + + + //////////////////////////////////////////////////// + // P1, output gen + /////////////////////////////////////////////////// + + // 
Input reg + logic rreq_1_delay1, rreq_2_delay1; + logic [ADDR_WIDTH-1:0] raddr_1_delay1, raddr_2_delay1; + always_ff @(posedge clk) begin + rreq_1_delay1 <= rreq_1_i; + rreq_2_delay1 <= rreq_2_i; + raddr_1_delay1 <= raddr_1_i; + raddr_2_delay1 <= raddr_2_i; + end + + logic [NWAY-1:0][1:0] tag_hit; always_comb begin for (integer i = 0; i < NWAY; i++) begin - tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == raddr_1_i[ADDR_WIDTH-1:ADDR_WIDTH-20]; - tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == raddr_2_i[ADDR_WIDTH-1:ADDR_WIDTH-20]; + tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == raddr_1_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20]; + tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == raddr_2_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20]; end end // Generate read output logic [1:0] offset_1, offset_2; - assign offset_1 = raddr_1_i[3:2]; - assign offset_2 = raddr_2_i[3:2]; + assign offset_1 = raddr_1_delay1[3:2]; + assign offset_2 = raddr_2_delay1[3:2]; logic [NWAY-1:0][1:0][DATA_WIDTH-1:0] data_inside_cacheline; always_comb begin for (integer i = 0; i < NWAY; i++) begin @@ -149,4 +176,108 @@ module icache #( end end + + // Refill state machine + enum int { + IDLE, + REFILL_1_REQ, + REFILL_1_WAIT, + REFILL_2_REQ, + REFILL_2_WAIT + } + state, next_state; + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + state <= IDLE; + end else begin + state <= next_state; + end + end + + + logic miss_1, miss_2; + assign miss_1 = rreq_1_delay1 & ~rvalid_1_o; + assign miss_2 = rreq_2_delay1 & ~rvalid_2_o; + + always_comb begin : transition_comb + case (state) + IDLE: begin + if (miss_1) next_state = REFILL_1_REQ; + else if (miss_2) next_state = REFILL_2_REQ; + else next_state = IDLE; + end + REFILL_1_REQ: begin + if (axi_rdy_i) next_state = REFILL_1_WAIT; + else next_state = REFILL_1_REQ; + end + REFILL_2_REQ: begin + if (axi_rdy_i) next_state = REFILL_2_WAIT; + else next_state = REFILL_2_REQ; + end + REFILL_1_WAIT: begin + if (rvalid_1_o) begin + if (miss_2) next_state = REFILL_2_REQ; + else 
next_state = IDLE; + end else next_state = REFILL_1_WAIT; + end + REFILL_2_WAIT: begin + if (rvalid_2_o) begin + if (miss_1) next_state = REFILL_1_REQ; + else next_state = IDLE; + end else next_state = REFILL_2_WAIT; + end + default: begin + next_state = IDLE; + end + endcase + end + + // State machine output + always_comb begin + // Default value + axi_rreq_o = 0; + axi_addr_o = 0; + case (state) + REFILL_1_REQ, REFILL_1_WAIT: begin + axi_rreq_o = 1; + axi_addr_o = raddr_1_i; + end + REFILL_2_REQ, REFILL_2_WAIT: begin + axi_rreq_o = 1; + axi_addr_o = raddr_2_i; + end + default: begin + end + endcase + end + + // Refill write BRAM + logic random_r; + always_ff @(posedge clk) begin + random_r <= ~random_r; + end + always_comb begin + for (integer i = 0; i < NWAY; i++) begin + tag_bram_we[i] = 0; + tag_bram_wdata[i] = 0; + data_bram_we[i] = 0; + data_bram_wdata[i] = 0; + if (i[0] == random_r) begin + // write this way + if (state == REFILL_1_WAIT && axi_rvalid_i) begin + tag_bram_we[i][0] = 1; + tag_bram_wdata[i][0] = {1'b1, raddr_1_i[31:12]}; + data_bram_we[i][0] = 1; + data_bram_wdata[i][0] = axi_data_i; + end + if (state == REFILL_2_WAIT && axi_rvalid_i) begin + tag_bram_we[i][1] = 1; + tag_bram_wdata[i][1] = {1'b1, raddr_2_i[31:12]}; // port 2 is refilled from raddr_2_i, so store that address's tag + data_bram_we[i][1] = 1; + data_bram_wdata[i][1] = axi_data_i; + end + end + end + end + endmodule From 23b7c9bd9711ad527d523e025653b25b36a53c2a Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sat, 21 May 2022 22:55:58 +0800 Subject: [PATCH 05/41] feat: add IP core --- src/vsrc/icache.sv | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 3a22c82..4410f0b 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -56,6 +56,22 @@ module icache #( generate for (genvar i = 0; i < NWAY; i++) begin : tag_bram + +`ifdef BRAM_IP + bram_icache_tag_ram u_bram ( + .clka (clk), + .clkb (clk), + .wea (tag_bram_we[i][0]), + .web (tag_bram_we[i][1]), + .dina 
(tag_bram_wdata[i][0]), + .addra(tag_bram_addr[i][0]), + .douta(tag_bram_rdata[i][0]), + .dinb (tag_bram_wdata[i][1]), + .addrb(tag_bram_addr[i][1]), + .doutb(tag_bram_rdata[i][1]) + ); +`else + bram #( .DATA_WIDTH (TAG_BRAM_WIDTH), .DATA_DEPTH_EXP2(10) @@ -70,10 +86,25 @@ module icache #( .addrb(tag_bram_addr[i][1]), .doutb(tag_bram_rdata[i][1]) ); +`endif end endgenerate generate for (genvar i = 0; i < NWAY; i++) begin : data_bram +`ifdef BRAM_IP + bram_icache_data_ram u_bram ( + .clka (clk), + .clkb (clk), + .wea (data_bram_we[i][0]), + .web (data_bram_we[i][1]), + .dina (data_bram_wdata[i][0]), + .addra(data_bram_addr[i][0]), + .douta(data_bram_rdata[i][0]), + .dinb (data_bram_wdata[i][1]), + .addrb(data_bram_addr[i][1]), + .doutb(data_bram_rdata[i][1]) + ); +`else bram #( .DATA_WIDTH (128), .DATA_DEPTH_EXP2(10) @@ -88,6 +119,7 @@ module icache #( .addrb(data_bram_addr[i][1]), .doutb(data_bram_rdata[i][1]) ); +`endif end endgenerate From 083b9f4cf7d1f700ba586736c91aee1a7a91e75c Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sun, 22 May 2022 11:34:01 +0800 Subject: [PATCH 06/41] feat: implememnt naive FTQ --- src/vsrc/frontend/frontend_defines.sv | 31 ++++++++++++ src/vsrc/frontend/ftq.sv | 70 +++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 src/vsrc/frontend/frontend_defines.sv create mode 100644 src/vsrc/frontend/ftq.sv diff --git a/src/vsrc/frontend/frontend_defines.sv b/src/vsrc/frontend/frontend_defines.sv new file mode 100644 index 0000000..e89ae47 --- /dev/null +++ b/src/vsrc/frontend/frontend_defines.sv @@ -0,0 +1,31 @@ +`ifndef FRONTEND_DEFINES_SV +`define FRONTEND_DEFINES_SV +`include "defines.sv" + +`define FETCH_WIDTH 4 + +typedef struct packed { + logic valid; + logic is_cross_cacheline; + logic [`InstAddrBus] start_pc; + logic [$clog2(`FETCH_WIDTH)-1:0] length; + + // TODO: add BPU meta +} bpu_ftq_t; + +typedef struct packed { + logic valid; + logic is_cross_cacheline; + logic [`InstAddrBus] start_pc; + logic 
[$clog2(`FETCH_WIDTH)-1:0] length; +} ftq_block_t; + +// FTQ <-> IFU +typedef struct packed { + logic valid; + logic is_cross_cacheline; + logic [`InstAddrBus] start_pc; + logic [$clog2(`FETCH_WIDTH)-1:0] length; +} ftq_ifu_t; + +`endif diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv new file mode 100644 index 0000000..20ef5e7 --- /dev/null +++ b/src/vsrc/frontend/ftq.sv @@ -0,0 +1,70 @@ + +`include "frontend/frontend_defines.sv" + +module ftq #( + parameter FETCH_WIDTH = 4, + parameter QUEUE_SIZE = 4 +) ( + input logic clk, + input logic rst, + + // <-> BPU + input bpu_ftq_t bpu_i, + output logic bpu_queue_full_o, + + // <-> Backend + input logic backend_commit_i, + + // <-> IFU + output ftq_ifu_t ifu_o, + input logic ifu_accept_i +); + + // Reset signal + logic rst_n; + assign rst_n = ~rst; + + + ftq_block_t [QUEUE_SIZE-1:0] FTQ, next_FTQ; + always_ff @(posedge clk or negedge rst_n) begin + if (~rst_n) begin + FTQ <= 0; + end else begin + FTQ <= next_FTQ; + end + end + + // PTR + logic [$clog2(QUEUE_SIZE)-1:0] bpu_ptr, ifu_ptr, comm_ptr; + always_ff @(posedge clk or negedge rst_n) begin : ptr_ff + if (~rst_n) begin + bpu_ptr <= 0; + ifu_ptr <= 0; + comm_ptr <= 0; + end else begin + if (backend_commit_i) comm_ptr <= comm_ptr + 1; + if (ifu_accept_i) ifu_ptr <= ifu_ptr + 1; + if (bpu_i.valid) bpu_ptr <= bpu_ptr + 1; + end + end + + // next_FTQ + always_comb begin : next_FTQ_comb + // Default, no change + next_FTQ = FTQ; + if (backend_commit_i) next_FTQ[comm_ptr] = 0; + if (bpu_i.valid) next_FTQ[bpu_ptr] = bpu_i; + end + + // Output + // -> IFU + assign ifu_o.valid = FTQ[ifu_ptr].valid; + assign ifu_o.is_cross_cacheline = FTQ[ifu_ptr].is_cross_cacheline; + assign ifu_o.start_pc = FTQ[ifu_ptr].start_pc; + assign ifu_o.length = FTQ[ifu_ptr].length; + + // -> BPU + assign bpu_queue_full_o = (bpu_ptr == comm_ptr - 1'b1); // sized literal keeps the subtraction at pointer width, so it wraps to the last slot when comm_ptr is 0 + + +endmodule From 24d9b027751ede26474b868d0587896853670a78 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sun, 22 May 2022 14:44:15 
+0800 Subject: [PATCH 07/41] feat: implemented simple decoupled frontend design --- src/vsrc/cpu_top.sv | 22 ++--- src/vsrc/frontend/frontend.sv | 119 ++++++++++++-------------- src/vsrc/frontend/frontend_defines.sv | 6 +- src/vsrc/frontend/ftq.sv | 16 +++- src/vsrc/frontend/ifu.sv | 85 ++++++++++++++++++ src/vsrc/icache.sv | 31 ++----- 6 files changed, 174 insertions(+), 105 deletions(-) create mode 100644 src/vsrc/frontend/ifu.sv diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 7f96ea6..f6c9559 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -176,16 +176,17 @@ module cpu_top ( .s_bready(bready) ); - // FETCH_WIDTH is 2 - localparam FETCH_WIDTH = 2; + // FETCH_WIDTH is 4 + localparam FETCH_WIDTH = 4; + // Frontend -> ICache - logic [`InstAddrBus] frontend_icache_addr[FETCH_WIDTH]; + logic [1:0] frontend_icache_rreq; + logic [1:0][`InstAddrBus] frontend_icache_addr; // ICache -> Frontend logic icache_frontend_stallreq; - logic icache_frontend_valid[FETCH_WIDTH]; - logic [`InstAddrBus] icache_frontend_addr[FETCH_WIDTH] = '{1,1}; - logic [`RegBus] icache_frontend_data[FETCH_WIDTH]; + logic [1:0]icache_frontend_valid; + logic [1:0][127:0] icache_frontend_data; // Cacheline is 128b logic [31:0] tmp_pc, tmp_pc_plus4; assign tmp_pc_plus4 = tmp_pc + 4; @@ -233,9 +234,8 @@ module cpu_top ( // <-> ICache .icache_read_addr_o(frontend_icache_addr), // -> ICache - .icache_stallreq_i(icache_frontend_stallreq), // <- ICache, I$ cannot accept more addr requests - .icache_read_valid_i(), // <- ICache - .icache_read_addr_i(icache_frontend_addr), // <- ICache + .icache_read_req_o(frontend_icache_rreq), + .icache_read_valid_i(icache_frontend_valid), // <- ICache .icache_read_data_i(icache_frontend_data), // <- ICache // <-> Backend @@ -753,7 +753,7 @@ module cpu_top ( .clock (aclk), .coreid (0), // only one core, so always 0 .index (0), // commit channel index - .valid (difftest_commit_info_delay1[0].valid), // 1 means valid + // .valid 
(difftest_commit_info_delay1[0].valid), // 1 means valid .pc (difftest_commit_info_delay1[0].pc), .instr (difftest_commit_info_delay1[0].instr), .skip (0), // not sure meaning, but keep 0 for now @@ -772,7 +772,7 @@ module cpu_top ( .coreid (0), // only one core, so always 0 .index (1), // commit channel index .skip (0), // not sure meaning, but keep 0 for now - .valid (difftest_commit_info_delay1[1].valid), // 1 means valid + // .valid (difftest_commit_info_delay1[1].valid), // 1 means valid .pc (difftest_commit_info_delay1[1].pc), .instr (difftest_commit_info_delay1[1].instr), .is_TLBFILL (), diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index b142ad2..a740037 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -1,18 +1,24 @@ `include "instr_info.sv" +`include "frontend/frontend_defines.sv" + +`include "frontend/ftq.sv" +`include "frontend/ifu.sv" + module frontend #( - parameter FETCH_WIDTH = 2, - parameter ADDR_WIDTH = 32, - parameter DATA_WIDTH = 32 + parameter FETCH_WIDTH = 4, + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32, + parameter CACHELINE_WIDTH = 128 ) ( input logic clk, input logic rst, // <-> ICache - output logic [ADDR_WIDTH-1:0] icache_read_addr_o[FETCH_WIDTH], - input logic icache_stallreq_i, // ICache cannot accept more addr input - input logic icache_read_valid_i[FETCH_WIDTH], - input logic [ADDR_WIDTH-1:0] icache_read_addr_i[FETCH_WIDTH], - input logic [DATA_WIDTH-1:0] icache_read_data_i[FETCH_WIDTH], + // ICache is fixed dual port + output logic [1:0] icache_read_req_o, + output logic [1:0][ADDR_WIDTH-1:0] icache_read_addr_o, + input logic [1:0] icache_read_valid_i, + input logic [1:0][CACHELINE_WIDTH-1:0] icache_read_data_i, // <-> Backend @@ -40,72 +46,61 @@ module frontend #( end end + logic ftq_full; + always_comb begin : next_pc_comb if (backend_flush_i) begin next_pc = backend_next_pc_i; - end else if (instr_buffer_stallreq_i) begin - next_pc = pc; - end else if 
(icache_stallreq_i) begin + end else if (instr_buffer_stallreq_i || ftq_full) begin next_pc = pc; end else begin next_pc = pc + 8; end end - // ICache read_addr_o - always_comb begin : icache_read_addr_o_comb - for (integer i = 0; i < FETCH_WIDTH; i++) begin - icache_read_addr_o[i] = pc + i * 4; - end - end - - typedef struct packed { - bit valid; - bit [ADDR_WIDTH-1:0] pc; - bit [DATA_WIDTH-1:0] instr; - } icache_resp_t; - icache_resp_t icache_resp_buffer[FETCH_WIDTH]; - always_ff @(posedge clk or negedge rst_n) begin : icache_resp_buffer_ff - if (!rst_n || icache_resp_ready) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - icache_resp_buffer[i] <= 0; - end + // BPU + bpu_ftq_t bpu_ftq_block; + always_comb begin + if (~ftq_full) begin + bpu_ftq_block.start_pc = pc; + bpu_ftq_block.valid = 1; + bpu_ftq_block.length = 4; + bpu_ftq_block.is_cross_cacheline = 0; end else begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - if (icache_read_valid_i[i]) begin - icache_resp_buffer[i].valid <= 1; - icache_resp_buffer[i].pc <= icache_read_addr_i[i]; - icache_resp_buffer[i].instr <= icache_read_data_i[i]; - end - end - end - end - logic icache_resp_ready; // 1 if all the instr in icache_resp_buffer is valid - always_comb begin : icache_resp_ready_comb - icache_resp_ready = 1; - for (integer i = 0; i < FETCH_WIDTH; i++) begin - icache_resp_ready = icache_resp_ready & icache_resp_buffer[i].valid; + bpu_ftq_block = 0; end end - always_ff @(posedge clk or negedge rst_n) begin : instr_buffer_o_ff - if (!rst_n || backend_flush_i) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - instr_buffer_o[i] <= 0; - end - end else begin - // Keep 0 for most of the time - for (integer i = 0; i < FETCH_WIDTH; i++) begin - instr_buffer_o[i] <= 0; - end - if (icache_resp_ready && !instr_buffer_stallreq_i) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - instr_buffer_o[i].valid <= 1; - instr_buffer_o[i].pc <= icache_resp_buffer[i].pc; - instr_buffer_o[i].instr <= 
icache_resp_buffer[i].instr; - end - end - end - end + ftq_ifu_t ftq_ifu_block; + logic ifu_ftq_accept; + + ftq #( + .FETCH_WIDTH(4), + .QUEUE_SIZE (4) + ) u_ftq ( + .clk (clk), + .rst (rst), + .bpu_i (bpu_ftq_block), + .bpu_queue_full_o(ftq_full), + .backend_commit_i(), + .ifu_o (ftq_ifu_block), + .ifu_accept_i (ifu_ftq_accept) + ); + + + ifu u_ifu ( + .clk (clk), + .rst (rst), + .ftq_i (ftq_ifu_block), + .ftq_accept_o (ifu_ftq_accept), + .icache_rreq_o (icache_read_req_o), + .icache_raddr_o (icache_read_addr_o), + .icache_rvalid_i(icache_read_valid_i), + .icache_rdata_i (icache_read_data_i), + .stallreq_i (instr_buffer_stallreq_i), + .instr_buffer_o (instr_buffer_o) + ); + + endmodule diff --git a/src/vsrc/frontend/frontend_defines.sv b/src/vsrc/frontend/frontend_defines.sv index e89ae47..d022db1 100644 --- a/src/vsrc/frontend/frontend_defines.sv +++ b/src/vsrc/frontend/frontend_defines.sv @@ -8,7 +8,7 @@ typedef struct packed { logic valid; logic is_cross_cacheline; logic [`InstAddrBus] start_pc; - logic [$clog2(`FETCH_WIDTH)-1:0] length; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; // TODO: add BPU meta } bpu_ftq_t; @@ -17,7 +17,7 @@ typedef struct packed { logic valid; logic is_cross_cacheline; logic [`InstAddrBus] start_pc; - logic [$clog2(`FETCH_WIDTH)-1:0] length; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; } ftq_block_t; // FTQ <-> IFU @@ -25,7 +25,7 @@ typedef struct packed { logic valid; logic is_cross_cacheline; logic [`InstAddrBus] start_pc; - logic [$clog2(`FETCH_WIDTH)-1:0] length; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; } ftq_ifu_t; `endif diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index 20ef5e7..ad8b5f2 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -1,4 +1,4 @@ - +`include "defines.sv" `include "frontend/frontend_defines.sv" module ftq #( @@ -24,7 +24,7 @@ module ftq #( logic rst_n; assign rst_n = ~rst; - + // QUEUE data structure ftq_block_t [QUEUE_SIZE-1:0] FTQ, next_FTQ; always_ff @(posedge 
clk or negedge rst_n) begin if (~rst_n) begin @@ -34,6 +34,14 @@ module ftq #( end end + // DEBUG signal + logic [`InstAddrBus] debug_queue_pc[QUEUE_SIZE]; + always_comb begin + for (integer i = 0; i < QUEUE_SIZE; i++) begin + debug_queue_pc[i] = FTQ[i].start_pc; + end + end + // PTR logic [$clog2(QUEUE_SIZE)-1:0] bpu_ptr, ifu_ptr, comm_ptr; always_ff @(posedge clk or negedge rst_n) begin : ptr_ff @@ -64,7 +72,9 @@ module ftq #( assign ifu_o.length = FTQ[ifu_ptr].length; // -> BPU - assign bpu_queue_full_o = (bpu_ptr == comm_ptr - 1); + logic [$clog2(QUEUE_SIZE)-1:0] bpu_ptr_plus1; // Limit the bit width + assign bpu_ptr_plus1 = bpu_ptr + 1; + assign bpu_queue_full_o = (bpu_ptr_plus1 == comm_ptr); endmodule diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv new file mode 100644 index 0000000..39d3f4b --- /dev/null +++ b/src/vsrc/frontend/ifu.sv @@ -0,0 +1,85 @@ +`include "frontend/frontend_defines.sv" +`include "instr_info.sv" + + +module ifu #( + parameter FETCH_WIDTH = 4, + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32, + parameter CACHELINE_WIDTH = 128 // FETCH_WIDTH and CACHELINE_WIDTH must match +) ( + input logic clk, + input logic rst, + + // <-> Fetch Target Queue + input ftq_ifu_t ftq_i, + output logic ftq_accept_o, // In current cycle + + + // <-> Frontend <-> ICache + output logic [1:0] icache_rreq_o, + output logic [1:0][ADDR_WIDTH-1:0] icache_raddr_o, + input logic [1:0] icache_rvalid_i, + input logic [1:0][CACHELINE_WIDTH-1:0] icache_rdata_i, + + + // <-> Frontend <-> Instruction Buffer + input logic stallreq_i, + output instr_buffer_info_t instr_buffer_o[FETCH_WIDTH] +); + + // Reset signal + logic rst_n; + assign rst_n = ~rst; + + logic accept_ftq_input; + assign ftq_accept_o = accept_ftq_input; + + // P0 + + // Send addr to ICache + assign icache_rreq_o[0] = 1; + assign icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 1 : 0; + assign icache_raddr_o[0] = ftq_i.start_pc; + assign icache_raddr_o[1] = ftq_i.is_cross_cacheline ? 
ftq_i.start_pc + 16 : 0; // TODO: remove magic number + + + // P1 + // Cacheline returned + logic [FETCH_WIDTH-1:0][DATA_WIDTH-1:0] cacheline_0, cacheline_1; + assign cacheline_0 = icache_rdata_i[0]; + assign cacheline_1 = icache_rdata_i[1]; + logic icache_result_valid; + always_comb begin + if (ftq_i.is_cross_cacheline) icache_result_valid = icache_rvalid_i[0] & icache_rvalid_i[1]; + else icache_result_valid = icache_rvalid_i[0]; + end + + // FTQ input + ftq_ifu_t current_fetch_block; + logic [ADDR_WIDTH-1:0] debug_p1_pc = current_fetch_block.start_pc; // DEBUG + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + current_fetch_block <= 0; + end else if (accept_ftq_input) begin + current_fetch_block <= ftq_i; + end + end + // If last req to icache is valid, then accept another ftq input + assign accept_ftq_input = icache_result_valid; + + // P2 + // Send instr info to IB + always_ff @(posedge clk or negedge rst_n) begin + for (integer i = 0; i < FETCH_WIDTH; i++) begin + if (i < current_fetch_block.length && ~stallreq_i) begin + instr_buffer_o[i].valid <= 1; + instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + end else begin + instr_buffer_o[i] <= 0; + end + end + end + +endmodule diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 4410f0b..6f1ff0a 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -5,8 +5,7 @@ module icache #( parameter NSET = 256, parameter NWAY = 2, parameter CACHELINE_WIDTH = 128, - parameter ADDR_WIDTH = 32, - parameter DATA_WIDTH = 32 + parameter ADDR_WIDTH = 32 ) ( input logic clk, input logic rst, @@ -15,13 +14,13 @@ module icache #( input logic rreq_1_i, input logic [ADDR_WIDTH-1:0] raddr_1_i, output logic rvalid_1_o, - output logic [DATA_WIDTH-1:0] rdata_1_o, + output logic [CACHELINE_WIDTH-1:0] rdata_1_o, // Read port 2 input logic rreq_2_i, input logic [ADDR_WIDTH-1:0] raddr_2_i, output 
logic rvalid_2_o, - output logic [DATA_WIDTH-1:0] rdata_2_o, + output logic [CACHELINE_WIDTH-1:0] rdata_2_o, // <-> AXI Controller output logic [ADDR_WIDTH-1:0] axi_addr_o, @@ -171,26 +170,6 @@ module icache #( end // Generate read output - logic [1:0] offset_1, offset_2; - assign offset_1 = raddr_1_delay1[3:2]; - assign offset_2 = raddr_2_delay1[3:2]; - logic [NWAY-1:0][1:0][DATA_WIDTH-1:0] data_inside_cacheline; - always_comb begin - for (integer i = 0; i < NWAY; i++) begin - case (offset_1) - 2'b00: data_inside_cacheline[i][0] = data_bram_rdata[i][0][31:0]; - 2'b01: data_inside_cacheline[i][0] = data_bram_rdata[i][0][63:32]; - 2'b10: data_inside_cacheline[i][0] = data_bram_rdata[i][0][95:64]; - 2'b11: data_inside_cacheline[i][0] = data_bram_rdata[i][0][127:96]; - endcase - case (offset_2) - 2'b00: data_inside_cacheline[i][1] = data_bram_rdata[i][1][31:0]; - 2'b01: data_inside_cacheline[i][1] = data_bram_rdata[i][1][63:32]; - 2'b10: data_inside_cacheline[i][1] = data_bram_rdata[i][1][95:64]; - 2'b11: data_inside_cacheline[i][1] = data_bram_rdata[i][1][127:96]; - endcase - end - end always_comb begin rvalid_1_o = 0; rdata_1_o = 0; @@ -199,11 +178,11 @@ module icache #( for (integer i = 0; i < NWAY; i++) begin if (tag_hit[i][0]) begin rvalid_1_o = 1; - rdata_1_o = data_inside_cacheline[i][0]; + rdata_1_o = data_bram_rdata[i][0]; end if (tag_hit[i][1]) begin rvalid_2_o = 1; - rdata_2_o = data_inside_cacheline[i][1]; + rdata_2_o = data_bram_rdata[i][1]; end end end From ee5e5b0aa32ec226fa45e2c91d73aa8d805ec9d5 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sun, 22 May 2022 15:13:10 +0800 Subject: [PATCH 08/41] fix: fix timing, seq fetching OK --- src/vsrc/cpu_top.sv | 24 +++++++++--------------- src/vsrc/frontend/frontend.sv | 2 +- src/vsrc/frontend/frontend_defines.sv | 6 +++--- src/vsrc/frontend/ftq.sv | 8 ++++---- src/vsrc/frontend/ifu.sv | 22 +++++++++++++++------- src/vsrc/icache.sv | 4 ++-- 6 files changed, 34 insertions(+), 32 deletions(-) diff --git 
a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index f6c9559..274a2a2 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -188,28 +188,22 @@ module cpu_top ( logic [1:0]icache_frontend_valid; logic [1:0][127:0] icache_frontend_data; // Cacheline is 128b - logic [31:0] tmp_pc, tmp_pc_plus4; - assign tmp_pc_plus4 = tmp_pc + 4; - always_ff @( posedge clk or negedge rst_n) begin - if (!rst_n) begin - tmp_pc <= 32'h1c000000; - end else if (icache_frontend_valid[0]) begin - tmp_pc <= tmp_pc+8; - end - end - - icache u_icache( .clk (clk ), .rst (rst ), - .rreq_1_i (1'b1), - .raddr_1_i (tmp_pc), + + // Port A + .rreq_1_i (frontend_icache_rreq[0]), + .raddr_1_i (frontend_icache_addr[0]), .rvalid_1_o (icache_frontend_valid[0]), .rdata_1_o (icache_frontend_data[0]), - .rreq_2_i (1'b1), - .raddr_2_i (tmp_pc_plus4), + // Port B + .rreq_2_i (frontend_icache_rreq[1]), + .raddr_2_i (frontend_icache_addr[1]), .rvalid_2_o (icache_frontend_valid[1]), .rdata_2_o (icache_frontend_data[1]), + + // <-> AXI Controller .axi_addr_o (icache_axi_addr), .axi_rreq_o (icache_axi_rreq), .axi_rdy_i (axi_icache_rdy), diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index a740037..721e534 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -54,7 +54,7 @@ module frontend #( end else if (instr_buffer_stallreq_i || ftq_full) begin next_pc = pc; end else begin - next_pc = pc + 8; + next_pc = pc + FETCH_WIDTH * 4; end end diff --git a/src/vsrc/frontend/frontend_defines.sv b/src/vsrc/frontend/frontend_defines.sv index d022db1..e752019 100644 --- a/src/vsrc/frontend/frontend_defines.sv +++ b/src/vsrc/frontend/frontend_defines.sv @@ -6,8 +6,8 @@ typedef struct packed { logic valid; - logic is_cross_cacheline; logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; logic [$clog2(`FETCH_WIDTH+1)-1:0] length; // TODO: add BPU meta @@ -15,16 +15,16 @@ typedef struct packed { typedef struct packed { logic valid; - logic is_cross_cacheline; logic 
[`InstAddrBus] start_pc; + logic is_cross_cacheline; logic [$clog2(`FETCH_WIDTH+1)-1:0] length; } ftq_block_t; // FTQ <-> IFU typedef struct packed { logic valid; - logic is_cross_cacheline; logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; logic [$clog2(`FETCH_WIDTH+1)-1:0] length; } ftq_ifu_t; diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index ad8b5f2..e6b355b 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -66,10 +66,10 @@ module ftq #( // Output // -> IFU - assign ifu_o.valid = FTQ[ifu_ptr].valid; - assign ifu_o.is_cross_cacheline = FTQ[ifu_ptr].is_cross_cacheline; - assign ifu_o.start_pc = FTQ[ifu_ptr].start_pc; - assign ifu_o.length = FTQ[ifu_ptr].length; + assign ifu_o.valid = FTQ[ifu_ptr+ifu_accept_i].valid; + assign ifu_o.is_cross_cacheline = FTQ[ifu_ptr+ifu_accept_i].is_cross_cacheline; + assign ifu_o.start_pc = FTQ[ifu_ptr+ifu_accept_i].start_pc; + assign ifu_o.length = FTQ[ifu_ptr+ifu_accept_i].length; // -> BPU logic [$clog2(QUEUE_SIZE)-1:0] bpu_ptr_plus1; // Limit the bit width diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 39d3f4b..1bcbbc5 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -36,12 +36,19 @@ module ifu #( assign ftq_accept_o = accept_ftq_input; // P0 - + logic ftq_input_valid = ftq_i.valid; // Send addr to ICache - assign icache_rreq_o[0] = 1; - assign icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 1 : 0; - assign icache_raddr_o[0] = ftq_i.start_pc; - assign icache_raddr_o[1] = ftq_i.is_cross_cacheline ? ftq_i.start_pc + 16 : 0; // TODO: remove magic number + always_comb begin + if (ftq_input_valid) begin + icache_rreq_o[0] = 1; + icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 1 : 0; + icache_raddr_o[0] = ftq_i.start_pc; + icache_raddr_o[1] = ftq_i.is_cross_cacheline ? 
ftq_i.start_pc + 16 : 0; // TODO: remove magic number + end else begin + icache_rreq_o = 0; + icache_raddr_o = 0; + end + end // P1 @@ -58,10 +65,11 @@ module ifu #( // FTQ input ftq_ifu_t current_fetch_block; logic [ADDR_WIDTH-1:0] debug_p1_pc = current_fetch_block.start_pc; // DEBUG + logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin current_fetch_block <= 0; - end else if (accept_ftq_input) begin + end else begin current_fetch_block <= ftq_i; end end @@ -72,7 +80,7 @@ module ifu #( // Send instr info to IB always_ff @(posedge clk or negedge rst_n) begin for (integer i = 0; i < FETCH_WIDTH; i++) begin - if (i < current_fetch_block.length && ~stallreq_i) begin + if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid) begin instr_buffer_o[i].valid <= 1; instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 6f1ff0a..a1795c4 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -164,8 +164,8 @@ module icache #( logic [NWAY-1:0][1:0] tag_hit; always_comb begin for (integer i = 0; i < NWAY; i++) begin - tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == raddr_1_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20]; - tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == raddr_2_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20]; + tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == raddr_1_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][0][20]; + tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == raddr_2_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][1][20]; end end From ac6223d2a4d18218f227f58d1ebcd4edea533480 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sun, 22 May 2022 17:55:10 +0800 Subject: [PATCH 09/41] fix: fix amb clock ctrl --- src/vsrc/frontend/ifu.sv | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff 
--git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 1bcbbc5..5a40667 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -79,14 +79,20 @@ module ifu #( // P2 // Send instr info to IB always_ff @(posedge clk or negedge rst_n) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid) begin - instr_buffer_o[i].valid <= 1; - instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long - instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; - end else begin + if (!rst_n) begin + for (integer i = 0; i < FETCH_WIDTH; i++) begin instr_buffer_o[i] <= 0; end + end else begin + for (integer i = 0; i < FETCH_WIDTH; i++) begin + if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid) begin + instr_buffer_o[i].valid <= 1; + instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + end else begin + instr_buffer_o[i] <= 0; + end + end end end From f62cea2f70d769e7aed5dde2dc9f2b28a6ae3fe7 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sun, 22 May 2022 18:14:33 +0800 Subject: [PATCH 10/41] feat: finish backend feedback in FTQ --- src/vsrc/cpu_top.sv | 7 ++++++- src/vsrc/frontend/frontend.sv | 3 ++- src/vsrc/frontend/ifu.sv | 16 +++++++++++++--- src/vsrc/instr_info.sv | 1 + src/vsrc/pipeline/1_decode/id.sv | 15 +++++++++------ src/vsrc/pipeline/4_mem/mem_wb.sv | 31 ++++++++----------------------- 6 files changed, 39 insertions(+), 34 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 274a2a2..04322ff 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -220,6 +220,7 @@ module cpu_top ( // Frontend <-> Backend logic backend_flush; + logic [1:0] is_last_in_block; // <- WB, suggest whether last instr in basic block is committed // All frontend structures frontend u_frontend 
( @@ -236,6 +237,7 @@ module cpu_top ( .branch_update_info_i(), // branch update signals, <- EXE Stage, unused .backend_next_pc_i (next_pc), // backend PC, <- pc_gen .backend_flush_i (backend_flush), // backend flush, usually come with next_pc + .backend_commit_i (is_last_in_block[0] | is_last_in_block[1]), // <-> Instruction Buffer .instr_buffer_stallreq_i(ib_frontend_stallreq), // instruction buffer is full @@ -551,7 +553,10 @@ module cpu_top ( .flush(flush), //to ctrl - .wb_ctrl_signal(wb_ctrl_signal[i]) + .wb_ctrl_signal(wb_ctrl_signal[i]), + + // -> Frontend + .is_last_in_block(is_last_in_block[i]) ); end endgenerate diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index 721e534..09ac385 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -25,6 +25,7 @@ module frontend #( input branch_update_info_t branch_update_info_i, input logic [ADDR_WIDTH-1:0] backend_next_pc_i, input logic backend_flush_i, + input logic backend_commit_i, // <-> Instruction buffer input logic instr_buffer_stallreq_i, @@ -82,7 +83,7 @@ module frontend #( .rst (rst), .bpu_i (bpu_ftq_block), .bpu_queue_full_o(ftq_full), - .backend_commit_i(), + .backend_commit_i(backend_commit_i), .ifu_o (ftq_ifu_block), .ifu_accept_i (ifu_ftq_accept) ); diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 5a40667..3d50f8a 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -85,10 +85,20 @@ module ifu #( end end else begin for (integer i = 0; i < FETCH_WIDTH; i++) begin + // Default + instr_buffer_o[i].is_last_in_block <= 0; + if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid) begin - instr_buffer_o[i].valid <= 1; - instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long - instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + if (i == current_fetch_block.length - 1) begin + instr_buffer_o[i].valid <= 1; + 
instr_buffer_o[i].is_last_in_block <= 1; + instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + end else begin + instr_buffer_o[i].valid <= 1; + instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + end end else begin instr_buffer_o[i] <= 0; end diff --git a/src/vsrc/instr_info.sv b/src/vsrc/instr_info.sv index 0a2f1dd..600eda9 100644 --- a/src/vsrc/instr_info.sv +++ b/src/vsrc/instr_info.sv @@ -5,6 +5,7 @@ typedef struct packed { bit valid; + bit is_last_in_block; // Mark the last instruction in basic block bit [`InstAddrBus] pc; bit [`InstBus] instr; diff --git a/src/vsrc/pipeline/1_decode/id.sv b/src/vsrc/pipeline/1_decode/id.sv index dc811a5..6f924c3 100644 --- a/src/vsrc/pipeline/1_decode/id.sv +++ b/src/vsrc/pipeline/1_decode/id.sv @@ -45,6 +45,8 @@ module id ( assign pc_i = instr_buffer_i.valid ? instr_buffer_i.pc : `ZeroWord; logic [`InstBus] inst_i; assign inst_i = instr_buffer_i.valid ? instr_buffer_i.instr : `ZeroWord; + logic is_last_in_block; + assign is_last_in_block = instr_buffer_i.valid ? 
instr_buffer_i.is_last_in_block : 0; logic instr_break, instr_syscall, kernel_instr; assign kernel_instr = dispatch_o.aluop == `EXE_CSRRD_OP | dispatch_o.aluop == `EXE_CSRWR_OP | dispatch_o.aluop == `EXE_CSRXCHG_OP | @@ -163,8 +165,10 @@ module id ( // Generate imm, using OR logic [`RegBus] imm; assign dispatch_o.use_imm = (imm != 0) && !(dispatch_o.aluop == `EXE_ST_B_OP | dispatch_o.aluop == `EXE_ST_H_OP | - dispatch_o.aluop == `EXE_ST_W_OP | dispatch_o.aluop == `EXE_CSRRD_OP | dispatch_o.aluop == `EXE_CSRWR_OP - | dispatch_o.aluop == `EXE_CSRRD_OP | dispatch_o.aluop == `EXE_CSRXCHG_OP); // HACK: works for now + dispatch_o.aluop == `EXE_ST_W_OP | dispatch_o.aluop == `EXE_CSRRD_OP | dispatch_o.aluop == + `EXE_CSRWR_OP + | dispatch_o.aluop == `EXE_CSRRD_OP | dispatch_o.aluop == `EXE_CSRXCHG_OP) + ; // HACK: works for now assign dispatch_o.imm = imm; always_comb begin imm = 0; @@ -208,6 +212,7 @@ module id ( assign dispatch_o.instr_info.valid = instr_valid; assign dispatch_o.instr_info.pc = pc_i; assign dispatch_o.instr_info.instr = inst_i; + assign dispatch_o.instr_info.is_last_in_block = is_last_in_block; // TODO: add explanation @@ -216,15 +221,13 @@ module id ( logic excp; logic [8:0] excp_num; - assign dispatch_o.refetch = (dispatch_o.aluop == `EXE_TLBFILL_OP || dispatch_o.aluop == `EXE_TLBRD_OP || dispatch_o.aluop == `EXE_TLBWR_OP || dispatch_o.aluop == `EXE_TLBSRCH_OP || dispatch_o.aluop == `EXE_ERTN_OP || dispatch_o.aluop == `EXE_INVTLB_OP) ; + assign dispatch_o.refetch = (dispatch_o.aluop == `EXE_TLBFILL_OP || dispatch_o.aluop == `EXE_TLBRD_OP || dispatch_o.aluop == `EXE_TLBWR_OP || dispatch_o.aluop == `EXE_TLBSRCH_OP || dispatch_o.aluop == `EXE_ERTN_OP || dispatch_o.aluop == `EXE_INVTLB_OP) ; assign excp_ine = !(instr_valid == `InstInvalid) && !instr_buffer_i.valid; assign excp_ipe = kernel_instr && (csr_plv == 2'b11); assign excp = excp_ipe | instr_syscall | instr_break | excp_i | excp_ine | has_int; - assign excp_num = { - excp_ipe, excp_ine, instr_break, 
instr_syscall, excp_num_i, has_int - }; + assign excp_num = {excp_ipe, excp_ine, instr_break, instr_syscall, excp_num_i, has_int}; assign dispatch_o.excp = excp; assign dispatch_o.excp_num = excp_num; diff --git a/src/vsrc/pipeline/4_mem/mem_wb.sv b/src/vsrc/pipeline/4_mem/mem_wb.sv index b58319b..97673ac 100644 --- a/src/vsrc/pipeline/4_mem/mem_wb.sv +++ b/src/vsrc/pipeline/4_mem/mem_wb.sv @@ -13,7 +13,10 @@ module mem_wb ( input logic flush, // load store relate difftest - output wb_ctrl wb_ctrl_signal + output wb_ctrl wb_ctrl_signal, + + // <-> Frontend + output logic is_last_in_block ); // For observability @@ -22,33 +25,15 @@ module mem_wb ( always @(posedge clk) begin if (rst == `RstEnable) begin - wb_ctrl_signal.valid <= 1'b0; - wb_ctrl_signal.aluop <= 8'b0; - wb_ctrl_signal.wb_reg_o.waddr <= `NOPRegAddr; - wb_ctrl_signal.wb_reg_o.we <= `WriteDisable; - wb_ctrl_signal.wb_reg_o.wdata <= `ZeroWord; - wb_ctrl_signal.wb_reg_o.pc <= `ZeroWord; - wb_ctrl_signal.llbit_o.we <= 1'b0; - wb_ctrl_signal.llbit_o.value <= 1'b0; - wb_ctrl_signal.excp <= 1'b0; - wb_ctrl_signal.excp_num <= 16'b0; - wb_ctrl_signal.fetch_flush <= 1'b0; - wb_ctrl_signal.csr_signal_o <= 47'b0; - wb_ctrl_signal.diff_commit_o.instr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.pc <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.valid <= `InstInvalid; - wb_ctrl_signal.diff_commit_o.inst_ld_en <= 8'b0; - wb_ctrl_signal.diff_commit_o.inst_st_en <= 8'b0; - wb_ctrl_signal.diff_commit_o.ld_paddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.ld_vaddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.st_paddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.st_vaddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.st_data <= `ZeroWord; + wb_ctrl_signal <= 0; + is_last_in_block <= 0; end else if (stall == `Stop) begin wb_ctrl_signal.diff_commit_o.instr <= `ZeroWord; wb_ctrl_signal.diff_commit_o.pc <= `ZeroWord; wb_ctrl_signal.diff_commit_o.valid <= `InstInvalid; + is_last_in_block <= 0; end else begin + is_last_in_block <= 
mem_signal_o.instr_info.is_last_in_block; wb_ctrl_signal.valid <= 1'b1; wb_ctrl_signal.aluop <= mem_signal_o.aluop; wb_ctrl_signal.wb_reg_o.waddr <= mem_signal_o.waddr; From 384263790cef632b10fbc615fd41f7e49e176987 Mon Sep 17 00:00:00 2001 From: 250HandsomeLiang <2502481961@qq.com> Date: Sun, 22 May 2022 20:31:36 +0800 Subject: [PATCH 11/41] update AXI128 --- src/vsrc/AXI/axi_master.sv | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index 8519231..ec1f99f 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -78,10 +78,10 @@ module axi_master ( ); reg write_wait_enable; - reg [31:0] inst_buffer; + reg [`DATA] inst_buffer; //read and write data stall - reg [31:0] data_buffer; + reg [`DATA] data_buffer; reg [3:0] inst_r_state; reg [3:0] data_r_state; @@ -193,7 +193,7 @@ module axi_master ( inst_r_state <= `R_ADDR; inst_s_arid <= inst_id; inst_s_araddr <= inst_cpu_addr_i; - inst_s_arsize <= 3'b010; + inst_s_arsize <= 3'b100; inst_buffer <= 0; inst_s_arlen <= inst_real_s_arlen; inst_s_rready <= 0; @@ -208,7 +208,7 @@ module axi_master ( inst_r_state <= `R_ADDR; inst_s_arid <= inst_id; inst_s_araddr <= inst_cpu_addr_i; - inst_s_arsize <= 3'b010; + inst_s_arsize <= 3'b100; inst_buffer <= 0; inst_s_arlen <= inst_real_s_arlen; inst_s_rready <= 0; @@ -400,7 +400,7 @@ module axi_master ( data_r_state <= `R_ADDR; data_s_arid <= data_id; data_s_araddr <= data_cpu_addr_i; - data_s_arsize <= 3'b010; + data_s_arsize <= 3'b100; data_buffer <= 0; data_s_arlen <= data_real_s_arlen; data_s_rready <= 0; @@ -567,7 +567,7 @@ module axi_master ( if (dcache_wr_req_i) begin w_state <= `W_ADDR; s_awaddr <= data_cpu_addr_i; - s_awsize <= 3'b010; + s_awsize <= 3'b100; s_awvalid <= 1; s_wdata <= 0; @@ -599,7 +599,7 @@ module axi_master ( s_awvalid <= 0; s_wvalid <= 1; s_bready <= 1; - s_wdata <= write_buffer[31:0]; + s_wdata <= write_buffer; write_buffer <= {{32{1'b0}}, 
write_buffer[127:32]}; if (s_awlen == 0) s_wlast <= 1; From d259c8007f6ec9412024a31530cc11450017d5ce Mon Sep 17 00:00:00 2001 From: Easton Man Date: Sun, 22 May 2022 23:26:24 +0800 Subject: [PATCH 12/41] fix: fix typo --- src/vsrc/AXI/axi_master.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index ec1f99f..0b0faf1 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -78,10 +78,10 @@ module axi_master ( ); reg write_wait_enable; - reg [`DATA] inst_buffer; + reg [`Data] inst_buffer; //read and write data stall - reg [`DATA] data_buffer; + reg [`Data] data_buffer; reg [3:0] inst_r_state; reg [3:0] data_r_state; From 0bc6388de0e7a2e37bcbc19656f2cd3aaa51f7f8 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 23 May 2022 00:02:21 +0800 Subject: [PATCH 13/41] feat: implement branch flush - BUG: cross cacheline is not handled --- src/vsrc/cpu_top.sv | 4 ++-- src/vsrc/frontend/frontend.sv | 25 +++++++++++++++++++------ src/vsrc/frontend/ftq.sv | 12 ++++++++++++ src/vsrc/frontend/ifu.sv | 18 ++++++++++++++++++ src/vsrc/icache.sv | 12 ++++++------ 5 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 04322ff..bd056da 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -752,7 +752,7 @@ module cpu_top ( .clock (aclk), .coreid (0), // only one core, so always 0 .index (0), // commit channel index - // .valid (difftest_commit_info_delay1[0].valid), // 1 means valid + .valid (difftest_commit_info_delay1[0].valid), // 1 means valid .pc (difftest_commit_info_delay1[0].pc), .instr (difftest_commit_info_delay1[0].instr), .skip (0), // not sure meaning, but keep 0 for now @@ -771,7 +771,7 @@ module cpu_top ( .coreid (0), // only one core, so always 0 .index (1), // commit channel index .skip (0), // not sure meaning, but keep 0 for now - // .valid (difftest_commit_info_delay1[1].valid), // 1 means valid + .valid 
(difftest_commit_info_delay1[1].valid), // 1 means valid .pc (difftest_commit_info_delay1[1].pc), .instr (difftest_commit_info_delay1[1].instr), .is_TLBFILL (), diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index 09ac385..b14b990 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -79,19 +79,32 @@ module frontend #( .FETCH_WIDTH(4), .QUEUE_SIZE (4) ) u_ftq ( - .clk (clk), - .rst (rst), + .clk(clk), + .rst(rst), + + // Flush + .backend_flush_i(backend_flush_i), + + // <-> BPU .bpu_i (bpu_ftq_block), .bpu_queue_full_o(ftq_full), + + // <-> Backend .backend_commit_i(backend_commit_i), - .ifu_o (ftq_ifu_block), - .ifu_accept_i (ifu_ftq_accept) + + // <-> IFU + .ifu_o (ftq_ifu_block), + .ifu_accept_i(ifu_ftq_accept) ); ifu u_ifu ( - .clk (clk), - .rst (rst), + .clk(clk), + .rst(rst), + + // Flush + .flush_i(backend_flush_i), + .ftq_i (ftq_ifu_block), .ftq_accept_o (ifu_ftq_accept), .icache_rreq_o (icache_read_req_o), diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index e6b355b..b172b06 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -8,6 +8,9 @@ module ftq #( input logic clk, input logic rst, + // <-> Frontend + input logic backend_flush_i, + // <-> BPU input bpu_ftq_t bpu_i, output logic bpu_queue_full_o, @@ -53,6 +56,12 @@ module ftq #( if (backend_commit_i) comm_ptr <= comm_ptr + 1; if (ifu_accept_i) ifu_ptr <= ifu_ptr + 1; if (bpu_i.valid) bpu_ptr <= bpu_ptr + 1; + + // If backend redirect triggered, back to comm_ptr + if (backend_flush_i) begin + ifu_ptr <= comm_ptr; + bpu_ptr <= comm_ptr; + end end end @@ -62,6 +71,9 @@ module ftq #( next_FTQ = FTQ; if (backend_commit_i) next_FTQ[comm_ptr] = 0; if (bpu_i.valid) next_FTQ[bpu_ptr] = bpu_i; + + // If backend redirect triggered, clear FTQ + if (backend_flush_i) next_FTQ = 0; end // Output diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 3d50f8a..7662d2e 100644 --- a/src/vsrc/frontend/ifu.sv +++ 
b/src/vsrc/frontend/ifu.sv @@ -11,6 +11,9 @@ module ifu #( input logic clk, input logic rst, + // Flush + input flush_i, + // <-> Fetch Target Queue input ftq_ifu_t ftq_i, output logic ftq_accept_o, // In current cycle @@ -50,6 +53,17 @@ module ifu #( end end + // Flush state + logic is_flushing; + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + is_flushing <= 0; + end else if (flush_i) begin + is_flushing <= 1; + end else if (icache_rvalid_i[0] | icache_rvalid_i[1]) begin + is_flushing <= 0; + end + end // P1 // Cacheline returned @@ -60,6 +74,8 @@ module ifu #( always_comb begin if (ftq_i.is_cross_cacheline) icache_result_valid = icache_rvalid_i[0] & icache_rvalid_i[1]; else icache_result_valid = icache_rvalid_i[0]; + + if (is_flushing) icache_result_valid = 0; end // FTQ input @@ -69,6 +85,8 @@ module ifu #( always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin current_fetch_block <= 0; + end else if (flush_i) begin + current_fetch_block <= 0; end else begin current_fetch_block <= ftq_i; end diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index a1795c4..a3ddb2b 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -154,10 +154,10 @@ module icache #( logic rreq_1_delay1, rreq_2_delay1; logic [ADDR_WIDTH-1:0] raddr_1_delay1, raddr_2_delay1; always_ff @(posedge clk) begin - rreq_1_delay1 <= rreq_1_i; - rreq_2_delay1 <= rreq_2_i; - raddr_1_delay1 <= raddr_1_i; - raddr_2_delay1 <= raddr_2_i; + rreq_1_delay1 <= rreq_1_i; + rreq_2_delay1 <= rreq_2_i; + if (rreq_1_i) raddr_1_delay1 <= raddr_1_i; + if (rreq_2_i) raddr_2_delay1 <= raddr_2_i; end @@ -277,13 +277,13 @@ module icache #( // write this way if (state == REFILL_1_WAIT && axi_rvalid_i) begin tag_bram_we[i][0] = 1; - tag_bram_wdata[i][0] = {1'b1, raddr_1_i[31:12]}; + tag_bram_wdata[i][0] = {1'b1, raddr_1_delay1[31:12]}; data_bram_we[i][0] = 1; data_bram_wdata[i][0] = axi_data_i; end if (state == REFILL_2_WAIT && axi_rvalid_i) begin tag_bram_we[i][1] = 1; - 
tag_bram_wdata[i][1] = {1'b1, raddr_1_i[31:12]}; + tag_bram_wdata[i][1] = {1'b1, raddr_2_delay1[31:12]}; data_bram_we[i][1] = 1; data_bram_wdata[i][1] = axi_data_i; end From 27c94046d924085452e042619ce6f31c56357f6c Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 23 May 2022 10:39:31 +0800 Subject: [PATCH 14/41] feat: implement cross cacheline handle --- src/vsrc/frontend/frontend.sv | 2 +- src/vsrc/frontend/ifu.sv | 37 +++++++++++++++++++---------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index b14b990..a9c75b4 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -66,7 +66,7 @@ module frontend #( bpu_ftq_block.start_pc = pc; bpu_ftq_block.valid = 1; bpu_ftq_block.length = 4; - bpu_ftq_block.is_cross_cacheline = 0; + bpu_ftq_block.is_cross_cacheline = (pc[3:2] != 2'b00); end else begin bpu_ftq_block = 0; end diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 7662d2e..75ea446 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -45,8 +45,8 @@ module ifu #( if (ftq_input_valid) begin icache_rreq_o[0] = 1; icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 1 : 0; - icache_raddr_o[0] = ftq_i.start_pc; - icache_raddr_o[1] = ftq_i.is_cross_cacheline ? ftq_i.start_pc + 16 : 0; // TODO: remove magic number + icache_raddr_o[0] = {ftq_i.start_pc[ADDR_WIDTH-1:4], 4'b0}; + icache_raddr_o[1] = ftq_i.is_cross_cacheline ? 
{ftq_i.start_pc[ADDR_WIDTH-1:4], 4'b0} + 16 : 0; // TODO: remove magic number end else begin icache_rreq_o = 0; icache_raddr_o = 0; @@ -66,19 +66,7 @@ module ifu #( end // P1 - // Cacheline returned - logic [FETCH_WIDTH-1:0][DATA_WIDTH-1:0] cacheline_0, cacheline_1; - assign cacheline_0 = icache_rdata_i[0]; - assign cacheline_1 = icache_rdata_i[1]; - logic icache_result_valid; - always_comb begin - if (ftq_i.is_cross_cacheline) icache_result_valid = icache_rvalid_i[0] & icache_rvalid_i[1]; - else icache_result_valid = icache_rvalid_i[0]; - - if (is_flushing) icache_result_valid = 0; - end - - // FTQ input + // FTQ input pass to P1 ftq_ifu_t current_fetch_block; logic [ADDR_WIDTH-1:0] debug_p1_pc = current_fetch_block.start_pc; // DEBUG logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG @@ -91,11 +79,26 @@ module ifu #( current_fetch_block <= ftq_i; end end + // Cacheline returned + logic [FETCH_WIDTH-1:0][DATA_WIDTH-1:0] cacheline_0, cacheline_1; + assign cacheline_0 = icache_rdata_i[0]; + assign cacheline_1 = icache_rdata_i[1]; + logic icache_result_valid; + always_comb begin + if (current_fetch_block.is_cross_cacheline) + icache_result_valid = icache_rvalid_i[0] & icache_rvalid_i[1]; + else icache_result_valid = icache_rvalid_i[0]; + + if (is_flushing) icache_result_valid = 0; + end + // If last req to icache is valid, then accept another ftq input assign accept_ftq_input = icache_result_valid; // P2 // Send instr info to IB + logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; + assign cacheline_combined = {cacheline_1, cacheline_0}; always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin for (integer i = 0; i < FETCH_WIDTH; i++) begin @@ -111,11 +114,11 @@ module ifu #( instr_buffer_o[i].valid <= 1; instr_buffer_o[i].is_last_in_block <= 1; instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long - instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + instr_buffer_o[i].instr <= 
cacheline_combined[current_fetch_block.start_pc[3:2]+i]; end else begin instr_buffer_o[i].valid <= 1; instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long - instr_buffer_o[i].instr <= cacheline_0[current_fetch_block.start_pc[3:2]+i]; + instr_buffer_o[i].instr <= cacheline_combined[current_fetch_block.start_pc[3:2]+i]; end end else begin instr_buffer_o[i] <= 0; From 099f8caece47577fe085d9f46d42ff762375bbcd Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 23 May 2022 20:48:11 +0800 Subject: [PATCH 15/41] fix: fix icache timing --- src/vsrc/frontend/ftq.sv | 4 ++-- src/vsrc/frontend/ifu.sv | 22 ++++++++++++++++++- src/vsrc/icache.sv | 47 +++++++++++++++++++++++++--------------- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index b172b06..bfd4e23 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -59,8 +59,8 @@ module ftq #( // If backend redirect triggered, back to comm_ptr if (backend_flush_i) begin - ifu_ptr <= comm_ptr; - bpu_ptr <= comm_ptr; + ifu_ptr <= comm_ptr + 1; + bpu_ptr <= comm_ptr + 1; end end end diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 75ea446..6f75958 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -55,13 +55,33 @@ module ifu #( // Flush state logic is_flushing; + logic [1:0] flushing_rvalid; + always_ff @(posedge clk or negedge rst_n) begin + if (~rst_n) begin + flushing_rvalid <= 0; + end else if (flush_i) begin + flushing_rvalid <= 0; + end else begin + if (icache_rvalid_i[0]) flushing_rvalid[0] <= 1; + if (icache_rvalid_i[1]) flushing_rvalid[1] <= 1; + end + end + logic last_rreq_cross_cacheline; always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin is_flushing <= 0; + last_rreq_cross_cacheline <= 0; end else if (flush_i) begin is_flushing <= 1; - end else if (icache_rvalid_i[0] | icache_rvalid_i[1]) begin + last_rreq_cross_cacheline <= 
current_fetch_block.is_cross_cacheline; + end else if (last_rreq_cross_cacheline) begin + if ((icache_rvalid_i | flushing_rvalid) == 2'b11) begin + is_flushing <= 0; + last_rreq_cross_cacheline <= 0; + end + end else if (icache_rvalid_i[0] == 1) begin is_flushing <= 0; + last_rreq_cross_cacheline <= 0; end end diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index a3ddb2b..7fbbb9d 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -125,7 +125,10 @@ module icache #( // Cache addr always_comb begin : cache_addr_gen for (integer i = 0; i < NWAY; i++) begin - if (rreq_1_i) begin + if (tag_bram_we[i][0]) begin + tag_bram_addr[i][0] = raddr_1_delay1[11:4]; + data_bram_addr[i][0] = raddr_1_delay1[11:4]; + end else if (rreq_1_i) begin tag_bram_addr[i][0] = raddr_1_i[11:4]; data_bram_addr[i][0] = raddr_1_i[11:4]; end else begin // TODO: write @@ -134,7 +137,10 @@ module icache #( end end for (integer i = 0; i < NWAY; i++) begin - if (rreq_2_i) begin + if (tag_bram_we[i][1]) begin + tag_bram_addr[i][1] = raddr_2_delay1[11:4]; + data_bram_addr[i][1] = raddr_2_delay1[11:4]; + end else if (rreq_2_i) begin tag_bram_addr[i][1] = raddr_2_i[11:4]; data_bram_addr[i][1] = raddr_2_i[11:4]; end else begin @@ -156,8 +162,8 @@ module icache #( always_ff @(posedge clk) begin rreq_1_delay1 <= rreq_1_i; rreq_2_delay1 <= rreq_2_i; - if (rreq_1_i) raddr_1_delay1 <= raddr_1_i; - if (rreq_2_i) raddr_2_delay1 <= raddr_2_i; + if (rreq_1_i & (rvalid_1 | state == IDLE)) raddr_1_delay1 <= raddr_1_i; + if (rreq_2_i & (rvalid_2 | state == IDLE)) raddr_2_delay1 <= raddr_2_i; end @@ -169,20 +175,23 @@ module icache #( end end + logic rvalid_1, rvalid_2; + assign rvalid_1_o = rvalid_1 && (rreq_1_delay1 || state == REFILL_1_WAIT); + assign rvalid_2_o = rvalid_2 && (rreq_2_delay1 || state == REFILL_2_WAIT); // Generate read output always_comb begin - rvalid_1_o = 0; - rdata_1_o = 0; - rvalid_2_o = 0; - rdata_2_o = 0; + rvalid_1 = 0; + rdata_1_o = 0; + rvalid_2 = 0; + rdata_2_o = 0; for 
(integer i = 0; i < NWAY; i++) begin if (tag_hit[i][0]) begin - rvalid_1_o = 1; - rdata_1_o = data_bram_rdata[i][0]; + rvalid_1 = 1; + rdata_1_o = data_bram_rdata[i][0]; end if (tag_hit[i][1]) begin - rvalid_2_o = 1; - rdata_2_o = data_bram_rdata[i][1]; + rvalid_2 = 1; + rdata_2_o = data_bram_rdata[i][1]; end end end @@ -226,15 +235,17 @@ module icache #( else next_state = REFILL_2_REQ; end REFILL_1_WAIT: begin - if (rvalid_1_o) begin - if (miss_2) next_state = REFILL_2_REQ; - else next_state = IDLE; + if (rvalid_1) begin + // if (miss_2) next_state = REFILL_2_REQ; + // else + next_state = IDLE; end else next_state = REFILL_1_WAIT; end REFILL_2_WAIT: begin - if (rvalid_2_o) begin - if (miss_1) next_state = REFILL_1_REQ; - else next_state = IDLE; + if (rvalid_2) begin + // if (miss_1) next_state = REFILL_1_REQ; + // else + next_state = IDLE; end else next_state = REFILL_2_WAIT; end default: begin From 239d3138f9934a170438349df0acfe650dfad2ea Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 23 May 2022 21:20:55 +0800 Subject: [PATCH 16/41] fix: fix AXI handshake signal --- src/vsrc/cpu_top.sv | 1 - src/vsrc/frontend/frontend.sv | 11 +++++++++-- src/vsrc/frontend/ftq.sv | 5 +++-- src/vsrc/icache.sv | 4 ++-- src/vsrc/instr_buffer.sv | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index bd056da..ccdeb6e 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -184,7 +184,6 @@ module cpu_top ( logic [1:0][`InstAddrBus] frontend_icache_addr; // ICache -> Frontend - logic icache_frontend_stallreq; logic [1:0]icache_frontend_valid; logic [1:0][127:0] icache_frontend_data; // Cacheline is 128b diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index a9c75b4..e456449 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -52,7 +52,7 @@ module frontend #( always_comb begin : next_pc_comb if (backend_flush_i) begin next_pc = backend_next_pc_i; - end else 
if (instr_buffer_stallreq_i || ftq_full) begin + end else if (ftq_full) begin next_pc = pc; end else begin next_pc = pc + FETCH_WIDTH * 4; @@ -85,6 +85,9 @@ module frontend #( // Flush .backend_flush_i(backend_flush_i), + // <-> Frontend + .instr_buffer_stallreq_i(instr_buffer_stallreq_i), + // <-> BPU .bpu_i (bpu_ftq_block), .bpu_queue_full_o(ftq_full), @@ -98,6 +101,8 @@ module frontend #( ); + instr_buffer_info_t ifu_instr_output[FETCH_WIDTH]; + assign instr_buffer_o = instr_buffer_stallreq_i ? '{FETCH_WIDTH{0}} : ifu_instr_output; ifu u_ifu ( .clk(clk), .rst(rst), @@ -112,7 +117,9 @@ module frontend #( .icache_rvalid_i(icache_read_valid_i), .icache_rdata_i (icache_read_data_i), .stallreq_i (instr_buffer_stallreq_i), - .instr_buffer_o (instr_buffer_o) + + // <-> Frontend + .instr_buffer_o(ifu_instr_output) ); diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index bfd4e23..4c8c03b 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -10,6 +10,7 @@ module ftq #( // <-> Frontend input logic backend_flush_i, + input logic instr_buffer_stallreq_i, // <-> BPU input bpu_ftq_t bpu_i, @@ -54,8 +55,8 @@ module ftq #( comm_ptr <= 0; end else begin if (backend_commit_i) comm_ptr <= comm_ptr + 1; - if (ifu_accept_i) ifu_ptr <= ifu_ptr + 1; - if (bpu_i.valid) bpu_ptr <= bpu_ptr + 1; + if (ifu_accept_i & ~instr_buffer_stallreq_i) ifu_ptr <= ifu_ptr + 1; + if (bpu_i.valid & ~instr_buffer_stallreq_i) bpu_ptr <= bpu_ptr + 1; // If backend redirect triggered, back to comm_ptr if (backend_flush_i) begin diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 7fbbb9d..faeae62 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -262,11 +262,11 @@ module icache #( case (state) REFILL_1_REQ, REFILL_1_WAIT: begin axi_rreq_o = 1; - axi_addr_o = raddr_1_i; + axi_addr_o = raddr_1_delay1; end REFILL_2_REQ, REFILL_2_WAIT: begin axi_rreq_o = 1; - axi_addr_o = raddr_2_i; + axi_addr_o = raddr_2_delay1; end default: begin end diff --git 
a/src/vsrc/instr_buffer.sv b/src/vsrc/instr_buffer.sv index 7b3705b..1d68f59 100644 --- a/src/vsrc/instr_buffer.sv +++ b/src/vsrc/instr_buffer.sv @@ -3,7 +3,7 @@ module instr_buffer #( parameter IF_WIDTH = 2, parameter ID_WIDTH = 2, - parameter BUFFER_SIZE = 8 + parameter BUFFER_SIZE = 16 ) ( input logic clk, input logic rst, From c8bff8a141a1f2083e332ed1488777007f1e3e70 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 23 May 2022 21:51:40 +0800 Subject: [PATCH 17/41] fix: fix issues with IB full - still BUGGY --- src/vsrc/frontend/ifu.sv | 2 +- src/vsrc/instr_buffer.sv | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 6f75958..aa4bc52 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -129,7 +129,7 @@ module ifu #( // Default instr_buffer_o[i].is_last_in_block <= 0; - if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid) begin + if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid && ~is_flushing && ~flush_i) begin if (i == current_fetch_block.length - 1) begin instr_buffer_o[i].valid <= 1; instr_buffer_o[i].is_last_in_block <= 1; diff --git a/src/vsrc/instr_buffer.sv b/src/vsrc/instr_buffer.sv index 1d68f59..93cfa3a 100644 --- a/src/vsrc/instr_buffer.sv +++ b/src/vsrc/instr_buffer.sv @@ -25,12 +25,13 @@ module instr_buffer #( instr_buffer_info_t buffer_queue[BUFFER_SIZE], next_buffer_queue[BUFFER_SIZE]; - logic [$clog2(BUFFER_SIZE)-1:0] read_ptr, write_ptr, write_ptr_plus_2; + logic [$clog2(BUFFER_SIZE)-1:0] read_ptr, write_ptr; // Workaround, verilator seems to extend {write_ptr + 2} to more bits // we want a loopback counter, so declare a fixed width to get around - assign write_ptr_plus_2 = write_ptr + 2; - assign frontend_stallreq_o = (write_ptr_plus_2 == read_ptr); + logic [$clog2(BUFFER_SIZE)-1:0] buffer_clearance; + assign buffer_clearance = read_ptr - write_ptr; + assign frontend_stallreq_o = (buffer_clearance 
<= 4 && buffer_clearance != 0); // State transition always_ff @(posedge clk or negedge rst_n) begin : buffer_queue_ff From a2adf4aa3afca18df36b347e248b618d5b60f38d Mon Sep 17 00:00:00 2001 From: fly-high-to-iceland <1315617085@qq.com> Date: Mon, 30 May 2022 16:47:58 +0800 Subject: [PATCH 18/41] dcache --- mycpu | 1 + src/vsrc/dcache.sv | 374 ++++++++++++++++++++++++++++++++++++++++++++ src/vsrc/defines.sv | 235 ---------------------------- 3 files changed, 375 insertions(+), 235 deletions(-) create mode 160000 mycpu create mode 100644 src/vsrc/dcache.sv delete mode 100644 src/vsrc/defines.sv diff --git a/mycpu b/mycpu new file mode 160000 index 0000000..cd42a88 --- /dev/null +++ b/mycpu @@ -0,0 +1 @@ +Subproject commit cd42a882c4f4a2db413258234791b396e9961fc9 diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv new file mode 100644 index 0000000..cbb92a3 --- /dev/null +++ b/src/vsrc/dcache.sv @@ -0,0 +1,374 @@ +`timescale 1ns / 1ps +////////////////////////////////////////////////////////////////////////////////// +// Company: +// Engineer: +// +// Create Date: 2022/04/21 17:24:48 +// Design Name: +// Module Name: cache +// Project Name: +// Target Devices: +// Tool Versions: +// Description: +// +// Dependencies: +// +// Revision: +// Revision 0.01 - File Created +// Additional Comments: +// +////////////////////////////////////////////////////////////////////////////////// + +//cache2飬ÿ4k +//VDTagData=1+1+20+128=150 + + +module cache( + input wire clk, + input wire rst, + //cacheCPUˮߵĽ + input wire valid, //Ч + input wire op, // 1:write 0: read + input wire uncache, //־uncacheָλЧ + input wire[7:0] index, // ַindex(addr[11:4]) + input wire[19:0] tag, //TLB鵽pfnγɵtag + input wire[3:0] offset, //ַoffsetaddr[3:0] + input wire[3:0] wstrb, //дֽʹź + input wire[31:0] wdata, //д + output reg addr_ok, //ôĵַOKַգдַݱ + output reg data_ok, //ôݴOkݷأдд + output reg[31:0] rdata, //CacheĽ + + //cacheAXIߵĽӿ + output reg rd_req, //ЧźšߵƽЧ + output reg[3:0] rd_type, //ͣ3'b000: 
ֽڣ3'b001: ֣3'b010: ֣3'b100Cache + output reg[31:0] rd_addr, //ʼַ + input wire rd_rdy, //ܷ񱻽յźšߵƽЧ + input wire ret_valid, //ЧߵƽЧ + input wire ret_last, //һζӦһ + input wire[31:0] ret_data, // + output reg wr_req, //дЧźšߵƽЧ + output reg[2:0] wr_type, //дͣ3'b000: ֽڣ3'b001: ֣3'b010: ֣3'b100Cache + output reg[31:0] wr_addr, //дʼַ + output reg[3:0] wr_wstrb, //дֽ롣дΪ3'b000: ֽڣ3'b001: ֣3'b010ֵ² + output reg[127:0] wr_data, //д + input wire wr_rdy //дܷ񱻽ܵźšp2234. + + + //SRAM-AXIתģеȷʵ + ); + +//״̬״̬ +//IDLECacheģ鵱ǰûκβ +//LOOKUPCacheģ鵱ǰִһҵõIJѯ +//MISSCacheģ鵱ǰIJCacheȱʧڵȴAXIߵwr_rdyź +//REPLACE滻CacheѾCacheжڵȴAXIߵrd_rdyź +//REFILLCacheȱʧķôѷ׼/ڽȱʧCacheдCache +parameter IDLE=0; +parameter LOOKUP=1; +parameter MISS=2; +parameter REPLACE=3; +parameter REFILL=4; +parameter WRITE = 5; +//Write Buffer״̬״̬ +//IDLE: Write Buffer״̬ǰûдд +//WRITE: ддCacheС״̬LOOKUP״̬ҷStoreCacheǣWrite Buffer״̬Write״̬ +//ͬʱWrite BufferĴStoreҪдIndex·šoffsetдʹ(д32λЩֽ)дݡ + + +parameter V=149; +parameter D=148; +parameter TagMSB=147; +parameter TagLSB=128; +parameter BlockMSB=127; +parameter BlockLSB=0; + +reg [149:0] cache_data [0:511]; +reg [2:0] state,next_state; +reg [2:0] wr_state,wr_next_state; +reg hit; +reg hit1; +reg hit2; +reg way; //hitway壬misswayʾһ· +reg write_op; //hit write ִб־ߵƽЧ +reg miss_way_r; //ȱʧ·дʹ + +//ַ32λ[31:12]ΪTag[11:4]ΪCacheindex, [3:0]:offset,Cacheƫ +wire [7:0]cpu_req_index; +wire [19:0]cpu_req_tag; +wire [3:0]cpu_req_offset; + +//wire cpu_req_uncache; +wire cpu_req_valid; +wire cpu_req_op; +wire[3:0] cpu_req_wstrb; +wire[31:0] cpu_req_wdata; + +wire cpu_rd_rdy; +wire cpu_wr_rdy; +wire cpu_ret_valid; +wire cpu_ret_last; +wire[31:0] cpu_ret_data; + +////ַ32λ[31:12]ΪTag[11:4]ΪCacheindex, [3:0]:offset,Cacheƫ +//reg [7:0]cpu_req_index; +//reg [19:0]cpu_req_tag; +//reg [3:0]cpu_req_offset; + +////wire cpu_req_uncache; +//reg cpu_req_valid; +//reg cpu_req_op; +//reg[3:0] cpu_req_wstrb; +//reg[31:0] cpu_req_wdata; + +//reg cpu_rd_rdy; +//reg cpu_wr_rdy; +//reg cpu_ret_valid; +//reg[1:0] 
cpu_ret_last; +//reg[31:0] cpu_ret_data; + +//hit write ͻ λЧ +reg hit_conflict = 0; + +assign cpu_req_valid = valid; +assign cpu_req_op = op; +assign cpu_req_uncache = uncache; +assign cpu_req_offset = offset; +assign cpu_req_index = index; +assign cpu_req_tag = tag; +assign cpu_req_wstrb = wstrb; +assign cpu_req_wdata = wdata; +assign cpu_rd_rdy = rd_rdy; +assign cpu_wr_rdy = wr_rdy; +assign cpu_ret_valid = ret_valid; +assign cpu_ret_last = ret_last; +assign cpu_ret_data = ret_data; + + +//дCacheִй +integer i; +//ʼcache +initial +begin + for(i=0;i<512;i=i+1) + cache_data[i]=150'd0; +end + + +always@(posedge clk,posedge rst)begin + if(!rst) + state<=IDLE; + else + state<=next_state; +end + +//state change +always@(*)begin + case(state) + IDLE:if(!cpu_req_valid ||( cpu_req_valid && hit_conflict )) + next_state=IDLE; + else + next_state=LOOKUP; + LOOKUP: if( (hit && !cpu_req_valid) || (hit && ( cpu_req_valid && hit_conflict)) ) //hit + next_state=IDLE; + else if( hit && cpu_req_valid ) + next_state=LOOKUP; + else if(!hit)begin + next_state=MISS; + + end + MISS:if(cpu_wr_rdy == 0) + next_state=MISS; + else if(cpu_wr_rdy == 1) + next_state=REPLACE; + REPLACE:if(cpu_rd_rdy == 0) + next_state=REPLACE; + else + next_state=REFILL; + REFILL:if(cpu_ret_valid == 1 && cpu_ret_last == 1) + next_state = IDLE; + else + next_state = REFILL; + + default:next_state=IDLE; + endcase +end +reg wr_buffer; +//Write BUffer state change +always@(*)begin + case(wr_state) + IDLE:if(hit && cpu_req_op && cpu_req_valid)begin + wr_next_state=WRITE; + end + else begin + wr_next_state=IDLE; + end + WRITE: if( (hit) && (cpu_req_op) ) //hit + wr_next_state=WRITE; + else + wr_next_state=IDLE; + + default:wr_next_state=IDLE; + endcase +end + + +//Tag compare +//hit1 +always@(*)begin + if(state==LOOKUP) + if(cache_data[2*cpu_req_index][V]==1'b1&&cache_data[2*cpu_req_index][TagMSB:TagLSB]==cpu_req_tag)begin + hit1=1'b1; + if(cpu_req_op == 1)begin + if( index == cpu_req_index && tag == cpu_req_tag)begin 
+ hit_conflict = 1; + end + end + end + else + hit1=1'b0; + else + hit1=1'b0; +end +//hit2 +always@(*)begin + if(state==LOOKUP) + if(cache_data[2*cpu_req_index+1][V]==1'b1&&cache_data[2*cpu_req_index+1][TagMSB:TagLSB]==cpu_req_tag)begin + hit2=1'b1; + if( cpu_req_op == 1)begin + if( index == cpu_req_index && tag == cpu_req_tag)begin + hit_conflict = 1; + end + end + end + else + hit2=1'b0; + else + hit2=1'b0; +end +//hit +always@(*)begin + if(state==LOOKUP)begin + hit=hit1||hit2; + if(hit && cpu_req_op)begin + wr_state = WRITE; + end + end + else + hit=1'b0; +end + + +//LOOKUPģ: CacheкĶд---Data Select +always@(posedge clk)begin + if(state==LOOKUP && hit) + if( op==1'b0) //read hit + begin + addr_ok<=1'b1; + if(hit1)begin + rdata =cache_data[2*cpu_req_index][8*cpu_req_offset +:32]; + end + else begin + rdata =cache_data[2*cpu_req_index+1][8*cpu_req_offset +:32]; + end + end + + else if(wr_state==WRITE && hit) //write hit + begin + addr_ok <= 1'b1; + data_ok <= 1'b1; + if(hit1) + begin + cache_data[2*cpu_req_index][8*cpu_req_offset +:32] = wdata; + cache_data[2*cpu_req_index][D] =1'b1; + end + else + begin + cache_data[2*cpu_req_index+1][8*cpu_req_offset +:32] = wdata; + cache_data[2*cpu_req_index+1][D] = 1'b1; + end + if(cpu_req_op == 0)begin + if(cpu_req_offset[3:2] == offset[3:2])begin + hit_conflict = 1; + end + end + end +end + +//way LFSB --Miss Buffer +always@(*)begin + if( state==MISS )begin //δ + case({cache_data[2*cpu_req_index][V],cache_data[2*cpu_req_index+1][V]}) + 2'b01:way=1'b0; //0· + 2'b10:way=1'b1; //1· + 2'b00:way=1'b0; //01· + 2'b11:way=1'b0; //01·ãĬ滻0· + default:way=1'b0; + endcase + miss_way_r = 1; + end +end + +reg[1:0] rt_offset; +//AXIӿڵд +always @ (*)begin + if (state == MISS)begin // 洢ҪдݻеַϢ +// if(cpu_req_op == 1)begin +// if(cache_data[2*cpu_req_index + way][D])begin + +// end + +// end + rd_addr = {cpu_req_tag[19:0],cpu_req_index[7:0],cpu_req_offset}; + rd_type = 3'b000; + addr_ok = 1'b1; + data_ok <= 1'b1; + end + else if (state == 
REPLACE)begin + //滻еCacheд + if(wr_rdy) begin + if(cache_data[2*cpu_req_index+way][V:D] == 2'b11 )begin + wr_req = 1'b1; + wr_addr = {cache_data[2*cpu_req_index+way][TagMSB:TagLSB], cpu_req_index, 4'b0000}; + wr_wstrb = wstrb; + wr_data = cache_data[2*cpu_req_index+way][BlockMSB:BlockLSB]; + end + end + else begin + wr_req = 1'b0; + end + rd_req = 1'b1; + end + else begin + wr_req = 1'b0; + rd_req = 1'b0; + end +end +//Miss Buffer +always@(*)begin + if(state == REFILL)begin + if(cpu_req_op == 0)begin + cache_data[2*cpu_req_index+way][149:128] = {2'b10,cpu_req_tag}; + cache_data[2*cpu_req_index+way][rt_offset*32 +:32] = ret_data; + if(ret_last)begin + rt_offset = 0; + rd_req = 1'b0; + rdata = cache_data[2*cpu_req_index+way][cpu_req_index*8 +:32]; + end + end + if(cpu_req_op == 1 )begin + cache_data[2*cpu_req_index+way][149:128] = {2'b11,cpu_req_tag}; + cache_data[2*cpu_req_index+way][rt_offset * 8 +:32] = ret_data; + if(ret_last)begin + rt_offset = 0; + cache_data[2*cpu_req_index+way][cpu_req_index*8 +:32] = cpu_req_wdata; + end + end + rt_offset = rt_offset+ 1; + end +end + +endmodule + + + diff --git a/src/vsrc/defines.sv b/src/vsrc/defines.sv deleted file mode 100644 index cb24f5c..0000000 --- a/src/vsrc/defines.sv +++ /dev/null @@ -1,235 +0,0 @@ -`ifndef DEFINES_SV -`define DEFINES_SV - -// Global define -`define RstEnable 1'b1 -`define RstDisable 1'b0 -`define ZeroWord 32'h00000000 -`define WriteEnable 1'b1 -`define WriteDisable 1'b0 -`define ReadEnable 1'b1 -`define ReadDisable 1'b0 -`define AluOpBus 7:0 -`define AluSelBus 2:0 -`define InstValid 1'b1 -`define InstInvalid 1'b0 -`define Stop 1'b1 -`define NoStop 1'b0 -`define Branch 1'b1 -`define NotBranch 1'b0 -`define InterruptAssert 1'b1 -`define InterruptNotAssert 1'b0 -`define TrapAssert 1'b1 -`define TrapNotAssert 1'b0 -`define True_v 1'b1 -`define False_v 1'b0 -`define ChipEnable 1'b1 -`define ChipDisable 1'b0 - - -// Instruction Encode - - -// 2RI16-type -`define EXE_JIRL 6'b010011 -`define EXE_BEQ 
6'b010110 -`define EXE_BNE 6'b010111 -`define EXE_BLT 6'b011000 -`define EXE_BGE 6'b011001 -`define EXE_BLTU 6'b011010 -`define EXE_BGEU 6'b011011 -`define EXE_LU12I_W 7'b0001010 -`define EXE_PCADDU12I 7'b0001110 - -`define EXE_B 6'b010100 -`define EXE_BL 6'b010101 - -// 6-12 bit opcode, decoded in opcode_2 -`define EXE_ATOMIC_MEM 6'b001000 -`define EXE_LL_W 8'b00100000 -`define EXE_SC_W 8'b00100001 - - -`define EXE_SPECIAL 8'b00000100 -`define EXE_CSR_RELATED 6'b00???? -`define EXE_OTHER 6'b100100 -`define EXE_CSRRD 5'b00000 -`define EXE_CSRWR 5'b00001 -`define EXE_CSRXCHG 5'b00011 -`define EXE_TLB_RELATED 5'b10000 - -// 2R-type -`define EXE_TLBSRCH 22'b0000011001001000001010 -`define EXE_TLBRD 22'b0000011001001000001011 -`define EXE_TLBWR 22'b0000011001001000001100 -`define EXE_TLBFILL 22'b0000011001001000001101 -`define EXE_ERTN 22'b0000011001001000001110 - -// 3R-type -`define EXE_ADD_W 17'b00000000000100000 -`define EXE_SUB_W 17'b00000000000100010 -`define EXE_SLT 17'b00000000000100100 -`define EXE_SLTU 17'b00000000000100101 -`define EXE_NOR 17'b00000000000101000 -`define EXE_AND 17'b00000000000101001 -`define EXE_OR 17'b00000000000101010 -`define EXE_XOR 17'b00000000000101011 -`define EXE_SLL_W 17'b00000000000101110 -`define EXE_SRL_W 17'b00000000000101111 -`define EXE_SRA_W 17'b00000000000110000 -`define EXE_MUL_W 17'b00000000000111000 -`define EXE_MULH_W 17'b00000000000111001 -`define EXE_MULH_WU 17'b00000000000111010 -`define EXE_DIV_W 17'b00000000001000000 -`define EXE_MOD_W 17'b00000000001000001 -`define EXE_DIV_WU 17'b00000000001000010 -`define EXE_MOD_WU 17'b00000000001000011 -`define EXE_BREAK 17'b00000000001010100 -`define EXE_SYSCALL 17'b00000000001010110 -// -`define EXE_IDLE 17'b00000110010010001 -`define EXE_INVTLB 17'b00000110010010011 -// 顺序核无需实现 -`define EXE_DBAR 17'b00111000011100100 -`define EXE_IBAR 17'b00111000011100101 - -// 2RI12-type -`define EXE_SLTI 10'b0000001000 -`define EXE_SLTUI 10'b0000001001 -`define EXE_ADDI_W 10'b0000001010 
-`define EXE_ANDI 10'b0000001101 -`define EXE_ORI 10'b0000001110 -`define EXE_XORI 10'b0000001111 -`define EXE_LD_B 10'b0010100000 -`define EXE_LD_H 10'b0010100001 -`define EXE_LD_W 10'b0010100010 -`define EXE_ST_B 10'b0010100100 -`define EXE_ST_H 10'b0010100101 -`define EXE_ST_W 10'b0010100110 -`define EXE_LD_BU 10'b0010101000 -`define EXE_LD_HU 10'b0010101001 -`define EXE_PRELD 10'b0010101011 - -// 2RI16-type -`define EXE_JIRL 6'b010011 -`define EXE_BEQ 6'b010110 -`define EXE_BNE 6'b010111 -`define EXE_BLT 6'b011000 -`define EXE_BGE 6'b011001 -`define EXE_BLTU 6'b011010 -`define EXE_BGEU 6'b011011 - - - -`define EXE_SLLI_W 14'b00000000010000 // EXE_SHIFT_ARITH -`define EXE_SRLI_W 14'b00000000010001 -`define EXE_SRAI_W 14'b00000000010010 - - -`define EXE_NOP 22'b0 - - - -// AluOp -`define EXE_NOP_OP 8'b00000000 -`define EXE_OR_OP 8'b00000001 -`define EXE_AND_OP 8'b00000010 -`define EXE_XOR_OP 8'b00000011 -`define EXE_NOR_OP 8'b00000100 -`define EXE_LUI_OP 8'b00000101 -`define EXE_SLL_OP 8'b00110001 // HACK: Workaround -`define EXE_SRL_OP 8'b00000110 -`define EXE_SRA_OP 8'b00000111 -`define EXE_ADD_OP 8'b00001000 -`define EXE_SUB_OP 8'b00001001 -`define EXE_MUL_OP 8'b00001010 -`define EXE_MULH_OP 8'b00001011 -`define EXE_MULHU_OP 8'b00001100 -`define EXE_DIV_OP 8'b00001101 -`define EXE_DIVU_OP 8'b10001101 // HACK: DIVU and MODU should be re-mapped -`define EXE_MOD_OP 8'b00001110 -`define EXE_MODU_OP 8'b10001110 -`define EXE_SLT_OP 8'b00001111 -`define EXE_SLTU_OP 8'b00010000 -`define EXE_B_OP 8'b00010001 -`define EXE_BL_OP 8'b00010010 -`define EXE_BEQ_OP 8'b00010011 -`define EXE_BNE_OP 8'b00010100 -`define EXE_BLT_OP 8'b00010101 -`define EXE_BGE_OP 8'b00010110 -`define EXE_BLTU_OP 8'b00010111 -`define EXE_BGEU_OP 8'b00011000 -`define EXE_JIRL_OP 8'b00011001 -`define EXE_LD_B_OP 8'b00011010 -`define EXE_LD_H_OP 8'b00011011 -`define EXE_LD_W_OP 8'b00011100 -`define EXE_ST_B_OP 8'b00011101 -`define EXE_ST_H_OP 8'b00011110 -`define EXE_ST_W_OP 8'b00011111 -`define 
EXE_LD_BU_OP 8'b00100000 -`define EXE_LD_HU_OP 8'b00100001 -`define EXE_LL_OP 8'b00100010 -`define EXE_SC_OP 8'b00100011 -`define EXE_PCADD_OP 8'b00100100 -`define EXE_SYSCALL_OP 8'b00100101 -`define EXE_BREAK_OP 8'b00100110 -`define EXE_CSRRD_OP 8'b00100111 -`define EXE_CSRWR_OP 8'b00101000 -`define EXE_CSRXCHG_OP 8'b00101001 -`define EXE_TLBFILL_OP 8'b00101010 -`define EXE_TLBRD_OP 8'b00101011 -`define EXE_TLBWR_OP 8'b00101100 -`define EXE_TLBSRCH_OP 8'b00101101 -`define EXE_ERTN_OP 8'b00101110 -`define EXE_IDLE_OP 8'b00101111 -`define EXE_INVTLB_OP 8'b00110000 - - -//AluSel -`define EXE_RES_NOP 3'b000 -`define EXE_RES_LOGIC 3'b001 -`define EXE_RES_SHIFT 3'b010 -`define EXE_RES_MOVE 3'b011 -`define EXE_RES_ARITH 3'b100 -`define EXE_RES_JUMP 3'b101 -`define EXE_RES_LOAD_STORE 3'b110 -`define EXE_RES_CSR 3'b111 - - -// Rom related -`define InstAddrBus 31:0 -`define InstBus 31:0 -`define InstMemNum 131071 -`define InstMemNumLog2 17 - -// Registers -`define RegAddrBus 4:0 -`define RegBus 31:0 -`define RegWidth 32 -`define RegNum 32 -`define RegNumLog2 5 -`define NOPRegAddr 5'b00000 -`define DoubleRegBus 63:0 - -//data_ram -`define DataAddrBus 31:0 -`define DataBus 31:0 -`define DataMemNum 128 -`define DataMemNumLog2 17 -`define ByteWidth 7:0 - - -// SRAM latency -`define CacheLatency 0 - -typedef struct packed { - logic we; - logic [`RegAddrBus] addr; - logic [`RegBus] data; -} reg_write_signal; -//tlb-compare-part -//typedef struct packed { - -//} tlb_com_part; - -`endif From b25415a99c6b5ee8e14a650e91c75c29fdc0fa57 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 30 May 2022 19:01:12 +0800 Subject: [PATCH 19/41] fix: fix mixed-assign usage & rewrite state using SV --- src/vsrc/dcache.sv | 76 +++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index 6f5e166..36194e1 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -5,7 +5,7 @@ // // Create Date: 
2022/04/21 17:24:48 // Design Name: -// Module Name: cache +// Module Name: dcache // Project Name: // Target Devices: // Tool Versions: @@ -23,9 +23,10 @@ //V、D、Tag、Data=1+1+20+128=150 -module cache ( +module dcache ( input wire clk, input wire rst, + //cache与CPU流水线的交互接 input wire valid, //表明请求有效 input wire op, // 1:write 0: read @@ -64,12 +65,15 @@ module cache ( //MISS:Cache模块当前处理的操作Cache缺失,且正在等待AXI总线的wr_rdy信号 //REPLACE:待替换的Cache行已经从Cache中读出,且正在等待AXI总线的rd_rdy信号 //REFILL:Cache缺失的访存请求已发出,准备/正在将缺失的Cache行数据写入Cache中 - parameter IDLE = 0; - parameter LOOKUP = 1; - parameter MISS = 2; - parameter REPLACE = 3; - parameter REFILL = 4; - parameter WRITE = 5; + enum int { + IDLE, + LOOKUP, + MISS, + REPLACE, + REFILL, + WRITE + } + state, next_state; //Write Buffer状态机包括两个状态 //IDLE: Write Buffer状态机当前没有待写的数据 //WRITE: 将待写的数据写入Cache中。在主状态机处于LOOKUP状态且发现Store操作命中Cache是,触发Write Buffer状态机进入Write状态 @@ -84,7 +88,6 @@ module cache ( parameter BlockLSB = 0; reg [149:0] cache_data[0:511]; - reg [2:0] state, next_state; reg [2:0] wr_state, wr_next_state; reg hit; reg hit1; @@ -146,10 +149,9 @@ module cache ( //读写访问Cache的执行过程 - integer i; //初始化cache initial begin - for (i = 0; i < 512; i = i + 1) cache_data[i] = 150'd0; + for (integer i = 0; i < 512; i = i + 1) cache_data[i] = 0; end @@ -159,34 +161,38 @@ module cache ( end //state change - always @(*) begin + always_comb begin case (state) - IDLE: - if (!cpu_req_valid || (cpu_req_valid && hit_conflict)) next_state = IDLE; - else next_state = LOOKUP; - LOOKUP: - if ((hit && !cpu_req_valid) || (hit && (cpu_req_valid && hit_conflict))) //若hit - next_state = IDLE; - else if (hit && cpu_req_valid) next_state = LOOKUP; - else if (!hit) begin - next_state = MISS; - + IDLE: begin + if (!cpu_req_valid || (cpu_req_valid && hit_conflict)) next_state = IDLE; + else next_state = LOOKUP; + end + LOOKUP: begin + if ((hit && !cpu_req_valid) || (hit && (cpu_req_valid && hit_conflict))) //若hit + next_state = IDLE; + else if (hit && cpu_req_valid) 
next_state = LOOKUP; + else if (!hit) begin + next_state = MISS; + end + end + MISS: begin + if (cpu_wr_rdy == 0) next_state = MISS; + else if (cpu_wr_rdy == 1) next_state = REPLACE; + end + REPLACE: begin + if (cpu_rd_rdy == 0) next_state = REPLACE; + else next_state = REFILL; + end + REFILL: begin + if (cpu_ret_valid == 1 && cpu_ret_last == 1) next_state = IDLE; + else next_state = REFILL; end - MISS: - if (cpu_wr_rdy == 0) next_state = MISS; - else if (cpu_wr_rdy == 1) next_state = REPLACE; - REPLACE: - if (cpu_rd_rdy == 0) next_state = REPLACE; - else next_state = REFILL; - REFILL: - if (cpu_ret_valid == 1 && cpu_ret_last == 1) next_state = IDLE; - else next_state = REFILL; - default: next_state = IDLE; endcase end + reg wr_buffer; - //Write BUffer state change + //Write buffer state change always @(*) begin case (wr_state) IDLE: @@ -303,8 +309,8 @@ module cache ( // end rd_addr = {cpu_req_tag[19:0], cpu_req_index[7:0], cpu_req_offset}; rd_type = 3'b000; - addr_ok = 1'b1; - data_ok <= 1'b1; + // addr_ok = 1'b1; + // data_ok <= 1'b1; end else if (state == REPLACE) begin //将被替换行的Cache数据写入主存中 if (wr_rdy) begin From f114cc97d7960e6aa0b6045a2e742577ed067c99 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 30 May 2022 19:27:00 +0800 Subject: [PATCH 20/41] feat: connect partial wires of dcache --- src/vsrc/cpu_top.sv | 68 ++++++++++++++++++++++++++++++++++++--------- src/vsrc/dcache.sv | 4 +-- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index ccdeb6e..dabb684 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -98,20 +98,23 @@ module cpu_top ( logic [`RegBus] icache_axi_addr; // MEM <-> AXI Controller - // TODO: replace with DCache - logic data_axi_we; - logic [`DataAddrBus] data_axi_addr; - logic [`RegBus] data_axi_data; + logic dcache_axi_rreq; // Read handshake + logic axi_dcache_rd_rdy; + logic axi_dcache_rvalid; + logic dcache_axi_wreq; // Write handshake + logic axi_dcache_wr_rdy; + 
logic [`DataAddrBus] dcache_axi_raddr; + logic [`DataAddrBus] dcache_axi_waddr; + logic [`DataAddrBus] dcache_axi_addr; + assign dcache_axi_addr = dcache_axi_rreq ? dcache_axi_raddr : dcache_axi_wreq ? dcache_axi_waddr : 0; + logic [127:0] axi_dcache_data; + logic [127:0] dcache_axi_data; logic [`RegBus] axi_mem_data; logic data_axi_busy; logic [3:0] data_axi_sel; // Byte selection mem_axi_struct mem_axi_signal[2]; - assign data_axi_we = mem_axi_signal[0].we | mem_axi_signal[1].we; - assign data_axi_addr = mem_axi_signal[0].ce ? mem_axi_signal[0].addr : mem_axi_signal[1].ce ? mem_axi_signal[1].addr : 32'b0; - assign data_axi_data = mem_axi_signal[0].ce ? mem_axi_signal[0].data : mem_axi_signal[1].ce ? mem_axi_signal[1].data : 32'b0; - assign data_axi_sel = mem_axi_signal[0].ce ? mem_axi_signal[0].sel : mem_axi_signal[1].ce ? mem_axi_signal[1].sel : 4'b0; axi_master u_axi_master ( .aclk (aclk), @@ -125,16 +128,22 @@ module cpu_top ( .icache_rd_req_i(icache_axi_rreq), .icache_rd_rdy_o(axi_icache_rdy), .icache_ret_valid_o(axi_icache_rvalid), - .icache_ret_last_o(), + .icache_ret_last_o(), // Used in burst transfer, currently unused - // <-> MEM Stage - .data_cpu_addr_i(data_axi_addr), + // <-> DCache + .data_cpu_addr_i(dcache_axi_addr), .data_cpu_sel_i(data_axi_sel), - .data_cpu_data_o(axi_mem_data), + .data_cpu_data_o(axi_dcache_data), .data_id(4'b0001), + .dcache_rd_req_i(dcache_axi_rreq), .dcache_rd_type_i(3'b000), // For [31:0] + .dcache_rd_rdy_o(axi_dcache_rd_rdy), + .dcache_ret_valid_o(axi_icache_rvalid), + .dcache_ret_last_o(), // same as ICache + .dcache_wr_req_i(dcache_axi_wreq), .dcache_wr_type_i(3'b000), - .dcache_wr_data({{96{1'b0}},data_axi_data}), + .dcache_wr_data(dcache_axi_data), + .dcache_wr_rdy(axi_dcache_rd_rdy), // External AXI signals @@ -176,6 +185,39 @@ module cpu_top ( .s_bready(bready) ); + dcache u_dcache( + .clk (clk ), + .rst (rst ), + + .valid (), + .op (), + .uncache (), + .index (), + .tag (), + .offset (), + .wstrb (), + .wdata (), + 
.addr_ok (), + .data_ok (), + .rdata (), + + // <-> AXI Controller + .rd_req (dcache_axi_rreq), + .rd_type (), + .rd_addr (dcache_axi_raddr), + .rd_rdy (axi_dcache_rd_rdy), + .ret_valid (axi_dcache_rvalid), + .ret_last (), + .ret_data (axi_dcache_data), + .wr_req (dcache_axi_wreq), + .wr_type (), + .wr_addr (dcache_axi_waddr), + .wr_wstrb (), + .wr_data (dcache_axi_data), + .wr_rdy (axi_dcache_wr_rdy) + ); + + // FETCH_WIDTH is 4 localparam FETCH_WIDTH = 4; diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index 36194e1..05f4838 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -36,7 +36,7 @@ module dcache ( input wire [3:0] offset, //地址的offset域addr[3:0] input wire [3:0] wstrb, //写字节使能信号 input wire [31:0] wdata, //写数据 - output reg addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据别接收 + output reg addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 output reg data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 output reg [31:0] rdata, //读Cache的结果 @@ -87,7 +87,7 @@ module dcache ( parameter BlockMSB = 127; parameter BlockLSB = 0; - reg [149:0] cache_data[0:511]; + reg [511:0][149:0] cache_data; reg [2:0] wr_state, wr_next_state; reg hit; reg hit1; From 633b7bfcfb340d14c9e8b897ba9d523c7bdae8bc Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 30 May 2022 19:59:09 +0800 Subject: [PATCH 21/41] fix: fix typo --- src/vsrc/cpu_top.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index dabb684..194437d 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -138,7 +138,7 @@ module cpu_top ( .dcache_rd_req_i(dcache_axi_rreq), .dcache_rd_type_i(3'b000), // For [31:0] .dcache_rd_rdy_o(axi_dcache_rd_rdy), - .dcache_ret_valid_o(axi_icache_rvalid), + .dcache_ret_valid_o(axi_dcache_rvalid), .dcache_ret_last_o(), // same as ICache .dcache_wr_req_i(dcache_axi_wreq), .dcache_wr_type_i(3'b000), From 3d5136ea48bb70377cd489623de127d45df38d87 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 30 May 2022 20:57:29 +0800 Subject: [PATCH 
22/41] fix: fix IB full stall --- src/vsrc/frontend/ftq.sv | 2 +- src/vsrc/frontend/ifu.sv | 2 ++ src/vsrc/icache.sv | 12 ++++++++++-- src/vsrc/instr_buffer.sv | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index 4c8c03b..f6ae6a8 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -56,7 +56,7 @@ module ftq #( end else begin if (backend_commit_i) comm_ptr <= comm_ptr + 1; if (ifu_accept_i & ~instr_buffer_stallreq_i) ifu_ptr <= ifu_ptr + 1; - if (bpu_i.valid & ~instr_buffer_stallreq_i) bpu_ptr <= bpu_ptr + 1; + if (bpu_i.valid) bpu_ptr <= bpu_ptr + 1; // If backend redirect triggered, back to comm_ptr if (backend_flush_i) begin diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index aa4bc52..9e1c742 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -124,6 +124,8 @@ module ifu #( for (integer i = 0; i < FETCH_WIDTH; i++) begin instr_buffer_o[i] <= 0; end + end else if (stallreq_i) begin + // Hold output end else begin for (integer i = 0; i < FETCH_WIDTH; i++) begin // Default diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index faeae62..91f8b4c 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -260,11 +260,19 @@ module icache #( axi_rreq_o = 0; axi_addr_o = 0; case (state) - REFILL_1_REQ, REFILL_1_WAIT: begin + REFILL_1_REQ: begin + axi_rreq_o = 1; + axi_addr_o = raddr_1_i; + end + REFILL_1_WAIT: begin axi_rreq_o = 1; axi_addr_o = raddr_1_delay1; end - REFILL_2_REQ, REFILL_2_WAIT: begin + REFILL_2_REQ: begin + axi_rreq_o = 1; + axi_addr_o = raddr_2_i; + end + REFILL_2_WAIT: begin axi_rreq_o = 1; axi_addr_o = raddr_2_delay1; end diff --git a/src/vsrc/instr_buffer.sv b/src/vsrc/instr_buffer.sv index 93cfa3a..38d08b6 100644 --- a/src/vsrc/instr_buffer.sv +++ b/src/vsrc/instr_buffer.sv @@ -3,7 +3,7 @@ module instr_buffer #( parameter IF_WIDTH = 2, parameter ID_WIDTH = 2, - parameter BUFFER_SIZE = 16 + parameter BUFFER_SIZE = 8 
) ( input logic clk, input logic rst, From f1e731e161a0734b04972d968eb8b3328930613a Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 30 May 2022 21:15:21 +0800 Subject: [PATCH 23/41] fix: fix flushing when rreq not really sent --- src/vsrc/frontend/ifu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 9e1c742..0b18e9f 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -73,7 +73,7 @@ module ifu #( last_rreq_cross_cacheline <= 0; end else if (flush_i) begin is_flushing <= 1; - last_rreq_cross_cacheline <= current_fetch_block.is_cross_cacheline; + last_rreq_cross_cacheline <= current_fetch_block.is_cross_cacheline & ftq_input_valid; end else if (last_rreq_cross_cacheline) begin if ((icache_rvalid_i | flushing_rvalid) == 2'b11) begin is_flushing <= 0; From 66bf976a35cd0dd0888865e698d709983bd60f2c Mon Sep 17 00:00:00 2001 From: Rookie-rookie-rookie <292601787@qq.com> Date: Mon, 30 May 2022 21:44:30 +0800 Subject: [PATCH 24/41] connect dcache to mem --- src/vsrc/cpu_top.sv | 38 ++++++---- src/vsrc/dcache.sv | 128 ++++++++++++++++----------------- src/vsrc/pipeline/4_mem/mem.sv | 114 ++++++++++++++--------------- src/vsrc/pipeline_defines.sv | 2 +- 4 files changed, 145 insertions(+), 137 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 194437d..6801829 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -109,13 +109,10 @@ module cpu_top ( assign dcache_axi_addr = dcache_axi_rreq ? dcache_axi_raddr : dcache_axi_wreq ? 
dcache_axi_waddr : 0; logic [127:0] axi_dcache_data; logic [127:0] dcache_axi_data; - logic [`RegBus] axi_mem_data; + logic [`RegBus] cache_mem_data; logic data_axi_busy; logic [3:0] data_axi_sel; // Byte selection - mem_axi_struct mem_axi_signal[2]; - - axi_master u_axi_master ( .aclk (aclk), .aresetn(aresetn), @@ -185,21 +182,32 @@ module cpu_top ( .s_bready(bready) ); + mem_cache_struct mem_cache_signal[2]; + logic mem_cache_we,mem_cache_ce; + logic [3:0] mem_cache_sel; + logic [31:0] mem_cache_addr,mem_cache_data; + + assign mem_cache_ce = mem_cache_signal[0].ce | mem_cache_signal[1].ce; + assign mem_cache_we = mem_cache_signal[0].we | mem_cache_signal[1].we; + assign mem_cache_sel = mem_cache_signal[0].we ? mem_cache_signal[0].sel : mem_cache_signal[0].we ? mem_cache_signal[1].sel : 0; + assign mem_cache_addr = mem_cache_signal[0].we ? mem_cache_signal[0].addr : mem_cache_signal[0].we ? mem_cache_signal[1].addr : 0; + assign mem_cache_data = mem_cache_signal[0].we ? mem_cache_signal[0].data : mem_cache_signal[0].we ? 
mem_cache_signal[1].data : 0; + dcache u_dcache( .clk (clk ), .rst (rst ), - .valid (), - .op (), - .uncache (), - .index (), - .tag (), - .offset (), - .wstrb (), - .wdata (), + .valid (mem_cache_ce), + .op (mem_cache_we), + .uncache (1'b0), + .index (mem_cache_addr[11:4]), + .tag (tlb_data_o.tag), + .offset (mem_cache_addr[3:0]), + .wstrb (mem_cache_sel), + .wdata (mem_cache_data), .addr_ok (), .data_ok (), - .rdata (), + .rdata (cache_mem_data), // <-> AXI Controller .rd_req (dcache_axi_rreq), @@ -541,11 +549,11 @@ module cpu_top ( .signal_o(mem_signal_o[i]), // -> AXI Controller - .signal_axi_o(mem_axi_signal[i]), + .signal_cache_o(mem_cache_signal[i]), // <- AXI Controller .axi_busy_i(data_axi_busy), - .mem_data_i(axi_mem_data), + .mem_data_i(cache_mem_data), // -> Ctrl .stallreq(mem_stallreq[i]), diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index 05f4838..f4b6723 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -24,36 +24,36 @@ module dcache ( - input wire clk, - input wire rst, + input logic clk, + input logic rst, //cache与CPU流水线的交互接 - input wire valid, //表明请求有效 - input wire op, // 1:write 0: read - input wire uncache, //标志uncache指令,高位有效 - input wire [7:0] index, // 地址的index域(addr[11:4]) - input wire [19:0] tag, //从TLB查到的pfn形成的tag - input wire [3:0] offset, //地址的offset域addr[3:0] - input wire [3:0] wstrb, //写字节使能信号 - input wire [31:0] wdata, //写数据 - output reg addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 - output reg data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 - output reg [31:0] rdata, //读Cache的结果 + input logic valid, //表明请求有效 + input logic op, // 1:write 0: read + input logic uncache, //标志uncache指令,高位有效 + input logic [7:0] index, // 地址的index域(addr[11:4]) + input logic [19:0] tag, //从TLB查到的pfn形成的tag + input logic [3:0] offset, //地址的offset域addr[3:0] + input logic [3:0] wstrb, //写字节使能信号 + input logic [31:0] wdata, //写数据 + output logic addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 + output logic data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 + output logic 
[31:0] rdata, //读Cache的结果 //cache与AXI总线的交互接口 - output reg rd_req, //读请求有效信号。高电平有效 - output reg[3:0] rd_type, //读请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 - output reg [31:0] rd_addr, //读请求起始地址 - input wire rd_rdy, //读请求能否被接收的握手信号。高电平有效 - input wire ret_valid, //返回数据有效。高电平有效。 - input wire ret_last, //返回数据是一次读请求对应的最后一个返回数据 - input wire [31:0] ret_data, //读返回数据 - output reg wr_req, //写请求有效信号。高电平有效 - output reg[2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 - output reg [31:0] wr_addr, //写请求起始地址 - output reg[3:0] wr_wstrb, //写操作的字节掩码。仅在写请求类型为:3'b000: 字节;3'b001: 半字;3'b010:字的情况下才有意义 - output reg [127:0] wr_data, //写数据 - input wire wr_rdy //写请求能否被接受的握手信号。具体见p2234. + output logic rd_req, //读请求有效信号。高电平有效 + output logic[3:0] rd_type, //读请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] rd_addr, //读请求起始地址 + input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 + input logic ret_valid, //返回数据有效。高电平有效。 + input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 + input logic [31:0] ret_data, //读返回数据 + output logic wr_req, //写请求有效信号。高电平有效 + output logic[2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] wr_addr, //写请求起始地址 + output logic[3:0] wr_wstrb, //写操作的字节掩码。仅在写请求类型为:3'b000: 字节;3'b001: 半字;3'b010:字的情况下才有意义 + output logic [127:0] wr_data, //写数据 + input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. 
//还需对类SRAM-AXI转接桥模块进行调整,随后确定实现 @@ -87,51 +87,51 @@ module dcache ( parameter BlockMSB = 127; parameter BlockLSB = 0; - reg [511:0][149:0] cache_data; - reg [2:0] wr_state, wr_next_state; - reg hit; - reg hit1; - reg hit2; - reg way; //若hit,则way无意义,若miss,则way表示分配的那一路 - reg write_op; //hit write 执行标志,高电平有效 - reg miss_way_r; //缺失路的写使能 + logic [511:0][149:0] cache_data; + logic [2:0] wr_state, wr_next_state; + logic hit; + logic hit1; + logic hit2; + logic way; //若hit,则way无意义,若miss,则way表示分配的那一路 + logic write_op; //hit write 执行标志,高电平有效 + logic miss_way_r; //缺失路的写使能 //虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 - wire [ 7:0] cpu_req_index; - wire [19:0] cpu_req_tag; - wire [ 3:0] cpu_req_offset; + logic [ 7:0] cpu_req_index; + logic [19:0] cpu_req_tag; + logic [ 3:0] cpu_req_offset; //wire cpu_req_uncache; - wire cpu_req_valid; - wire cpu_req_op; - wire [ 3:0] cpu_req_wstrb; - wire [31:0] cpu_req_wdata; + logic cpu_req_valid; + logic cpu_req_op; + logic [ 3:0] cpu_req_wstrb; + logic [31:0] cpu_req_wdata; - wire cpu_rd_rdy; - wire cpu_wr_rdy; - wire cpu_ret_valid; - wire cpu_ret_last; - wire [31:0] cpu_ret_data; + logic cpu_rd_rdy; + logic cpu_wr_rdy; + logic cpu_ret_valid; + logic cpu_ret_last; + logic [31:0] cpu_ret_data; ////虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 - //reg [7:0]cpu_req_index; - //reg [19:0]cpu_req_tag; - //reg [3:0]cpu_req_offset; - - ////wire cpu_req_uncache; - //reg cpu_req_valid; - //reg cpu_req_op; - //reg[3:0] cpu_req_wstrb; - //reg[31:0] cpu_req_wdata; - - //reg cpu_rd_rdy; - //reg cpu_wr_rdy; - //reg cpu_ret_valid; - //reg[1:0] cpu_ret_last; - //reg[31:0] cpu_ret_data; + //logic [7:0]cpu_req_index; + //logic [19:0]cpu_req_tag; + //logic [3:0]cpu_req_offset; + + ////logic cpu_req_uncache; + //logic cpu_req_valid; + //logic cpu_req_op; + //logic[3:0] cpu_req_wstrb; + //logic[31:0] cpu_req_wdata; + + //logic cpu_rd_rdy; + //logic cpu_wr_rdy; + //logic cpu_ret_valid; + //logic[1:0] cpu_ret_last; + 
//logic[31:0] cpu_ret_data; //hit write 冲突 高位有效 - reg hit_conflict = 0; + logic hit_conflict = 0; assign cpu_req_valid = valid; assign cpu_req_op = op; @@ -191,7 +191,7 @@ module dcache ( endcase end - reg wr_buffer; + logic wr_buffer; //Write buffer state change always @(*) begin case (wr_state) @@ -297,7 +297,7 @@ module dcache ( end end - reg [1:0] rt_offset; + logic [1:0] rt_offset; //对AXI接口的写操作 always @(*) begin if (state == MISS) begin // 存储要写的数据还有地址等信息 diff --git a/src/vsrc/pipeline/4_mem/mem.sv b/src/vsrc/pipeline/4_mem/mem.sv index 6264f5e..40b8add 100644 --- a/src/vsrc/pipeline/4_mem/mem.sv +++ b/src/vsrc/pipeline/4_mem/mem.sv @@ -9,7 +9,7 @@ module mem ( output mem_wb_struct signal_o, - output mem_axi_struct signal_axi_o, + output mem_cache_struct signal_cache_o, // -> Ctrl output stallreq, @@ -145,10 +145,10 @@ module mem ( signal_o.waddr = 0; signal_o.csr_signal = 0; signal_o.aluop = 0; - signal_axi_o = 0; + signal_cache_o = 0; LLbit_we_o = 1'b0; LLbit_value_o = 1'b0; - signal_axi_o = 0; + signal_cache_o = 0; signal_o.store_data = 0; end else begin LLbit_we_o = 1'b0; @@ -161,30 +161,30 @@ module mem ( signal_o.wdata = signal_i.wdata; signal_o.aluop = aluop_i; signal_o.csr_signal = signal_i.csr_signal; - signal_axi_o = 0; + signal_cache_o = 0; signal_o.store_data = 0; case (aluop_i) `EXE_LD_B_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.sel = 4'b1111; case (mem_addr[1:0]) 2'b11: begin signal_o.wdata = {{24{mem_data_i[31]}}, mem_data_i[31:24]}; - // signal_axi_o.sel = 4'b1000; + // signal_cache_o.sel = 4'b1000; end 2'b10: begin signal_o.wdata = {{24{mem_data_i[23]}}, mem_data_i[23:16]}; - // signal_axi_o.sel = 4'b0100; + // signal_cache_o.sel = 4'b0100; end 2'b01: begin signal_o.wdata = {{24{mem_data_i[15]}}, mem_data_i[15:8]}; - // signal_axi_o.sel = 4'b0010; + // 
signal_cache_o.sel = 4'b0010; end 2'b00: begin signal_o.wdata = {{24{mem_data_i[7]}}, mem_data_i[7:0]}; - // signal_axi_o.sel = 4'b0001; + // signal_cache_o.sel = 4'b0001; end default: begin signal_o.wdata = `ZeroWord; @@ -192,18 +192,18 @@ module mem ( endcase end `EXE_LD_H_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; + signal_cache_o.ce = `ChipEnable; case (mem_addr[1:0]) 2'b10: begin signal_o.wdata = {{16{mem_data_i[31]}}, mem_data_i[31:16]}; - signal_axi_o.sel = 4'b1100; + signal_cache_o.sel = 4'b1100; end 2'b00: begin signal_o.wdata = {{16{mem_data_i[15]}}, mem_data_i[15:0]}; - signal_axi_o.sel = 4'b0011; + signal_cache_o.sel = 4'b0011; end default: begin @@ -212,32 +212,32 @@ module mem ( endcase end `EXE_LD_W_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.sel = 4'b1111; signal_o.wdata = mem_data_i; end `EXE_LD_BU_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; + signal_cache_o.ce = `ChipEnable; case (mem_addr[1:0]) 2'b11: begin signal_o.wdata = {{24{1'b0}}, mem_data_i[31:24]}; - signal_axi_o.sel = 4'b1000; + signal_cache_o.sel = 4'b1000; end 2'b10: begin signal_o.wdata = {{24{1'b0}}, mem_data_i[23:16]}; - signal_axi_o.sel = 4'b0100; + signal_cache_o.sel = 4'b0100; end 2'b01: begin signal_o.wdata = {{24{1'b0}}, mem_data_i[15:8]}; - signal_axi_o.sel = 4'b0010; + signal_cache_o.sel = 4'b0010; end 2'b00: begin signal_o.wdata = {{24{1'b0}}, mem_data_i[7:0]}; - signal_axi_o.sel = 4'b0001; + signal_cache_o.sel = 4'b0001; end default: begin signal_o.wdata = `ZeroWord; @@ -245,17 +245,17 @@ module mem ( endcase end `EXE_LD_HU_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg 
= `WriteEnable; - signal_axi_o.ce = `ChipEnable; + signal_cache_o.ce = `ChipEnable; case (mem_addr[1:0]) 2'b10: begin signal_o.wdata = {{16{1'b0}}, mem_data_i[31:16]}; - signal_axi_o.sel = 4'b1100; + signal_cache_o.sel = 4'b1100; end 2'b00: begin signal_o.wdata = {{16{1'b0}}, mem_data_i[15:0]}; - signal_axi_o.sel = 4'b0011; + signal_cache_o.sel = 4'b0011; end default: begin signal_o.wdata = `ZeroWord; @@ -263,76 +263,76 @@ module mem ( endcase end `EXE_ST_B_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.we = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = {reg2_i[7:0], reg2_i[7:0], reg2_i[7:0], reg2_i[7:0]}; + signal_cache_o.we = `WriteEnable; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = {reg2_i[7:0], reg2_i[7:0], reg2_i[7:0], reg2_i[7:0]}; case (mem_addr[1:0]) 2'b11: begin - signal_axi_o.sel = 4'b1000; + signal_cache_o.sel = 4'b1000; signal_o.store_data = {reg2_i[7:0], 24'b0}; end 2'b10: begin - signal_axi_o.sel = 4'b0100; + signal_cache_o.sel = 4'b0100; signal_o.store_data = {8'b0, reg2_i[7:0], 16'b0}; end 2'b01: begin - signal_axi_o.sel = 4'b0010; + signal_cache_o.sel = 4'b0010; signal_o.store_data = {16'b0, reg2_i[7:0], 8'b0}; end 2'b00: begin - signal_axi_o.sel = 4'b0001; + signal_cache_o.sel = 4'b0001; signal_o.store_data = {24'b0, reg2_i[7:0]}; end endcase end `EXE_ST_H_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.we = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = {reg2_i[15:0], reg2_i[15:0]}; + signal_cache_o.we = `WriteEnable; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = {reg2_i[15:0], reg2_i[15:0]}; case (mem_addr[1:0]) 2'b10: begin - signal_axi_o.sel = 4'b1100; + signal_cache_o.sel = 4'b1100; signal_o.store_data = {reg2_i[15:0], 16'b0}; end 2'b00: begin - signal_axi_o.sel = 4'b0011; + signal_cache_o.sel = 4'b0011; signal_o.store_data = 
{16'b0, reg2_i[15:0]}; end default: begin - signal_axi_o.sel = 4'b0000; + signal_cache_o.sel = 4'b0000; end endcase end `EXE_ST_W_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.we = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = reg2_i; - signal_axi_o.sel = 4'b1111; + signal_cache_o.we = `WriteEnable; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = reg2_i; + signal_cache_o.sel = 4'b1111; signal_o.store_data = reg2_i; end `EXE_LL_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteDisable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.sel = 4'b1111; signal_o.wdata = mem_data_i; LLbit_we_o = 1'b1; LLbit_value_o = 1'b1; end `EXE_SC_OP: begin if (LLbit == 1'b1) begin - signal_axi_o.addr = mem_addr; - signal_axi_o.we = `WriteEnable; + signal_cache_o.addr = mem_addr; + signal_cache_o.we = `WriteEnable; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = reg2_i; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = reg2_i; + signal_cache_o.sel = 4'b1111; LLbit_we_o = 1'b1; LLbit_value_o = 1'b0; signal_o.wdata = 32'b1; @@ -342,7 +342,7 @@ module mem ( end default: begin // Reset AXI signals, IMPORTANT! 
- signal_axi_o = 0; + signal_cache_o = 0; end endcase end diff --git a/src/vsrc/pipeline_defines.sv b/src/vsrc/pipeline_defines.sv index 9ef6fc5..4f919cd 100644 --- a/src/vsrc/pipeline_defines.sv +++ b/src/vsrc/pipeline_defines.sv @@ -112,7 +112,7 @@ typedef struct packed { logic [3:0] sel; logic [`DataAddrBus] addr; logic [`RegBus] data; -} mem_axi_struct; +} mem_cache_struct; typedef struct packed { logic we; From d2f3a8aef9c9e7960f2ba16034024a1ebfc597cc Mon Sep 17 00:00:00 2001 From: Easton Man Date: Mon, 30 May 2022 23:00:50 +0800 Subject: [PATCH 25/41] fix: fix IFU flushing state entry condition --- src/vsrc/frontend/ifu.sv | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index 0b18e9f..b730ddc 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -31,10 +31,6 @@ module ifu #( output instr_buffer_info_t instr_buffer_o[FETCH_WIDTH] ); - // Reset signal - logic rst_n; - assign rst_n = ~rst; - logic accept_ftq_input; assign ftq_accept_o = accept_ftq_input; @@ -42,7 +38,8 @@ module ifu #( logic ftq_input_valid = ftq_i.valid; // Send addr to ICache always_comb begin - if (ftq_input_valid) begin + if (ftq_input_valid & ~is_flushing) begin + // Send rreq to ICache if FTQ input is valid and not in flushing state icache_rreq_o[0] = 1; icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 
1 : 0; icache_raddr_o[0] = {ftq_i.start_pc[ADDR_WIDTH-1:4], 4'b0}; @@ -56,8 +53,8 @@ module ifu #( // Flush state logic is_flushing; logic [1:0] flushing_rvalid; - always_ff @(posedge clk or negedge rst_n) begin - if (~rst_n) begin + always_ff @(posedge clk) begin + if (rst) begin flushing_rvalid <= 0; end else if (flush_i) begin flushing_rvalid <= 0; @@ -67,11 +64,16 @@ module ifu #( end end logic last_rreq_cross_cacheline; - always_ff @(posedge clk or negedge rst_n) begin - if (!rst_n) begin + always_ff @(posedge clk) begin + if (rst) begin is_flushing <= 0; last_rreq_cross_cacheline <= 0; - end else if (flush_i) begin + end else if (flush_i & current_fetch_block.valid & ~icache_result_valid) begin + // Enter a flushing state to wait for ICache return + // This state means a rreq is on-the-fly + // So require the following condition: + // 1. ICache does not return valid in P1 + // 2. RREQ is sent to ICache is_flushing <= 1; last_rreq_cross_cacheline <= current_fetch_block.is_cross_cacheline & ftq_input_valid; end else if (last_rreq_cross_cacheline) begin @@ -90,8 +92,8 @@ module ifu #( ftq_ifu_t current_fetch_block; logic [ADDR_WIDTH-1:0] debug_p1_pc = current_fetch_block.start_pc; // DEBUG logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG - always_ff @(posedge clk or negedge rst_n) begin - if (!rst_n) begin + always_ff @(posedge clk) begin + if (rst) begin current_fetch_block <= 0; end else if (flush_i) begin current_fetch_block <= 0; @@ -112,15 +114,17 @@ module ifu #( if (is_flushing) icache_result_valid = 0; end - // If last req to icache is valid, then accept another ftq input - assign accept_ftq_input = icache_result_valid; + // If last req to icache is valid + // and not in a flushing state + // then accept another ftq input + assign accept_ftq_input = icache_result_valid & ~is_flushing; // P2 // Send instr info to IB logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; assign cacheline_combined = {cacheline_1, cacheline_0}; - 
always_ff @(posedge clk or negedge rst_n) begin - if (!rst_n) begin + always_ff @(posedge clk) begin + if (rst) begin for (integer i = 0; i < FETCH_WIDTH; i++) begin instr_buffer_o[i] <= 0; end From b52c64483c2f1a72b14be784f350182aa234e582 Mon Sep 17 00:00:00 2001 From: Rookie-rookie-rookie <292601787@qq.com> Date: Tue, 31 May 2022 10:34:24 +0800 Subject: [PATCH 26/41] connect dcache, test fail --- src/vsrc/cpu_top.sv | 47 +++++++++++++-------------- src/vsrc/pipeline/4_mem/mem.sv | 53 ++++++------------------------- src/vsrc/pipeline/4_mem/mem_wb.sv | 49 ++++++++++++++++++++++++++-- src/vsrc/pipeline_defines.sv | 3 +- 4 files changed, 83 insertions(+), 69 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 6801829..8618c20 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -110,7 +110,7 @@ module cpu_top ( logic [127:0] axi_dcache_data; logic [127:0] dcache_axi_data; logic [`RegBus] cache_mem_data; - logic data_axi_busy; + logic mem_data_ok,mem_addr_ok; logic [3:0] data_axi_sel; // Byte selection axi_master u_axi_master ( @@ -200,13 +200,13 @@ module cpu_top ( .valid (mem_cache_ce), .op (mem_cache_we), .uncache (1'b0), - .index (mem_cache_addr[11:4]), + .index (tlb_data_o.index), .tag (tlb_data_o.tag), - .offset (mem_cache_addr[3:0]), + .offset (tlb_data_o.offset), .wstrb (mem_cache_sel), .wdata (mem_cache_data), - .addr_ok (), - .data_ok (), + .addr_ok (mem_addr_ok), + .data_ok (mem_data_ok), .rdata (cache_mem_data), // <-> AXI Controller @@ -418,15 +418,10 @@ module cpu_top ( logic excp_flush; logic ertn_flush; logic [31:0] csr_era_i; - logic [31:0] wb_csr_era[2]; logic [8:0] csr_esubcode_i; - logic [8:0] wb_csr_esubcode[2]; logic [5:0] csr_ecode_i; - logic [5:0] wb_csr_ecode[2]; logic va_error_i; - logic wb_va_error[2]; logic [31:0] bad_va_i; - logic [31:0] wb_bad_va[2]; logic tlbsrch_en; logic tlbsrch_found; logic [4:0] tlbsrch_index; @@ -538,7 +533,7 @@ module cpu_top ( assign csr_mem_signal = 
{csr_pg,csr_da,csr_dmw0,csr_dmw1,csr_plv,csr_datm}; //assign tlb_mem_signal = {data_tlb_found,data_tlb_index,data_tlb_v,data_tlb_d,data_tlb_mat,data_tlb_plv}; - logic wb_LLbit_we_i[2],wb_LLbit_value_i[2]; + logic wb_LLbit_we_i[2],wb_LLbit_value_i[2],data_fetch; generate for (genvar i = 0; i < 2; i++) begin : mem mem u_mem ( @@ -548,11 +543,13 @@ module cpu_top ( .signal_o(mem_signal_o[i]), - // -> AXI Controller + // -> cache .signal_cache_o(mem_cache_signal[i]), // <- AXI Controller - .axi_busy_i(data_axi_busy), + .addr_ok(mem_addr_ok), + .data_ok(mem_data_ok), + .data_fetch(data_fetch), .mem_data_i(cache_mem_data), // -> Ctrl @@ -564,20 +561,11 @@ module cpu_top ( .LLbit_we_o(mem_wb_LLbit_we[i]), .LLbit_value_o(mem_wb_LLbit_value[i]), - .csr_mem_signal(csr_mem_signal), - .disable_cache(1'b0), - // Data forward // -> Dispatch // -> EX - .mem_data_forward_o(mem_data_forward[i]), - - .data_addr_trans_en(mem_data_addr_trans_en[i]), - .dmw0_en(mem_data_dmw0_en[i]), - .dmw1_en(mem_data_dmw1_en[i]), - .cacop_op_mode_di(cacop_op_mode_di[i]), + .mem_data_forward_o(mem_data_forward[i]) - .tlb_mem_signal(tlb_mem_signal) ); end @@ -601,6 +589,17 @@ module cpu_top ( .flush(flush), + .csr_mem_signal(csr_mem_signal), + .disable_cache(1'b0), + + //<- tlb + .data_addr_trans_en(mem_data_addr_trans_en[i]), + .dmw0_en(mem_data_dmw0_en[i]), + .dmw1_en(mem_data_dmw1_en[i]), + .cacop_op_mode_di(cacop_op_mode_di[i]), + //-> tlb + .tlb_mem_signal(tlb_mem_signal), + //to ctrl .wb_ctrl_signal(wb_ctrl_signal[i]), @@ -741,6 +740,8 @@ module cpu_top ( assign data_addr_trans_en = mem_data_addr_trans_en[0] | mem_data_addr_trans_en[1]; assign tlb_data_i.dmw0_en = mem_data_dmw0_en[0] | mem_data_dmw0_en[1]; assign tlb_data_i.dmw1_en = mem_data_dmw1_en[0] | mem_data_dmw1_en[1]; + assign tlb_data_i.vaddr = mem_cache_addr; + assign tlb_data_i.fetch = data_fetch; inst_tlb_struct tlb_inst_i; tlb_inst_struct tlb_inst_o; diff --git a/src/vsrc/pipeline/4_mem/mem.sv b/src/vsrc/pipeline/4_mem/mem.sv index 
40b8add..d2613cd 100644 --- a/src/vsrc/pipeline/4_mem/mem.sv +++ b/src/vsrc/pipeline/4_mem/mem.sv @@ -14,32 +14,22 @@ module mem ( // -> Ctrl output stallreq, - // <- AXI Controller - input logic axi_busy_i, + // <- cache + input logic addr_ok, + input logic data_ok, input logic [`RegBus] mem_data_i, + output data_fetch, + input logic LLbit_i, input logic wb_LLbit_we_i, input logic wb_LLbit_value_i, - //from csr - input csr_to_mem_struct csr_mem_signal, - input logic disable_cache, - // Data forward // -> Dispatch // -> EX output mem_data_forward_t mem_data_forward_o, - //to addr trans - output logic data_addr_trans_en, - output logic dmw0_en, - output logic dmw1_en, - output logic cacop_op_mode_di, - - //tlb - input tlb_to_mem_struct tlb_mem_signal, - output reg LLbit_we_o, output reg LLbit_value_o @@ -49,14 +39,6 @@ module mem ( logic access_mem; logic mem_store_op; logic mem_load_op; - logic excp_adem; - logic pg_mode; - logic da_mode; - logic excp_tlbr; - logic excp_pil; - logic excp_pis; - logic excp_pme; - logic excp_ppi; logic [`InstAddrBus] debug_pc_i; assign debug_pc_i = signal_i.instr_info.pc; @@ -75,9 +57,11 @@ module mem ( assign excp_i = signal_i.excp; assign excp_num_i = signal_i.excp_num; + assign data_fetch = addr_ok | aluop_i == `EXE_TLBSRCH_OP; + assign access_mem = mem_load_op || mem_store_op; - assign stallreq = axi_busy_i & (mem_load_op | mem_store_op); + assign stallreq = !data_ok & (mem_load_op | mem_store_op); assign mem_load_op = aluop_i == `EXE_LD_B_OP || aluop_i == `EXE_LD_BU_OP || aluop_i == `EXE_LD_H_OP || aluop_i == `EXE_LD_HU_OP || aluop_i == `EXE_LD_W_OP || aluop_i == `EXE_LL_OP; @@ -105,28 +89,11 @@ module mem ( assign signal_o.load_addr = mem_load_op ? mem_addr : 0; assign signal_o.store_addr = mem_store_op ? 
mem_addr : 0; - //addr dmw trans - assign dmw0_en = ((csr_mem_signal.csr_dmw0[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw0[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (signal_i.wdata[31:29] == csr_mem_signal.csr_dmw0[`VSEG]); - assign dmw1_en = ((csr_mem_signal.csr_dmw1[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw1[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (signal_i.wdata[31:29] == csr_mem_signal.csr_dmw1[`VSEG]); - - assign pg_mode = !csr_mem_signal.csr_da && csr_mem_signal.csr_pg; - assign da_mode = csr_mem_signal.csr_da && !csr_mem_signal.csr_pg; - - assign data_addr_trans_en = pg_mode && !dmw0_en && !dmw1_en && !cacop_op_mode_di; - // Data forward assign mem_data_forward_o = {mem_load_op, signal_o.wreg, signal_o.waddr, signal_o.wdata}; - assign excp_tlbr = access_mem && !tlb_mem_signal.data_tlb_found && data_addr_trans_en; - assign excp_pil = mem_load_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; //cache will generate pil exception?? 
- assign excp_pis = mem_store_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; - assign excp_ppi = access_mem && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv > tlb_mem_signal.data_tlb_plv) && data_addr_trans_en; - assign excp_pme = mem_store_op && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv <= tlb_mem_signal.data_tlb_plv) && !tlb_mem_signal.data_tlb_d && data_addr_trans_en; - - assign signal_o.excp = excp_tlbr || excp_pil || excp_pis || excp_ppi || excp_pme || excp_adem || excp_i; - assign signal_o.excp_num = { - excp_pil, excp_pis, excp_ppi, excp_pme, excp_tlbr, excp_adem, excp_num_i - }; + assign signal_o.excp = excp_i; + assign signal_o.excp_num = excp_num_i; assign signal_o.refetch = signal_i.refetch; always @(*) begin diff --git a/src/vsrc/pipeline/4_mem/mem_wb.sv b/src/vsrc/pipeline/4_mem/mem_wb.sv index 97673ac..827b045 100644 --- a/src/vsrc/pipeline/4_mem/mem_wb.sv +++ b/src/vsrc/pipeline/4_mem/mem_wb.sv @@ -12,6 +12,19 @@ module mem_wb ( input logic flush, + //<- csr + input csr_to_mem_struct csr_mem_signal, + input logic disable_cache, + + //<- tlb + output logic data_addr_trans_en, + output logic dmw0_en, + output logic dmw1_en, + output logic cacop_op_mode_di, + + //-> tlb + input tlb_to_mem_struct tlb_mem_signal, + // load store relate difftest output wb_ctrl wb_ctrl_signal, @@ -19,6 +32,38 @@ module mem_wb ( output logic is_last_in_block ); + logic excp,pg_mode,da_mode; + logic [15:0] excp_num; + logic access_mem,mem_store_op,mem_load_op; + logic excp_tlbr,excp_pil,excp_pis,excp_pme,excp_ppi,excp_adem; + + assign access_mem = mem_load_op || mem_store_op; + + assign mem_load_op = mem_signal_o.aluop == `EXE_LD_B_OP || mem_signal_o.aluop == `EXE_LD_BU_OP || mem_signal_o.aluop == `EXE_LD_H_OP || mem_signal_o.aluop == `EXE_LD_HU_OP || + mem_signal_o.aluop == `EXE_LD_W_OP || mem_signal_o.aluop == `EXE_LL_OP; + + assign mem_store_op = mem_signal_o.aluop == `EXE_ST_B_OP || mem_signal_o.aluop == `EXE_ST_H_OP || mem_signal_o.aluop == 
`EXE_ST_W_OP || mem_signal_o.aluop == `EXE_SC_OP; + + assign dmw0_en = ((csr_mem_signal.csr_dmw0[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw0[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (mem_signal_o.wdata[31:29] == csr_mem_signal.csr_dmw0[`VSEG]); + assign dmw1_en = ((csr_mem_signal.csr_dmw1[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw1[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (mem_signal_o.wdata[31:29] == csr_mem_signal.csr_dmw1[`VSEG]); + + assign pg_mode = !csr_mem_signal.csr_da && csr_mem_signal.csr_pg; + assign da_mode = csr_mem_signal.csr_da && !csr_mem_signal.csr_pg; + + assign data_addr_trans_en = pg_mode && !dmw0_en && !dmw1_en && !cacop_op_mode_di; + + assign excp_adem = 0; + assign excp_tlbr = access_mem && !tlb_mem_signal.data_tlb_found && data_addr_trans_en; + assign excp_pil = mem_load_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; //cache will generate pil exception?? + assign excp_pis = mem_store_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; + assign excp_ppi = access_mem && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv > tlb_mem_signal.data_tlb_plv) && data_addr_trans_en; + assign excp_pme = mem_store_op && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv <= tlb_mem_signal.data_tlb_plv) && !tlb_mem_signal.data_tlb_d && data_addr_trans_en; + + assign excp = excp_tlbr || excp_pil || excp_pis || excp_ppi || excp_pme || excp_adem || mem_signal_o.excp; + assign excp_num = { + excp_pil, excp_pis, excp_ppi, excp_pme, excp_tlbr, excp_adem, mem_signal_o.excp_num + }; + // For observability logic [`RegBus] debug_mem_wdata; assign debug_mem_wdata = mem_signal_o.wdata; @@ -42,8 +87,8 @@ module mem_wb ( wb_ctrl_signal.wb_reg_o.pc <= mem_signal_o.instr_info.pc; wb_ctrl_signal.llbit_o.we <= mem_LLbit_we; wb_ctrl_signal.llbit_o.value <= mem_LLbit_value; - wb_ctrl_signal.excp <= mem_signal_o.excp; - wb_ctrl_signal.excp_num <= mem_signal_o.excp_num; + wb_ctrl_signal.excp <= excp; 
+ wb_ctrl_signal.excp_num <= excp_num; wb_ctrl_signal.fetch_flush <= mem_signal_o.refetch; wb_ctrl_signal.csr_signal_o <= mem_signal_o.csr_signal; wb_ctrl_signal.diff_commit_o.pc <= mem_signal_o.instr_info.pc; diff --git a/src/vsrc/pipeline_defines.sv b/src/vsrc/pipeline_defines.sv index 4f919cd..c8ad488 100644 --- a/src/vsrc/pipeline_defines.sv +++ b/src/vsrc/pipeline_defines.sv @@ -102,7 +102,7 @@ typedef struct packed { logic [`RegBus] store_data; logic excp; - logic [15:0] excp_num; + logic [9:0] excp_num; logic refetch; } mem_wb_struct; @@ -112,6 +112,7 @@ typedef struct packed { logic [3:0] sel; logic [`DataAddrBus] addr; logic [`RegBus] data; + logic uncache_en; } mem_cache_struct; typedef struct packed { From b9eb700146f2d7038e826a06b0fcf2cd12ac0046 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Tue, 31 May 2022 15:02:51 +0800 Subject: [PATCH 27/41] refactor: rewrite IFU in pipeline design --- src/vsrc/frontend/ftq.sv | 10 +-- src/vsrc/frontend/ifu.sv | 145 ++++++++++++++++++++------------------- src/vsrc/icache.sv | 120 +++++++++++++++++--------------- 3 files changed, 144 insertions(+), 131 deletions(-) diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index f6ae6a8..c471ba2 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -21,7 +21,7 @@ module ftq #( // <-> IFU output ftq_ifu_t ifu_o, - input logic ifu_accept_i + input logic ifu_accept_i // Must return in the same cycle ); // Reset signal @@ -79,10 +79,10 @@ module ftq #( // Output // -> IFU - assign ifu_o.valid = FTQ[ifu_ptr+ifu_accept_i].valid; - assign ifu_o.is_cross_cacheline = FTQ[ifu_ptr+ifu_accept_i].is_cross_cacheline; - assign ifu_o.start_pc = FTQ[ifu_ptr+ifu_accept_i].start_pc; - assign ifu_o.length = FTQ[ifu_ptr+ifu_accept_i].length; + assign ifu_o.valid = FTQ[ifu_ptr].valid; + assign ifu_o.is_cross_cacheline = FTQ[ifu_ptr].is_cross_cacheline; + assign ifu_o.start_pc = FTQ[ifu_ptr].start_pc; + assign ifu_o.length = FTQ[ifu_ptr].length; // -> BPU logic 
[$clog2(QUEUE_SIZE)-1:0] bpu_ptr_plus1; // Limit the bit width diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index b730ddc..d06ad33 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -31,14 +31,14 @@ module ifu #( output instr_buffer_info_t instr_buffer_o[FETCH_WIDTH] ); - logic accept_ftq_input; - assign ftq_accept_o = accept_ftq_input; - - // P0 - logic ftq_input_valid = ftq_i.valid; - // Send addr to ICache + // P0 ////////////////////////////////////////////////////////////////////////////// + logic p0_send_rreq; + // Condition when to send rreq to ICache, see doc for detail + assign p0_send_rreq = ftq_i.valid & ~is_flushing & ~stallreq_i & ~p1_stallreq; + assign ftq_accept_o = p0_send_rreq; + // Send read req to ICache always_comb begin - if (ftq_input_valid & ~is_flushing) begin + if (p0_send_rreq) begin // Send rreq to ICache if FTQ input is valid and not in flushing state icache_rreq_o[0] = 1; icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 1 : 0; @@ -50,79 +50,76 @@ module ifu #( end end + + // P1 ////////////////////////////////////////////////////////////////////////////// // Flush state - logic is_flushing; - logic [1:0] flushing_rvalid; - always_ff @(posedge clk) begin - if (rst) begin - flushing_rvalid <= 0; - end else if (flush_i) begin - flushing_rvalid <= 0; - end else begin - if (icache_rvalid_i[0]) flushing_rvalid[0] <= 1; - if (icache_rvalid_i[1]) flushing_rvalid[1] <= 1; - end - end - logic last_rreq_cross_cacheline; + logic is_flushing_r, is_flushing; + assign is_flushing = is_flushing_r | flush_i; always_ff @(posedge clk) begin if (rst) begin - is_flushing <= 0; - last_rreq_cross_cacheline <= 0; - end else if (flush_i & current_fetch_block.valid & ~icache_result_valid) begin - // Enter a flushing state to wait for ICache return - // This state means a rreq is on-the-fly - // So require the following condition: - // 1. ICache does not return valid in P1 - // 2. 
RREQ is sent to ICache - is_flushing <= 1; - last_rreq_cross_cacheline <= current_fetch_block.is_cross_cacheline & ftq_input_valid; - end else if (last_rreq_cross_cacheline) begin - if ((icache_rvalid_i | flushing_rvalid) == 2'b11) begin - is_flushing <= 0; - last_rreq_cross_cacheline <= 0; - end - end else if (icache_rvalid_i[0] == 1) begin - is_flushing <= 0; - last_rreq_cross_cacheline <= 0; + is_flushing_r <= 0; + end else if (flush_i & p1_read_transaction.valid & ~p1_read_done) begin + is_flushing_r <= 1; + end else if (p1_read_done) begin + is_flushing_r <= 0; end end - // P1 - // FTQ input pass to P1 - ftq_ifu_t current_fetch_block; - logic [ADDR_WIDTH-1:0] debug_p1_pc = current_fetch_block.start_pc; // DEBUG - logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG + // P1 data structure + typedef struct packed { + logic valid; + logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; + logic [1:0] icache_rvalid_r; + logic [1:0][CACHELINE_WIDTH-1:0] icache_rdata_r; + } read_transaction_t; + read_transaction_t p1_read_transaction; + + logic p1_read_done; // Read done is same cycle as ICache return valid + assign p1_read_done = p1_read_transaction.is_cross_cacheline ? 
+ (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]) & (icache_rvalid_i[1]| p1_read_transaction.icache_rvalid_r[1]) : + (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]); + logic p1_stallreq; + assign p1_stallreq = p1_read_transaction.valid & ~p1_read_done; always_ff @(posedge clk) begin if (rst) begin - current_fetch_block <= 0; - end else if (flush_i) begin - current_fetch_block <= 0; + p1_read_transaction <= 0; + end else if (p0_send_rreq) begin + p1_read_transaction.valid <= 1; + p1_read_transaction.start_pc <= ftq_i.start_pc; + p1_read_transaction.is_cross_cacheline <= ftq_i.is_cross_cacheline; + p1_read_transaction.length <= ftq_i.length; + p1_read_transaction.icache_rvalid_r <= 0; + p1_read_transaction.icache_rdata_r <= 0; + end else if (p1_read_done & ~stallreq_i) begin + // Reset if done and not stalling + p1_read_transaction <= 0; end else begin - current_fetch_block <= ftq_i; + if (icache_rvalid_i[0]) begin + p1_read_transaction.icache_rvalid_r[0] <= 1; + p1_read_transaction.icache_rdata_r[0] <= icache_rdata_i[0]; + end + if (icache_rvalid_i[1]) begin + p1_read_transaction.icache_rvalid_r[1] <= 1; + p1_read_transaction.icache_rdata_r[1] <= icache_rdata_i[1]; + end end end - // Cacheline returned - logic [FETCH_WIDTH-1:0][DATA_WIDTH-1:0] cacheline_0, cacheline_1; - assign cacheline_0 = icache_rdata_i[0]; - assign cacheline_1 = icache_rdata_i[1]; - logic icache_result_valid; - always_comb begin - if (current_fetch_block.is_cross_cacheline) - icache_result_valid = icache_rvalid_i[0] & icache_rvalid_i[1]; - else icache_result_valid = icache_rvalid_i[0]; - if (is_flushing) icache_result_valid = 0; - end + // P1 debug, for observability + logic [ADDR_WIDTH-1:0] debug_p1_pc = p1_read_transaction.start_pc; // DEBUG + logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG + logic [1:0] debug_p1_rvalid_r = p1_read_transaction.icache_rvalid_r; - // If last req to icache is valid - // and not in a flushing state - // then accept another 
ftq input - assign accept_ftq_input = icache_result_valid & ~is_flushing; - // P2 + // P2 ////////////////////////////////////////////////////////////////////////////////// // Send instr info to IB - logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; - assign cacheline_combined = {cacheline_1, cacheline_0}; + logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; // Same cycle as ICache return + assign cacheline_combined = { + icache_rvalid_i[1] ? icache_rdata_i[1] : p1_read_transaction.icache_rdata_r[1], + icache_rvalid_i[0] ? icache_rdata_i[0] : p1_read_transaction.icache_rdata_r[0] + }; always_ff @(posedge clk) begin if (rst) begin for (integer i = 0; i < FETCH_WIDTH; i++) begin @@ -130,26 +127,30 @@ module ifu #( end end else if (stallreq_i) begin // Hold output - end else begin + end else if (p1_read_done & ~is_flushing) begin for (integer i = 0; i < FETCH_WIDTH; i++) begin // Default instr_buffer_o[i].is_last_in_block <= 0; - if (i < current_fetch_block.length && ~stallreq_i && icache_result_valid && ~is_flushing && ~flush_i) begin - if (i == current_fetch_block.length - 1) begin + if (i < p1_read_transaction.length) begin + if (i == p1_read_transaction.length - 1) begin instr_buffer_o[i].valid <= 1; instr_buffer_o[i].is_last_in_block <= 1; - instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long - instr_buffer_o[i].instr <= cacheline_combined[current_fetch_block.start_pc[3:2]+i]; + instr_buffer_o[i].pc <= p1_read_transaction.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_combined[p1_read_transaction.start_pc[3:2]+i]; end else begin instr_buffer_o[i].valid <= 1; - instr_buffer_o[i].pc <= current_fetch_block.start_pc + i * 4; // Instr is 4 bytes long - instr_buffer_o[i].instr <= cacheline_combined[current_fetch_block.start_pc[3:2]+i]; + instr_buffer_o[i].pc <= p1_read_transaction.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= 
cacheline_combined[p1_read_transaction.start_pc[3:2]+i]; end end else begin instr_buffer_o[i] <= 0; end end + end else begin + for (integer i = 0; i < FETCH_WIDTH; i++) begin + instr_buffer_o[i] <= 0; + end end end diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 91f8b4c..0fadbbd 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -31,11 +31,6 @@ module icache #( input logic [CACHELINE_WIDTH-1:0] axi_data_i ); - // Reset signal - logic rst_n; - assign rst_n = ~rst; - - ///////////////////////////////////////////////// // PO, query BRAM //////////////////////////////////////////////// @@ -122,24 +117,24 @@ module icache #( end endgenerate - // Cache addr - always_comb begin : cache_addr_gen + // BRAM index gen + always_comb begin : bram_addr_gen for (integer i = 0; i < NWAY; i++) begin - if (tag_bram_we[i][0]) begin - tag_bram_addr[i][0] = raddr_1_delay1[11:4]; - data_bram_addr[i][0] = raddr_1_delay1[11:4]; + if (miss_1 | (state == REFILL_1_WAIT & ~rvalid_1)) begin + tag_bram_addr[i][0] = p1_raddr_1[11:4]; + data_bram_addr[i][0] = p1_raddr_1[11:4]; end else if (rreq_1_i) begin tag_bram_addr[i][0] = raddr_1_i[11:4]; data_bram_addr[i][0] = raddr_1_i[11:4]; - end else begin // TODO: write + end else begin tag_bram_addr[i][0] = 0; data_bram_addr[i][0] = 0; end end for (integer i = 0; i < NWAY; i++) begin - if (tag_bram_we[i][1]) begin - tag_bram_addr[i][1] = raddr_2_delay1[11:4]; - data_bram_addr[i][1] = raddr_2_delay1[11:4]; + if (miss_2 | (state == REFILL_2_WAIT & ~rvalid_2)) begin + tag_bram_addr[i][1] = p1_raddr_2[11:4]; + data_bram_addr[i][1] = p1_raddr_2[11:4]; end else if (rreq_2_i) begin tag_bram_addr[i][1] = raddr_2_i[11:4]; data_bram_addr[i][1] = raddr_2_i[11:4]; @@ -157,27 +152,31 @@ module icache #( /////////////////////////////////////////////////// // Input reg - logic rreq_1_delay1, rreq_2_delay1; - logic [ADDR_WIDTH-1:0] raddr_1_delay1, raddr_2_delay1; + logic p1_rreq_1, p1_rreq_2; + logic [ADDR_WIDTH-1:0] p1_raddr_1, p1_raddr_2; 
always_ff @(posedge clk) begin - rreq_1_delay1 <= rreq_1_i; - rreq_2_delay1 <= rreq_2_i; - if (rreq_1_i & (rvalid_1 | state == IDLE)) raddr_1_delay1 <= raddr_1_i; - if (rreq_2_i & (rvalid_2 | state == IDLE)) raddr_2_delay1 <= raddr_2_i; + if (rvalid_1_o | ~p1_rreq_1) begin + p1_rreq_1 <= rreq_1_i; + p1_raddr_1 <= raddr_1_i; + end + if (rvalid_2_o | ~p1_rreq_2) begin + p1_rreq_2 <= rreq_2_i; + p1_raddr_2 <= raddr_2_i; + end end logic [NWAY-1:0][1:0] tag_hit; always_comb begin for (integer i = 0; i < NWAY; i++) begin - tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == raddr_1_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][0][20]; - tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == raddr_2_delay1[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][1][20]; + tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == p1_raddr_1[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][0][20]; + tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == p1_raddr_2[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][1][20]; end end logic rvalid_1, rvalid_2; - assign rvalid_1_o = rvalid_1 && (rreq_1_delay1 || state == REFILL_1_WAIT); - assign rvalid_2_o = rvalid_2 && (rreq_2_delay1 || state == REFILL_2_WAIT); + assign rvalid_1_o = rvalid_1 && p1_rreq_1; + assign rvalid_2_o = rvalid_2 && p1_rreq_2; // Generate read output always_comb begin rvalid_1 = 0; @@ -206,8 +205,8 @@ module icache #( REFILL_2_WAIT } state, next_state; - always_ff @(posedge clk or negedge rst_n) begin - if (!rst_n) begin + always_ff @(posedge clk) begin + if (rst) begin state <= IDLE; end else begin state <= next_state; @@ -215,9 +214,34 @@ module icache #( end - logic miss_1, miss_2; - assign miss_1 = rreq_1_delay1 & ~rvalid_1_o; - assign miss_2 = rreq_2_delay1 & ~rvalid_2_o; + logic miss_1_pulse, miss_2_pulse, miss_1_r, miss_2_r, miss_1, miss_2; + assign miss_1_pulse = p1_rreq_1 & ~rvalid_1 & (state == IDLE); + assign miss_2_pulse = p1_rreq_2 & ~rvalid_2 & (state == IDLE); + assign miss_1 = miss_1_pulse | miss_1_r; + assign miss_2 = 
miss_2_pulse | miss_2_r; + always_ff @(posedge clk) begin + if (rst) begin + miss_1_r <= 0; + miss_2_r <= 0; + end else begin + case (state) + IDLE: begin + miss_1_r <= miss_1_pulse; + miss_2_r <= miss_2_pulse; + end + REFILL_1_WAIT: begin + if (axi_rvalid_i) miss_1_r <= 0; + end + REFILL_2_WAIT: begin + if (axi_rvalid_i) miss_2_r <= 0; + end + default: begin + end + endcase + end + end + + always_comb begin : transition_comb case (state) @@ -236,15 +260,12 @@ module icache #( end REFILL_1_WAIT: begin if (rvalid_1) begin - // if (miss_2) next_state = REFILL_2_REQ; - // else - next_state = IDLE; + if (miss_2) next_state = REFILL_2_REQ; + else next_state = IDLE; end else next_state = REFILL_1_WAIT; end REFILL_2_WAIT: begin if (rvalid_2) begin - // if (miss_1) next_state = REFILL_1_REQ; - // else next_state = IDLE; end else next_state = REFILL_2_WAIT; end @@ -254,29 +275,20 @@ module icache #( endcase end - // State machine output + // Read request to AXI Controller always_comb begin - // Default value - axi_rreq_o = 0; - axi_addr_o = 0; case (state) - REFILL_1_REQ: begin - axi_rreq_o = 1; - axi_addr_o = raddr_1_i; + REFILL_1_REQ, REFILL_1_WAIT: begin + axi_rreq_o = miss_1 ? 1 : 0; + axi_addr_o = miss_1 ? p1_raddr_1 : 0; end - REFILL_1_WAIT: begin - axi_rreq_o = 1; - axi_addr_o = raddr_1_delay1; - end - REFILL_2_REQ: begin - axi_rreq_o = 1; - axi_addr_o = raddr_2_i; - end - REFILL_2_WAIT: begin - axi_rreq_o = 1; - axi_addr_o = raddr_2_delay1; + REFILL_2_REQ, REFILL_2_WAIT: begin + axi_rreq_o = miss_2 ? 1 : 0; + axi_addr_o = miss_2 ? 
p1_raddr_2 : 0; end default: begin + axi_rreq_o = 0; + axi_addr_o = 0; end endcase end @@ -296,13 +308,13 @@ module icache #( // write this way if (state == REFILL_1_WAIT && axi_rvalid_i) begin tag_bram_we[i][0] = 1; - tag_bram_wdata[i][0] = {1'b1, raddr_1_delay1[31:12]}; + tag_bram_wdata[i][0] = {1'b1, p1_raddr_1[31:12]}; data_bram_we[i][0] = 1; data_bram_wdata[i][0] = axi_data_i; end if (state == REFILL_2_WAIT && axi_rvalid_i) begin tag_bram_we[i][1] = 1; - tag_bram_wdata[i][1] = {1'b1, raddr_2_delay1[31:12]}; + tag_bram_wdata[i][1] = {1'b1, p1_raddr_2[31:12]}; data_bram_we[i][1] = 1; data_bram_wdata[i][1] = axi_data_i; end From 9b7a32f1f60ba0076e34fb6a4b9160b687e303de Mon Sep 17 00:00:00 2001 From: Rookie-rookie-rookie <292601787@qq.com> Date: Tue, 31 May 2022 16:24:47 +0800 Subject: [PATCH 28/41] try to fix logic bug but fail --- src/vsrc/dcache.sv | 2 +- src/vsrc/tlb.sv | 10 +++++----- src/vsrc/tlb_entry.sv | 26 +++++++++----------------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index f4b6723..324c2df 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -156,7 +156,7 @@ module dcache ( always @(posedge clk, posedge rst) begin - if (!rst) state <= IDLE; + if (rst) state <= IDLE; else state <= next_state; end diff --git a/src/vsrc/tlb.sv b/src/vsrc/tlb.sv index 9ae0d02..746bb94 100644 --- a/src/vsrc/tlb.sv +++ b/src/vsrc/tlb.sv @@ -22,10 +22,10 @@ module tlb //invtlb input tlb_inv_in_struct inv_signal_i, //from csr - input logic [31:0] csr_dmw0 , - input logic [31:0] csr_dmw1 , - input logic csr_da , - input logic csr_pg + input logic [31:0] csr_dmw0, + input logic [31:0] csr_dmw1, + input logic csr_da, + input logic csr_pg ); logic [18:0] s0_vppn ; @@ -148,4 +148,4 @@ assign data_o.offset = data_i.vaddr[3:0]; assign data_o.index = data_i.vaddr[11:4]; assign data_o.tag = data_addr_trans_en ? ((s1_ps == 6'd12) ? 
s1_ppn : {s1_ppn[19:10], data_paddr[21:12]}) : data_paddr[31:12]; -endmodule \ No newline at end of file +endmodule diff --git a/src/vsrc/tlb_entry.sv b/src/vsrc/tlb_entry.sv index dbef779..d84e801 100644 --- a/src/vsrc/tlb_entry.sv +++ b/src/vsrc/tlb_entry.sv @@ -77,8 +77,8 @@ generate end endgenerate -assign s0_found = !(!match0); -assign s1_found = !(!match1); +assign s0_found = match0 != 32'b0;//!(!match0); +assign s1_found = match1 != 32'b0;//!(!match1); @@ -132,23 +132,15 @@ generate for (i = 0; i < TLBNUM; i = i + 1) begin: invalid_tlb_entry always @(posedge clk) begin - if (we && (w_index == i)) begin + if (we && (w_index == i)) tlb_e[i] <= write_port.e; - end else if (inv_i.en) begin - if (inv_i.op == 5'd0 || inv_i.op == 5'd1) begin + if (inv_i.op == 5'd0 || inv_i.op == 5'd1) + tlb_e[i] <= 1'b0; + else if (inv_i.op == 5'd2 && tlb_g[i]) + tlb_e[i] <= 1'b0; + else if (inv_i.op == 5'd3 && !tlb_g[i]) tlb_e[i] <= 1'b0; - end - else if (inv_i.op == 5'd2) begin - if (tlb_g[i]) begin - tlb_e[i] <= 1'b0; - end - end - else if (inv_i.op == 5'd3) begin - if (!tlb_g[i]) begin - tlb_e[i] <= 1'b0; - end - end else if (inv_i.op == 5'd4) begin if (!tlb_g[i] && (tlb_asid[i] == inv_i.asid)) begin tlb_e[i] <= 1'b0; @@ -171,4 +163,4 @@ generate end endgenerate -endmodule \ No newline at end of file +endmodule From 6c2fe45617ccada1376610d95f0324cb5a7b1344 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Tue, 31 May 2022 18:58:34 +0800 Subject: [PATCH 29/41] fix: fix logic loop --- src/vsrc/cpu_top.sv | 3 +- src/vsrc/dcache.sv | 78 ++++++++++++++++++++++----------------------- 2 files changed, 41 insertions(+), 40 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 8618c20..6ea924d 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -9,6 +9,7 @@ `include "frontend/frontend.sv" `include "instr_buffer.sv" `include "icache.sv" +`include "dcache.sv" `include "ctrl.sv" `include "pipeline_defines.sv" `include "pipeline/1_decode/id.sv" @@ -140,7 +141,7 
@@ module cpu_top ( .dcache_wr_req_i(dcache_axi_wreq), .dcache_wr_type_i(3'b000), .dcache_wr_data(dcache_axi_data), - .dcache_wr_rdy(axi_dcache_rd_rdy), + .dcache_wr_rdy(axi_dcache_wr_rdy), // External AXI signals diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index 324c2df..e926b1d 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -24,18 +24,18 @@ module dcache ( - input logic clk, - input logic rst, + input logic clk, + input logic rst, //cache与CPU流水线的交互接 - input logic valid, //表明请求有效 - input logic op, // 1:write 0: read - input logic uncache, //标志uncache指令,高位有效 - input logic [7:0] index, // 地址的index域(addr[11:4]) - input logic [19:0] tag, //从TLB查到的pfn形成的tag - input logic [3:0] offset, //地址的offset域addr[3:0] - input logic [3:0] wstrb, //写字节使能信号 - input logic [31:0] wdata, //写数据 + input logic valid, //表明请求有效 + input logic op, // 1:write 0: read + input logic uncache, //标志uncache指令,高位有效 + input logic [7:0] index, // 地址的index域(addr[11:4]) + input logic [19:0] tag, //从TLB查到的pfn形成的tag + input logic [3:0] offset, //地址的offset域addr[3:0] + input logic [3:0] wstrb, //写字节使能信号 + input logic [31:0] wdata, //写数据 output logic addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 output logic data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 output logic [31:0] rdata, //读Cache的结果 @@ -44,16 +44,16 @@ module dcache ( output logic rd_req, //读请求有效信号。高电平有效 output logic[3:0] rd_type, //读请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 output logic [31:0] rd_addr, //读请求起始地址 - input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 - input logic ret_valid, //返回数据有效。高电平有效。 - input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 - input logic [31:0] ret_data, //读返回数据 + input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 + input logic ret_valid, //返回数据有效。高电平有效。 + input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 + input logic [31:0] ret_data, //读返回数据 output logic wr_req, //写请求有效信号。高电平有效 output logic[2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 output logic [31:0] wr_addr, //写请求起始地址 output logic[3:0] wr_wstrb, 
//写操作的字节掩码。仅在写请求类型为:3'b000: 字节;3'b001: 半字;3'b010:字的情况下才有意义 output logic [127:0] wr_data, //写数据 - input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. + input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. //还需对类SRAM-AXI转接桥模块进行调整,随后确定实现 @@ -89,29 +89,29 @@ module dcache ( logic [511:0][149:0] cache_data; logic [2:0] wr_state, wr_next_state; - logic hit; - logic hit1; - logic hit2; - logic way; //若hit,则way无意义,若miss,则way表示分配的那一路 - logic write_op; //hit write 执行标志,高电平有效 - logic miss_way_r; //缺失路的写使能 + logic hit; + logic hit1; + logic hit2; + logic way; //若hit,则way无意义,若miss,则way表示分配的那一路 + logic write_op; //hit write 执行标志,高电平有效 + logic miss_way_r; //缺失路的写使能 //虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 - logic [ 7:0] cpu_req_index; - logic [19:0] cpu_req_tag; - logic [ 3:0] cpu_req_offset; + logic [ 7:0] cpu_req_index; + logic [19:0] cpu_req_tag; + logic [ 3:0] cpu_req_offset; //wire cpu_req_uncache; - logic cpu_req_valid; - logic cpu_req_op; - logic [ 3:0] cpu_req_wstrb; - logic [31:0] cpu_req_wdata; + logic cpu_req_valid; + logic cpu_req_op; + logic [ 3:0] cpu_req_wstrb; + logic [31:0] cpu_req_wdata; - logic cpu_rd_rdy; - logic cpu_wr_rdy; - logic cpu_ret_valid; - logic cpu_ret_last; - logic [31:0] cpu_ret_data; + logic cpu_rd_rdy; + logic cpu_wr_rdy; + logic cpu_ret_valid; + logic cpu_ret_last; + logic [31:0] cpu_ret_data; ////虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 //logic [7:0]cpu_req_index; @@ -131,7 +131,7 @@ module dcache ( //logic[31:0] cpu_ret_data; //hit write 冲突 高位有效 - logic hit_conflict = 0; + logic hit_conflict = 0; assign cpu_req_valid = valid; assign cpu_req_op = op; @@ -155,7 +155,7 @@ module dcache ( end - always @(posedge clk, posedge rst) begin + always @(posedge clk) begin if (rst) state <= IDLE; else state <= next_state; end @@ -338,20 +338,20 @@ module dcache ( cache_data[2*cpu_req_index+way][149:128] = {2'b10, cpu_req_tag}; cache_data[2*cpu_req_index+way][rt_offset*32+:32] = ret_data; if (ret_last) begin - rt_offset = 
0; + // rt_offset = 0; rd_req = 1'b0; - rdata = cache_data[2*cpu_req_index+way][cpu_req_index*8+:32]; + rdata = cache_data[2*cpu_req_index+way][cpu_req_index*8+:32]; end end if (cpu_req_op == 1) begin cache_data[2*cpu_req_index+way][149:128] = {2'b11, cpu_req_tag}; cache_data[2*cpu_req_index+way][rt_offset*8+:32] = ret_data; if (ret_last) begin - rt_offset = 0; + // rt_offset = 0; cache_data[2*cpu_req_index+way][cpu_req_index*8+:32] = cpu_req_wdata; end end - rt_offset = rt_offset + 1; + // rt_offset = rt_offset + 1; end end From 6f85b2afe6fcd872cc8b629f81e1c394d87b4fd4 Mon Sep 17 00:00:00 2001 From: Rookie-rookie-rookie <292601787@qq.com> Date: Tue, 31 May 2022 20:12:37 +0800 Subject: [PATCH 30/41] do nothing --- src/vsrc/dcache.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index e926b1d..52b1172 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -76,7 +76,7 @@ module dcache ( state, next_state; //Write Buffer状态机包括两个状态 //IDLE: Write Buffer状态机当前没有待写的数据 - //WRITE: 将待写的数据写入Cache中。在主状态机处于LOOKUP状态且发现Store操作命中Cache是,触发Write Buffer状态机进入Write状态 + //WRITE: 将待写的数据写入Cache中。在主状态机处于LOOKUP状态且发现Store操作命中Cache时,触发Write Buffer状态机进入Write状态 //同时Write Buffer会寄存Store要写入的Index、路号、offset、写使能(写32位数据里的那些字节)和写数据。 @@ -88,7 +88,7 @@ module dcache ( parameter BlockLSB = 0; logic [511:0][149:0] cache_data; - logic [2:0] wr_state, wr_next_state; + logic [31:0] wr_state, wr_next_state; logic hit; logic hit1; logic hit2; @@ -215,7 +215,7 @@ module dcache ( //hit1 always @(*) begin if (state == LOOKUP) - if(cache_data[2*cpu_req_index][V]==1'b1&&cache_data[2*cpu_req_index][TagMSB:TagLSB]==cpu_req_tag)begin + if(cache_data[2*cpu_req_index][V]==1'b1&&cache_data[2*cpu_req_index][TagMSB:TagLSB] == cpu_req_tag)begin hit1 = 1'b1; if (cpu_req_op == 1) begin if (index == cpu_req_index && tag == cpu_req_tag) begin @@ -228,7 +228,7 @@ module dcache ( //hit2 always @(*) begin if (state == LOOKUP) - 
if(cache_data[2*cpu_req_index+1][V]==1'b1&&cache_data[2*cpu_req_index+1][TagMSB:TagLSB]==cpu_req_tag)begin + if(cache_data[2*cpu_req_index+1][V]==1'b1&&cache_data[2*cpu_req_index+1][TagMSB:TagLSB] == cpu_req_tag)begin hit2 = 1'b1; if (cpu_req_op == 1) begin if (index == cpu_req_index && tag == cpu_req_tag) begin @@ -262,7 +262,7 @@ module dcache ( end end - else if(wr_state==WRITE && hit) //write hit + else if(wr_state == WRITE && hit) //write hit begin addr_ok <= 1'b1; data_ok <= 1'b1; From 089fd6980d738c2ae195f4a3c042385a8b4d71e7 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Tue, 31 May 2022 20:30:46 +0800 Subject: [PATCH 31/41] docs: add comments in frontend components --- src/vsrc/frontend/frontend.sv | 5 +--- src/vsrc/frontend/ftq.sv | 28 ++++++++++++--------- src/vsrc/frontend/ifu.sv | 46 ++++++++++++++++++++++------------- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index e456449..ab994d1 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -75,10 +75,7 @@ module frontend #( ftq_ifu_t ftq_ifu_block; logic ifu_ftq_accept; - ftq #( - .FETCH_WIDTH(4), - .QUEUE_SIZE (4) - ) u_ftq ( + ftq u_ftq ( .clk(clk), .rst(rst), diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv index c471ba2..2311627 100644 --- a/src/vsrc/frontend/ftq.sv +++ b/src/vsrc/frontend/ftq.sv @@ -24,14 +24,10 @@ module ftq #( input logic ifu_accept_i // Must return in the same cycle ); - // Reset signal - logic rst_n; - assign rst_n = ~rst; - // QUEUE data structure ftq_block_t [QUEUE_SIZE-1:0] FTQ, next_FTQ; - always_ff @(posedge clk or negedge rst_n) begin - if (~rst_n) begin + always_ff @(posedge clk) begin + if (rst) begin FTQ <= 0; end else begin FTQ <= next_FTQ; @@ -48,17 +44,24 @@ module ftq #( // PTR logic [$clog2(QUEUE_SIZE)-1:0] bpu_ptr, ifu_ptr, comm_ptr; - always_ff @(posedge clk or negedge rst_n) begin : ptr_ff - if (~rst_n) begin + always_ff @(posedge clk) 
begin : ptr_ff + if (rst) begin bpu_ptr <= 0; ifu_ptr <= 0; comm_ptr <= 0; end else begin + // Backend committed, means that current comm_ptr block is done if (backend_commit_i) comm_ptr <= comm_ptr + 1; - if (ifu_accept_i & ~instr_buffer_stallreq_i) ifu_ptr <= ifu_ptr + 1; + + // If block is accepted by IFU, ifu_ptr++ + // IB full should result in IFU not accepting FTQ input + if (ifu_accept_i) ifu_ptr <= ifu_ptr + 1; + + // BPU ptr if (bpu_i.valid) bpu_ptr <= bpu_ptr + 1; - // If backend redirect triggered, back to comm_ptr + // If backend redirect triggered, back to comm_ptr + 1 + // Since FTQ is cleared out, so not pending block if (backend_flush_i) begin ifu_ptr <= comm_ptr + 1; bpu_ptr <= comm_ptr + 1; @@ -68,11 +71,12 @@ module ftq #( // next_FTQ always_comb begin : next_FTQ_comb - // Default, no change + // Default no change next_FTQ = FTQ; + // clear out if committed if (backend_commit_i) next_FTQ[comm_ptr] = 0; + // Accept BPU input if (bpu_i.valid) next_FTQ[bpu_ptr] = bpu_i; - // If backend redirect triggered, clear FTQ if (backend_flush_i) next_FTQ = 0; end diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv index d06ad33..bbae041 100644 --- a/src/vsrc/frontend/ifu.sv +++ b/src/vsrc/frontend/ifu.sv @@ -30,12 +30,13 @@ module ifu #( input logic stallreq_i, output instr_buffer_info_t instr_buffer_o[FETCH_WIDTH] ); - - // P0 ////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + // P0, send read req to ICache + ///////////////////////////////////////////////////////////////////////////////// logic p0_send_rreq; // Condition when to send rreq to ICache, see doc for detail assign p0_send_rreq = ftq_i.valid & ~is_flushing & ~stallreq_i & ~p1_stallreq; - assign ftq_accept_o = p0_send_rreq; + assign ftq_accept_o = p0_send_rreq; // FTQ handshake, same cycle as ftq_i // Send read req to ICache always_comb begin if (p0_send_rreq) begin @@ 
-50,17 +51,20 @@ module ifu #( end end - - // P1 ////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + // P1 + ///////////////////////////////////////////////////////////////////////////////// // Flush state logic is_flushing_r, is_flushing; assign is_flushing = is_flushing_r | flush_i; - always_ff @(posedge clk) begin + always_ff @(posedge clk) begin : is_flushing_ff if (rst) begin is_flushing_r <= 0; end else if (flush_i & p1_read_transaction.valid & ~p1_read_done) begin + // Enter a flusing state if flush_i and read transaction on-the-fly is_flushing_r <= 1; end else if (p1_read_done) begin + // Reset when read transaction is done is_flushing_r <= 0; end end @@ -80,12 +84,13 @@ module ifu #( assign p1_read_done = p1_read_transaction.is_cross_cacheline ? (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]) & (icache_rvalid_i[1]| p1_read_transaction.icache_rvalid_r[1]) : (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]); - logic p1_stallreq; + logic p1_stallreq; // Currently in transaction and not done yet assign p1_stallreq = p1_read_transaction.valid & ~p1_read_done; - always_ff @(posedge clk) begin + always_ff @(posedge clk) begin : p1_ff if (rst) begin p1_read_transaction <= 0; end else if (p0_send_rreq) begin + // If P0 sent rreq to ICache, move info from P0 to P1 p1_read_transaction.valid <= 1; p1_read_transaction.start_pc <= ftq_i.start_pc; p1_read_transaction.is_cross_cacheline <= ftq_i.is_cross_cacheline; @@ -96,6 +101,8 @@ module ifu #( // Reset if done and not stalling p1_read_transaction <= 0; end else begin + // Store rvalid in P1 data structure + // This is required since ICache do not guarantee rvalid of the two ports is returned in the same cycle if (icache_rvalid_i[0]) begin p1_read_transaction.icache_rvalid_r[0] <= 1; p1_read_transaction.icache_rdata_r[0] <= icache_rdata_i[0]; @@ -107,20 +114,22 @@ module ifu #( end 
end + logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; // Same cycle as ICache return, used in P2 + assign cacheline_combined = { + icache_rvalid_i[1] ? icache_rdata_i[1] : p1_read_transaction.icache_rdata_r[1], + icache_rvalid_i[0] ? icache_rdata_i[0] : p1_read_transaction.icache_rdata_r[0] + }; + // P1 debug, for observability logic [ADDR_WIDTH-1:0] debug_p1_pc = p1_read_transaction.start_pc; // DEBUG logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG logic [1:0] debug_p1_rvalid_r = p1_read_transaction.icache_rvalid_r; - // P2 ////////////////////////////////////////////////////////////////////////////////// - // Send instr info to IB - logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; // Same cycle as ICache return - assign cacheline_combined = { - icache_rvalid_i[1] ? icache_rdata_i[1] : p1_read_transaction.icache_rdata_r[1], - icache_rvalid_i[0] ? icache_rdata_i[0] : p1_read_transaction.icache_rdata_r[0] - }; - always_ff @(posedge clk) begin + ///////////////////////////////////////////////////////////////////////////////// + // P2, send instr info to IB + ///////////////////////////////////////////////////////////////////////////////// + always_ff @(posedge clk) begin : p2_ff if (rst) begin for (integer i = 0; i < FETCH_WIDTH; i++) begin instr_buffer_o[i] <= 0; @@ -128,6 +137,8 @@ module ifu #( end else if (stallreq_i) begin // Hold output end else if (p1_read_done & ~is_flushing) begin + // If p1 read done, pass data to IB + // However, if p1 read done comes from flushing, do not pass down to IB for (integer i = 0; i < FETCH_WIDTH; i++) begin // Default instr_buffer_o[i].is_last_in_block <= 0; @@ -135,7 +146,7 @@ module ifu #( if (i < p1_read_transaction.length) begin if (i == p1_read_transaction.length - 1) begin instr_buffer_o[i].valid <= 1; - instr_buffer_o[i].is_last_in_block <= 1; + instr_buffer_o[i].is_last_in_block <= 1; // Mark the instruction as last in block, used when commit instr_buffer_o[i].pc <= 
p1_read_transaction.start_pc + i * 4; // Instr is 4 bytes long instr_buffer_o[i].instr <= cacheline_combined[p1_read_transaction.start_pc[3:2]+i]; end else begin @@ -148,6 +159,7 @@ module ifu #( end end end else begin + // Otherwise keep 0 for (integer i = 0; i < FETCH_WIDTH; i++) begin instr_buffer_o[i] <= 0; end From ec7043f2c0f593f5830d45785b1654263987802c Mon Sep 17 00:00:00 2001 From: Easton Man Date: Tue, 31 May 2022 20:44:16 +0800 Subject: [PATCH 32/41] feat: use paramerized LFSR --- README.md | 3 ++ src/vsrc/icache.sv | 12 ++++-- src/vsrc/utils/lfsr.sv | 91 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 3 deletions(-) create mode 100644 src/vsrc/utils/lfsr.sv diff --git a/README.md b/README.md index de9d9f5..411cb76 100644 --- a/README.md +++ b/README.md @@ -83,4 +83,7 @@ git checkout -b > 引用和致谢 +[cva5](https://github.com/openhwgroup/cva5) 项目和 Eric Matthews + - [乘除法器](https://github.com/risclite/rv32m-multiplier-and-divider) +- [参数化的LFSR](https://github.com/openhwgroup/cva5/blob/master/core/lfsr.sv) \ No newline at end of file diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index 0fadbbd..ec40829 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -1,5 +1,6 @@ `include "utils/bram.sv" +`include "utils/lfsr.sv" module icache #( parameter NSET = 256, @@ -295,9 +296,14 @@ module icache #( // Refill write BRAM logic random_r; - always_ff @(posedge clk) begin - random_r <= ~random_r; - end + lfsr #( + .WIDTH(1), + ) u_lfsr ( + .clk (clk), + .rst (rst), + .en (1'b1), + .value(random_r) + ); always_comb begin for (integer i = 0; i < NWAY; i++) begin tag_bram_we[i] = 0; diff --git a/src/vsrc/utils/lfsr.sv b/src/vsrc/utils/lfsr.sv new file mode 100644 index 0000000..f48a583 --- /dev/null +++ b/src/vsrc/utils/lfsr.sv @@ -0,0 +1,91 @@ +/* + * Copyright © 2021 Eric Matthews + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. + * + * Author(s): + * Eric Matthews + */ + +//3-16 bit LFSRs with additional feedback to support full 2^N range +module lfsr #( + parameter int unsigned WIDTH = 3, + parameter NEEDS_RESET = 1 +) ( + input logic clk, + input logic rst, + input logic en, + output logic [WIDTH-1:0] value +); + + typedef struct packed { + int unsigned NUM; + bit [3:0][31:0] INDICIES; + } tap_t; + + //XNOR taps for LFSR from 3-16 bits wide (source: Xilinx xapp052) + localparam tap_t LFSR_TAPS[17] = '{ + //Dummy entries for widths 0-2 + '{ + NUM : 1, + INDICIES : '{0, 0, 0, 0} + }, + '{NUM : 1, INDICIES : '{0, 0, 0, 0}}, + '{NUM : 1, INDICIES : '{0, 0, 0, 0}}, + //Number of taps and indicies[3:0] for LFSRs width 3 to 16 + '{ + NUM : 2, + INDICIES : '{0, 0, 1, 2} + }, //3 + '{NUM : 2, INDICIES : '{0, 0, 2, 3}}, //4 + '{NUM : 2, INDICIES : '{0, 0, 2, 4}}, + '{NUM : 2, INDICIES : '{0, 0, 4, 5}}, + '{NUM : 2, INDICIES : '{0, 0, 5, 6}}, + '{NUM : 4, INDICIES : '{3, 4, 5, 7}}, //8 + '{NUM : 2, INDICIES : '{0, 0, 4, 8}}, + '{NUM : 2, INDICIES : '{0, 0, 6, 9}}, + '{NUM : 2, INDICIES : '{0, 0, 8, 10}}, + '{NUM : 4, INDICIES : '{0, 3, 5, 11}}, //12 + '{NUM : 4, INDICIES : '{0, 2, 3, 12}}, + '{NUM : 4, INDICIES : '{0, 2, 4, 13}}, + '{NUM : 2, INDICIES : '{0, 0, 13, 14}}, //15 + '{NUM : 4, INDICIES : '{3, 12, 14, 15}} //16 + }; + + localparam tap_t TAPS = LFSR_TAPS[WIDTH]; + + logic [TAPS.NUM-1:0] feedback_input; + logic feedback; + 
//////////////////////////////////////////////////// + //Implementation + generate + if (WIDTH == 2) begin : gen_width_two + assign feedback = ~value[WIDTH-1]; + end else begin : gen_width_three_plus + for (genvar i = 0; i < TAPS.NUM; i++) begin : gen_taps + assign feedback_input[i] = value[int'(TAPS.INDICIES[i])]; + end + //XNOR of taps and range extension to include all ones + assign feedback = (~^feedback_input) ^ |value[WIDTH-2:0]; + end + endgenerate + + initial value = 0; + always_ff @(posedge clk) begin + if (NEEDS_RESET & rst) value <= '0; + else if (en) value <= {value[WIDTH-2:0], feedback}; + end + +endmodule From 5e62d2fa3d92c1922d1e1e37bd609ba9333da6c4 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Tue, 31 May 2022 20:47:35 +0800 Subject: [PATCH 33/41] feat: add fpa from matthews --- .gitignore | 1 + README.md | 3 +- src/vsrc/utils/priority_encoder.sv | 61 ++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 src/vsrc/utils/priority_encoder.sv diff --git a/.gitignore b/.gitignore index c3c6d00..26323b0 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ wave.vcd # Misc log files logs/ +log #la_code la_code diff --git a/README.md b/README.md index 411cb76..c97b059 100644 --- a/README.md +++ b/README.md @@ -86,4 +86,5 @@ git checkout -b [cva5](https://github.com/openhwgroup/cva5) 项目和 Eric Matthews - [乘除法器](https://github.com/risclite/rv32m-multiplier-and-divider) -- [参数化的LFSR](https://github.com/openhwgroup/cva5/blob/master/core/lfsr.sv) \ No newline at end of file +- [参数化的LFSR](https://github.com/openhwgroup/cva5/blob/master/core/lfsr.sv) +- [基于ROM的Priority Encoder](https://github.com/openhwgroup/cva5/blob/master/core/priority_encoder.sv) \ No newline at end of file diff --git a/src/vsrc/utils/priority_encoder.sv b/src/vsrc/utils/priority_encoder.sv new file mode 100644 index 0000000..1835106 --- /dev/null +++ b/src/vsrc/utils/priority_encoder.sv @@ -0,0 +1,61 @@ +/* + * Copyright © 2021 Eric Matthews, 
Lesley Shannon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. + * + * Author(s): + * Eric Matthews + */ + +//////////////////////////////////////////////////// +//Highest Priority for: Index Zero +//Look-up Table based +//Max width of 12 +//////////////////////////////////////////////////// +module priority_encoder #( + parameter WIDTH = 4 +) ( + input logic [WIDTH-1:0] priority_vector, + output logic [(WIDTH == 1) ? 0 : ($clog2(WIDTH)-1) : 0] encoded_result +); + //////////////////////////////////////////////////// + //Width Check + if (WIDTH > 12) $error("Max priority encoder width exceeded!"); + + //Tool workaround + localparam MIN_WIDTH = (WIDTH == 1) ? 
2 : WIDTH; + localparam LOG2_WIDTH = $clog2(MIN_WIDTH); + //Table generation for priority encoder + function [2**MIN_WIDTH-1:0][LOG2_WIDTH-1 : 0] table_gen(); + for (int i = 0; i < 2 ** MIN_WIDTH; i++) begin //Loop through all memory addresses + table_gen[i] = LOG2_WIDTH'(MIN_WIDTH - 1); //Initialize to lowest priority + for ( + int j = (int'(MIN_WIDTH) - 2); j >= 0; j-- + ) begin //Check each bit in increasing priority + if (i[j]) //If bit is set update table value with that bit's index + table_gen[i] = LOG2_WIDTH'(j); + end + end + endfunction + + //Initialize Table + localparam logic [2**MIN_WIDTH-1:0][LOG2_WIDTH-1 : 0] ENCODER_ROM = table_gen(); + + //////////////////////////////////////////////////// + //Implementation + assign encoded_result = (WIDTH == 1) ? 0 : ENCODER_ROM[priority_vector]; + +endmodule From 8ddaee764af5e1009523a652430ec414975220f4 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Tue, 31 May 2022 21:12:24 +0800 Subject: [PATCH 34/41] fix: fix synthesize issue --- src/vsrc/icache.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index ec40829..714b096 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -295,9 +295,9 @@ module icache #( end // Refill write BRAM - logic random_r; + logic [2:0] random_r; lfsr #( - .WIDTH(1), + .WIDTH(3) ) u_lfsr ( .clk (clk), .rst (rst), @@ -310,7 +310,7 @@ module icache #( tag_bram_wdata[i] = 0; data_bram_we[i] = 0; data_bram_wdata[i] = 0; - if (i[0] == random_r) begin + if (i[0] == random_r[0]) begin // write this way if (state == REFILL_1_WAIT && axi_rvalid_i) begin tag_bram_we[i][0] = 1; From 3a2750e0b83a45c02a2c77579b3ab5e1f8ef62c9 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Wed, 1 Jun 2022 13:45:05 +0800 Subject: [PATCH 35/41] feat: implement dummy dcache --- src/vsrc/cpu_top.sv | 12 ++-- src/vsrc/dcache.sv | 73 ++++++++++--------- src/vsrc/dummy_dcache.sv | 150 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 192 
insertions(+), 43 deletions(-) create mode 100644 src/vsrc/dummy_dcache.sv diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 6ea924d..9027547 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -9,7 +9,7 @@ `include "frontend/frontend.sv" `include "instr_buffer.sv" `include "icache.sv" -`include "dcache.sv" +`include "dummy_dcache.sv" `include "ctrl.sv" `include "pipeline_defines.sv" `include "pipeline/1_decode/id.sv" @@ -191,19 +191,19 @@ module cpu_top ( assign mem_cache_ce = mem_cache_signal[0].ce | mem_cache_signal[1].ce; assign mem_cache_we = mem_cache_signal[0].we | mem_cache_signal[1].we; assign mem_cache_sel = mem_cache_signal[0].we ? mem_cache_signal[0].sel : mem_cache_signal[0].we ? mem_cache_signal[1].sel : 0; - assign mem_cache_addr = mem_cache_signal[0].we ? mem_cache_signal[0].addr : mem_cache_signal[0].we ? mem_cache_signal[1].addr : 0; + assign mem_cache_addr = mem_cache_signal[0].addr; assign mem_cache_data = mem_cache_signal[0].we ? mem_cache_signal[0].data : mem_cache_signal[0].we ? 
mem_cache_signal[1].data : 0; - dcache u_dcache( + dummy_dcache u_dcache( .clk (clk ), .rst (rst ), .valid (mem_cache_ce), .op (mem_cache_we), .uncache (1'b0), - .index (tlb_data_o.index), - .tag (tlb_data_o.tag), - .offset (tlb_data_o.offset), + .index (mem_cache_addr[11:4]), + .tag (mem_cache_addr[31:12]), + .offset (mem_cache_addr[3:0]), .wstrb (mem_cache_sel), .wdata (mem_cache_data), .addr_ok (mem_addr_ok), diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv index 52b1172..c860741 100644 --- a/src/vsrc/dcache.sv +++ b/src/vsrc/dcache.sv @@ -73,7 +73,7 @@ module dcache ( REFILL, WRITE } - state, next_state; + state, next_state, wr_state, wr_next_state; //Write Buffer状态机包括两个状态 //IDLE: Write Buffer状态机当前没有待写的数据 //WRITE: 将待写的数据写入Cache中。在主状态机处于LOOKUP状态且发现Store操作命中Cache时,触发Write Buffer状态机进入Write状态 @@ -88,30 +88,29 @@ module dcache ( parameter BlockLSB = 0; logic [511:0][149:0] cache_data; - logic [31:0] wr_state, wr_next_state; - logic hit; - logic hit1; - logic hit2; - logic way; //若hit,则way无意义,若miss,则way表示分配的那一路 - logic write_op; //hit write 执行标志,高电平有效 - logic miss_way_r; //缺失路的写使能 + logic hit1; + logic hit2; + logic hit; + logic way; //若hit,则way无意义,若miss,则way表示分配的那一路 + logic write_op; //hit write 执行标志,高电平有效 + logic miss_way_r; //缺失路的写使能 //虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 - logic [ 7:0] cpu_req_index; - logic [19:0] cpu_req_tag; - logic [ 3:0] cpu_req_offset; + logic [ 7:0] cpu_req_index; + logic [ 19:0] cpu_req_tag; + logic [ 3:0] cpu_req_offset; //wire cpu_req_uncache; - logic cpu_req_valid; - logic cpu_req_op; - logic [ 3:0] cpu_req_wstrb; - logic [31:0] cpu_req_wdata; + logic cpu_req_valid; + logic cpu_req_op; + logic [ 3:0] cpu_req_wstrb; + logic [ 31:0] cpu_req_wdata; - logic cpu_rd_rdy; - logic cpu_wr_rdy; - logic cpu_ret_valid; - logic cpu_ret_last; - logic [31:0] cpu_ret_data; + logic cpu_rd_rdy; + logic cpu_wr_rdy; + logic cpu_ret_valid; + logic cpu_ret_last; + logic [ 31:0] cpu_ret_data; 
////虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 //logic [7:0]cpu_req_index; @@ -131,7 +130,7 @@ module dcache ( //logic[31:0] cpu_ret_data; //hit write 冲突 高位有效 - logic hit_conflict = 0; + logic hit_conflict = 0; assign cpu_req_valid = valid; assign cpu_req_op = op; @@ -155,9 +154,14 @@ module dcache ( end - always @(posedge clk) begin - if (rst) state <= IDLE; - else state <= next_state; + always_ff @(posedge clk) begin : state_ff + if (rst) begin + state <= IDLE; + wr_state <= IDLE; + end else begin + state <= next_state; + wr_state <= wr_next_state; + end end //state change @@ -168,10 +172,14 @@ module dcache ( else next_state = LOOKUP; end LOOKUP: begin - if ((hit && !cpu_req_valid) || (hit && (cpu_req_valid && hit_conflict))) //若hit + if ((hit && !cpu_req_valid) || (hit && (cpu_req_valid && hit_conflict))) begin + // Read hit or Write hit next_state = IDLE; - else if (hit && cpu_req_valid) next_state = LOOKUP; - else if (!hit) begin + end else if (hit && cpu_req_valid) begin + // Hit and have request + next_state = LOOKUP; + end else if (!hit) begin + // Anything miss enters miss state next_state = MISS; end end @@ -193,7 +201,7 @@ module dcache ( logic wr_buffer; //Write buffer state change - always @(*) begin + always_comb begin case (wr_state) IDLE: if (hit && cpu_req_op && cpu_req_valid) begin @@ -238,15 +246,6 @@ module dcache ( end else hit2 = 1'b0; else hit2 = 1'b0; end - //hit - always @(*) begin - if (state == LOOKUP) begin - hit = hit1 || hit2; - if (hit && cpu_req_op) begin - wr_state = WRITE; - end - end else hit = 1'b0; - end //LOOKUP模块: Cache命中后的读写操作---Data Select diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv new file mode 100644 index 0000000..68068f9 --- /dev/null +++ b/src/vsrc/dummy_dcache.sv @@ -0,0 +1,150 @@ +module dummy_dcache ( + input logic clk, + input logic rst, + + //cache与CPU流水线的交互接 + input logic valid, //表明请求有效 + input logic op, // 1:write 0: read + input logic uncache, //标志uncache指令,高位有效 + 
input logic [7:0] index, // 地址的index域(addr[11:4]) + input logic [19:0] tag, //从TLB查到的pfn形成的tag + input logic [3:0] offset, //地址的offset域addr[3:0] + input logic [3:0] wstrb, //写字节使能信号 + input logic [31:0] wdata, //写数据 + output logic addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 + output logic data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 + output logic [31:0] rdata, //读Cache的结果 + + //cache与AXI总线的交互接口 + output logic rd_req, //读请求有效信号。高电平有效 + output logic [2:0] rd_type, //读请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] rd_addr, //读请求起始地址 + input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 + input logic ret_valid, //返回数据有效。高电平有效。 + input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 + input logic [31:0] ret_data, //读返回数据 + output logic wr_req, //写请求有效信号。高电平有效 + output logic [2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] wr_addr, //写请求起始地址 + output logic [3:0] wr_wstrb, //写操作的字节掩码。仅在写请求类型为:3'b000: 字节;3'b001: 半字;3'b010:字的情况下才有意义 + output logic [127:0] wr_data, //写数据 + input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. 
+ + + //还需对类SRAM-AXI转接桥模块进行调整,随后确定实现 +); + + enum int { + IDLE, + READ_REQ, + READ_WAIT, + WRITE_REQ + } + state, next_state; + + always_ff @(posedge clk) begin + if (rst) state <= 0; + else state <= next_state; + end + + // State transition + always_comb begin + case (state) + IDLE: begin + if (valid) begin + if (op) next_state = WRITE_REQ; + else next_state = READ_REQ; + end else next_state = IDLE; + end + READ_REQ: begin + if (rd_rdy) next_state = READ_WAIT; // If AXI ready, send request + else next_state = READ_REQ; + end + READ_WAIT: begin + if (ret_valid) next_state = IDLE; // If return valid, back to IDLE + else next_state = READ_WAIT; + end + WRITE_REQ: begin + if (wr_rdy) + next_state = IDLE; // If AXI is ready, then write req is accept this cycle, back to IDLE + else next_state = WRITE_REQ; + end + default: begin + next_state = IDLE; + end + endcase + end + + logic [31:0] cpu_addr; + assign cpu_addr = {tag, index, offset}; + + logic rd_req_r; + logic [31:0] rd_addr_r; + + // Handshake with AXI + always_ff @(posedge clk) begin + case (state) + READ_REQ: begin + if (rd_rdy) begin + rd_req_r <= 1; + rd_addr_r <= cpu_addr; + end + end + endcase + end + assign rd_type = 3'b010; // word + assign wr_type = 3'b010; // word + always_comb begin + // Default signal + rd_addr = 0; + rd_req = 0; + wr_addr = 0; + wr_data = 0; + wr_req = 0; + wr_wstrb = 0; + + case (state) + READ_REQ: begin + if (rd_rdy) begin + rd_req = 1; + rd_addr = cpu_addr; + end + end + READ_WAIT: begin + rd_req = rd_req_r; + rd_addr = rd_addr_r; + end + WRITE_REQ: begin + if (wr_rdy) begin + wr_req = 1; + wr_addr = cpu_addr; + wr_data = {{96{1'b0}}, wdata}; + wr_wstrb = wstrb; + end + end + endcase + end + + // Handshake with CPU + always_comb begin + addr_ok = 0; + data_ok = 0; + rdata = 0; + case (state) + READ_WAIT: begin + if (ret_valid) begin + addr_ok = 1; + data_ok = 1; + rdata = ret_data; + end + end + WRITE_REQ: begin + if (wr_rdy) begin + addr_ok = 1; + data_ok = 1; + end + end + 
endcase + end + +endmodule From 57b3ef126146051796e66d1ccb1ef11b76c45d7a Mon Sep 17 00:00:00 2001 From: Easton Man Date: Wed, 1 Jun 2022 15:21:46 +0800 Subject: [PATCH 36/41] fix: fix data width --- src/vsrc/dummy_dcache.sv | 58 +++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv index 68068f9..c37aa0c 100644 --- a/src/vsrc/dummy_dcache.sv +++ b/src/vsrc/dummy_dcache.sv @@ -22,7 +22,7 @@ module dummy_dcache ( input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 input logic ret_valid, //返回数据有效。高电平有效。 input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 - input logic [31:0] ret_data, //读返回数据 + input logic [127:0] ret_data, //读返回数据 output logic wr_req, //写请求有效信号。高电平有效 output logic [2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 output logic [31:0] wr_addr, //写请求起始地址 @@ -38,6 +38,8 @@ module dummy_dcache ( IDLE, READ_REQ, READ_WAIT, + WRITE_READ_REQ, + WRITE_READ_WAIT, WRITE_REQ } state, next_state; @@ -52,7 +54,8 @@ module dummy_dcache ( case (state) IDLE: begin if (valid) begin - if (op) next_state = WRITE_REQ; + if (op) + next_state = WRITE_READ_REQ; // First read 128b, swap 32b, and then write 128b else next_state = READ_REQ; end else next_state = IDLE; end @@ -64,6 +67,14 @@ module dummy_dcache ( if (ret_valid) next_state = IDLE; // If return valid, back to IDLE else next_state = READ_WAIT; end + WRITE_READ_REQ: begin + if (rd_rdy) next_state = WRITE_READ_WAIT; + else next_state = WRITE_READ_REQ; + end + WRITE_READ_WAIT: begin + if (ret_valid) next_state = WRITE_REQ; + else next_state = WRITE_READ_WAIT; + end WRITE_REQ: begin if (wr_rdy) next_state = IDLE; // If AXI is ready, then write req is accept this cycle, back to IDLE @@ -79,7 +90,8 @@ module dummy_dcache ( assign cpu_addr = {tag, index, offset}; logic rd_req_r; - logic [31:0] rd_addr_r; + logic [31:0] rd_addr_r, wr_addr_r; + logic [127:0] wr_rd_data_r; // Handshake with AXI always_ff @(posedge clk) 
begin @@ -90,6 +102,24 @@ module dummy_dcache ( rd_addr_r <= cpu_addr; end end + READ_WAIT: begin + if (ret_valid) begin + rd_req_r <= 0; + rd_addr_r <= 0; + end + end + WRITE_READ_REQ: begin + if (rd_rdy) begin + rd_req_r <= 1; + rd_addr_r <= cpu_addr; + end + wr_addr_r <= cpu_addr; + end + WRITE_READ_WAIT: begin + if (ret_valid) begin + wr_rd_data_r <= ret_data; + end + end endcase end assign rd_type = 3'b010; // word @@ -114,11 +144,27 @@ module dummy_dcache ( rd_req = rd_req_r; rd_addr = rd_addr_r; end + WRITE_READ_REQ: begin + if (rd_rdy) begin + rd_req = 1; + rd_addr = cpu_addr; + end + end + WRITE_READ_WAIT: begin + rd_req = rd_req_r; + rd_addr = rd_addr_r; + end WRITE_REQ: begin if (wr_rdy) begin wr_req = 1; - wr_addr = cpu_addr; - wr_data = {{96{1'b0}}, wdata}; + wr_addr = wr_addr_r; + wr_data = {wr_rd_data_r[127:32], wdata}; + // case (wr_addr_r[3:2]) + // 2'b00: wr_data = {wr_rd_data_r[127:32], wdata}; + // 2'b01: wr_data = {wr_rd_data_r[127:64], wdata, wr_rd_data_r[31:0]}; + // 2'b10: wr_data = {wr_rd_data_r[127:96], wdata, wr_rd_data_r[63:0]}; + // 2'b11: wr_data = {wdata, wr_rd_data_r[95:0]}; + // endcase wr_wstrb = wstrb; end end @@ -135,7 +181,7 @@ module dummy_dcache ( if (ret_valid) begin addr_ok = 1; data_ok = 1; - rdata = ret_data; + rdata = ret_data[rd_addr_r[3:2]*32+:32]; end end WRITE_REQ: begin From 968de6f98f925c81259ae02a51e5e106a16beff8 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Wed, 1 Jun 2022 18:08:50 +0800 Subject: [PATCH 37/41] fix: fix wstrb width for AXI128 --- src/vsrc/AXI/axi_master.sv | 4 ++-- src/vsrc/cpu_top.sv | 4 ++-- src/vsrc/dummy_dcache.sv | 18 +++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index 0b0faf1..9803212 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -15,7 +15,7 @@ module axi_master ( //data input wire [`ADDR] data_cpu_addr_i, - input wire [3:0] data_cpu_sel_i, + input wire [15:0] 
data_cpu_sel_i, output reg [`Data] data_cpu_data_o, input wire [3:0] data_id, //决定是读数据还是取指令 input wire [2:0] dcache_rd_type_i, // dacache read type @@ -65,7 +65,7 @@ module axi_master ( //w output wire [`ID] s_wid, output reg [`Data] s_wdata, - output wire [3:0] s_wstrb, //字节选通位和sel差不多 + output wire [15:0] s_wstrb, //字节选通位和sel差不多 output reg s_wlast, output reg s_wvalid, input wire s_wready, diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 9027547..28c7973 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -59,7 +59,7 @@ module cpu_top ( // write data output [ 3:0] wid, output [127:0] wdata, - output [ 3:0] wstrb, + output [ 15:0] wstrb, output wlast, output wvalid, input wready, @@ -130,7 +130,7 @@ module cpu_top ( // <-> DCache .data_cpu_addr_i(dcache_axi_addr), - .data_cpu_sel_i(data_axi_sel), + .data_cpu_sel_i(16'hffff), .data_cpu_data_o(axi_dcache_data), .data_id(4'b0001), .dcache_rd_req_i(dcache_axi_rreq), diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv index c37aa0c..7230ba9 100644 --- a/src/vsrc/dummy_dcache.sv +++ b/src/vsrc/dummy_dcache.sv @@ -156,15 +156,15 @@ module dummy_dcache ( end WRITE_REQ: begin if (wr_rdy) begin - wr_req = 1; - wr_addr = wr_addr_r; - wr_data = {wr_rd_data_r[127:32], wdata}; - // case (wr_addr_r[3:2]) - // 2'b00: wr_data = {wr_rd_data_r[127:32], wdata}; - // 2'b01: wr_data = {wr_rd_data_r[127:64], wdata, wr_rd_data_r[31:0]}; - // 2'b10: wr_data = {wr_rd_data_r[127:96], wdata, wr_rd_data_r[63:0]}; - // 2'b11: wr_data = {wdata, wr_rd_data_r[95:0]}; - // endcase + wr_req = 1; + wr_addr = wr_addr_r; + wr_data = {wr_rd_data_r[127:32], wdata}; + case (wr_addr_r[3:2]) + 2'b00: wr_data = {wr_rd_data_r[127:32], wdata}; + 2'b01: wr_data = {wr_rd_data_r[127:64], wdata, wr_rd_data_r[31:0]}; + 2'b10: wr_data = {wr_rd_data_r[127:96], wdata, wr_rd_data_r[63:0]}; + 2'b11: wr_data = {wdata, wr_rd_data_r[95:0]}; + endcase wr_wstrb = wstrb; end end From a717e33d9498327c610906a01776196ca536f053 Mon Sep 17 
00:00:00 2001 From: Easton Man Date: Wed, 1 Jun 2022 19:10:54 +0800 Subject: [PATCH 38/41] Revert "fix: fix data width" This reverts commit 57b3ef126146051796e66d1ccb1ef11b76c45d7a. --- src/vsrc/AXI/axi_master.sv | 20 +++++++--- src/vsrc/cpu_top.sv | 6 +-- src/vsrc/dummy_dcache.sv | 79 ++++++++++++-------------------------- 3 files changed, 43 insertions(+), 62 deletions(-) diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index 9803212..6772f93 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -490,6 +490,7 @@ module axi_master ( //write + reg [15:0] write_wstrb_buffer; reg [`BurstData] write_buffer; @@ -554,9 +555,11 @@ module axi_master ( s_awsize <= 0; s_awvalid <= 0; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; s_bready <= 0; + write_wstrb_buffer <= 0; write_buffer <= 0; s_wlast <= 0; end else begin @@ -570,20 +573,23 @@ module axi_master ( s_awsize <= 3'b100; s_awvalid <= 1; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; s_bready <= 0; + write_wstrb_buffer <= data_cpu_sel_i; write_buffer <= dcache_wr_data; s_wlast <= 0; end else begin w_state <= w_state; s_awaddr <= 0; s_awsize <= 0; - s_awvalid <= 0; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; s_bready <= 0; + write_wstrb_buffer <= 0; write_buffer <= 0; s_wlast <= 0; end @@ -599,6 +605,7 @@ module axi_master ( s_awvalid <= 0; s_wvalid <= 1; s_bready <= 1; + s_wstrb <= write_wstrb_buffer; s_wdata <= write_buffer; write_buffer <= {{32{1'b0}}, write_buffer[127:32]}; @@ -612,6 +619,7 @@ module axi_master ( s_awvalid <= s_awvalid; s_wvalid <= s_wvalid; s_bready <= s_bready; + s_wstrb <= 0; s_wdata <= 0; write_buffer <= write_buffer; s_wlast <= s_wlast; @@ -623,14 +631,17 @@ module axi_master ( if (s_wvalid && s_wready) begin if (cnt == s_awlen) begin w_state <= `W_RESP; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; + write_wstrb_buffer <= 0; write_buffer <= 0; s_wlast <= 0; end else begin - w_state <= w_state; - s_wdata <= write_buffer[31:0]; - write_buffer <= 
{{32{1'b0}}, write_buffer[127:32]}; + w_state <= w_state; + s_wstrb <= write_wstrb_buffer; + s_wdata <= write_buffer; + // write_buffer <= {{32{1'b0}}, write_buffer[127:32]}; s_wvalid <= 1; if (cnt == s_awlen - 1) s_wlast <= 1; else s_wlast <= 0; @@ -674,7 +685,6 @@ module axi_master ( assign s_awcache = 0; assign s_awprot = 0; assign s_wid = 0; - assign s_wstrb = data_cpu_sel_i; //set axi signal assign s_arid = inst_s_arid | data_s_arid; diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 28c7973..3b9f006 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -112,7 +112,7 @@ module cpu_top ( logic [127:0] dcache_axi_data; logic [`RegBus] cache_mem_data; logic mem_data_ok,mem_addr_ok; - logic [3:0] data_axi_sel; // Byte selection + logic [15:0] dcache_axi_wstrb; // Byte selection axi_master u_axi_master ( .aclk (aclk), @@ -130,7 +130,7 @@ module cpu_top ( // <-> DCache .data_cpu_addr_i(dcache_axi_addr), - .data_cpu_sel_i(16'hffff), + .data_cpu_sel_i(dcache_axi_wstrb), .data_cpu_data_o(axi_dcache_data), .data_id(4'b0001), .dcache_rd_req_i(dcache_axi_rreq), @@ -221,7 +221,7 @@ module cpu_top ( .wr_req (dcache_axi_wreq), .wr_type (), .wr_addr (dcache_axi_waddr), - .wr_wstrb (), + .wr_wstrb (dcache_axi_wstrb), .wr_data (dcache_axi_data), .wr_rdy (axi_dcache_wr_rdy) ); diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv index 7230ba9..654abea 100644 --- a/src/vsrc/dummy_dcache.sv +++ b/src/vsrc/dummy_dcache.sv @@ -26,7 +26,7 @@ module dummy_dcache ( output logic wr_req, //写请求有效信号。高电平有效 output logic [2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 output logic [31:0] wr_addr, //写请求起始地址 - output logic [3:0] wr_wstrb, //写操作的字节掩码。仅在写请求类型为:3'b000: 字节;3'b001: 半字;3'b010:字的情况下才有意义 + output logic [15:0] wr_wstrb, //写操作的字节掩码。16bits for AXI128 output logic [127:0] wr_data, //写数据 input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. 
@@ -38,8 +38,6 @@ module dummy_dcache ( IDLE, READ_REQ, READ_WAIT, - WRITE_READ_REQ, - WRITE_READ_WAIT, WRITE_REQ } state, next_state; @@ -54,8 +52,7 @@ module dummy_dcache ( case (state) IDLE: begin if (valid) begin - if (op) - next_state = WRITE_READ_REQ; // First read 128b, swap 32b, and then write 128b + if (op) next_state = WRITE_REQ; else next_state = READ_REQ; end else next_state = IDLE; end @@ -67,14 +64,6 @@ module dummy_dcache ( if (ret_valid) next_state = IDLE; // If return valid, back to IDLE else next_state = READ_WAIT; end - WRITE_READ_REQ: begin - if (rd_rdy) next_state = WRITE_READ_WAIT; - else next_state = WRITE_READ_REQ; - end - WRITE_READ_WAIT: begin - if (ret_valid) next_state = WRITE_REQ; - else next_state = WRITE_READ_WAIT; - end WRITE_REQ: begin if (wr_rdy) next_state = IDLE; // If AXI is ready, then write req is accept this cycle, back to IDLE @@ -90,8 +79,7 @@ module dummy_dcache ( assign cpu_addr = {tag, index, offset}; logic rd_req_r; - logic [31:0] rd_addr_r, wr_addr_r; - logic [127:0] wr_rd_data_r; + logic [31:0] rd_addr_r; // Handshake with AXI always_ff @(posedge clk) begin @@ -99,25 +87,7 @@ module dummy_dcache ( READ_REQ: begin if (rd_rdy) begin rd_req_r <= 1; - rd_addr_r <= cpu_addr; - end - end - READ_WAIT: begin - if (ret_valid) begin - rd_req_r <= 0; - rd_addr_r <= 0; - end - end - WRITE_READ_REQ: begin - if (rd_rdy) begin - rd_req_r <= 1; - rd_addr_r <= cpu_addr; - end - wr_addr_r <= cpu_addr; - end - WRITE_READ_WAIT: begin - if (ret_valid) begin - wr_rd_data_r <= ret_data; + rd_addr_r <= {cpu_addr[31:4], 4'b0}; // Keep addr aligned end end endcase @@ -137,35 +107,36 @@ module dummy_dcache ( READ_REQ: begin if (rd_rdy) begin rd_req = 1; - rd_addr = cpu_addr; + rd_addr = {cpu_addr[31:4], 4'b0}; // Keep addr aligned end end READ_WAIT: begin rd_req = rd_req_r; rd_addr = rd_addr_r; end - WRITE_READ_REQ: begin - if (rd_rdy) begin - rd_req = 1; - rd_addr = cpu_addr; - end - end - WRITE_READ_WAIT: begin - rd_req = rd_req_r; - rd_addr 
= rd_addr_r; - end WRITE_REQ: begin if (wr_rdy) begin wr_req = 1; - wr_addr = wr_addr_r; - wr_data = {wr_rd_data_r[127:32], wdata}; - case (wr_addr_r[3:2]) - 2'b00: wr_data = {wr_rd_data_r[127:32], wdata}; - 2'b01: wr_data = {wr_rd_data_r[127:64], wdata, wr_rd_data_r[31:0]}; - 2'b10: wr_data = {wr_rd_data_r[127:96], wdata, wr_rd_data_r[63:0]}; - 2'b11: wr_data = {wdata, wr_rd_data_r[95:0]}; + // wr_addr = cpu_addr; + wr_addr = {cpu_addr[31:4], 4'b0}; // Keep addr aligned + case (cpu_addr[3:2]) + 2'b00: begin + wr_data = {{96{1'b0}}, wdata}; + wr_wstrb = {12'b0, wstrb}; + end + 2'b01: begin + wr_data = {{64{1'b0}}, wdata, {32{1'b0}}}; + wr_wstrb = {8'b0, wstrb, 4'b0}; + end + 2'b10: begin + wr_data = {32'b0, wdata, {64{1'b0}}}; + wr_wstrb = {4'b0, wstrb, 8'b0}; + end + 2'b11: begin + wr_data = {wdata, {96{1'b0}}}; + wr_wstrb = {wstrb, 12'b0}; + end endcase - wr_wstrb = wstrb; end end endcase @@ -181,7 +152,7 @@ module dummy_dcache ( if (ret_valid) begin addr_ok = 1; data_ok = 1; - rdata = ret_data[rd_addr_r[3:2]*32+:32]; + rdata = ret_data[cpu_addr[3:2]*32+:32]; end end WRITE_REQ: begin From 9ac03b8834f9e5fc592019d00facf2a61e6cf77b Mon Sep 17 00:00:00 2001 From: Easton Man Date: Wed, 1 Jun 2022 19:44:56 +0800 Subject: [PATCH 39/41] fix: fix addr from mem[1] --- src/vsrc/cpu_top.sv | 6 +++--- src/vsrc/dummy_dcache.sv | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 3b9f006..c8d7a36 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -190,9 +190,9 @@ module cpu_top ( assign mem_cache_ce = mem_cache_signal[0].ce | mem_cache_signal[1].ce; assign mem_cache_we = mem_cache_signal[0].we | mem_cache_signal[1].we; - assign mem_cache_sel = mem_cache_signal[0].we ? mem_cache_signal[0].sel : mem_cache_signal[0].we ? mem_cache_signal[1].sel : 0; - assign mem_cache_addr = mem_cache_signal[0].addr; - assign mem_cache_data = mem_cache_signal[0].we ? mem_cache_signal[0].data : mem_cache_signal[0].we ? 
mem_cache_signal[1].data : 0; + assign mem_cache_sel = mem_cache_signal[0].we ? mem_cache_signal[0].sel : mem_cache_signal[1].we ? mem_cache_signal[1].sel : 0; + assign mem_cache_addr = mem_cache_signal[0].addr | mem_cache_signal[1].addr; + assign mem_cache_data = mem_cache_signal[0].we ? mem_cache_signal[0].data : mem_cache_signal[1].we ? mem_cache_signal[1].data : 0; dummy_dcache u_dcache( .clk (clk ), diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv index 654abea..cef0855 100644 --- a/src/vsrc/dummy_dcache.sv +++ b/src/vsrc/dummy_dcache.sv @@ -117,7 +117,6 @@ module dummy_dcache ( WRITE_REQ: begin if (wr_rdy) begin wr_req = 1; - // wr_addr = cpu_addr; wr_addr = {cpu_addr[31:4], 4'b0}; // Keep addr aligned case (cpu_addr[3:2]) 2'b00: begin From df6b213217d6eeb44a5ccbd1522dca7d546e7675 Mon Sep 17 00:00:00 2001 From: 250HandsomeLiang <2502481961@qq.com> Date: Wed, 1 Jun 2022 20:06:38 +0800 Subject: [PATCH 40/41] fix wr_rdy_signal delay --- src/vsrc/AXI/axi_master.sv | 2 +- src/vsrc/dummy_dcache.sv | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index 6772f93..1a3080c 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -65,7 +65,7 @@ module axi_master ( //w output wire [`ID] s_wid, output reg [`Data] s_wdata, - output wire [15:0] s_wstrb, //字节选通位和sel差不多 + output reg [15:0] s_wstrb, //字节选通位和sel差不多 output reg s_wlast, output reg s_wvalid, input wire s_wready, diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv index 654abea..36e8471 100644 --- a/src/vsrc/dummy_dcache.sv +++ b/src/vsrc/dummy_dcache.sv @@ -92,6 +92,14 @@ module dummy_dcache ( end endcase end + + //delay wr_rdy one cycle + logic reg_wr_rdy; + always_ff @(posedge clk)begin + if(rst) reg_wr_rdy<=0; + else reg_wr_rdy<=wr_rdy; + end + assign rd_type = 3'b010; // word assign wr_type = 3'b010; // word always_comb begin @@ -115,7 +123,7 @@ module dummy_dcache ( rd_addr = 
rd_addr_r; end WRITE_REQ: begin - if (wr_rdy) begin + if (reg_wr_rdy) begin wr_req = 1; // wr_addr = cpu_addr; wr_addr = {cpu_addr[31:4], 4'b0}; // Keep addr aligned @@ -156,7 +164,7 @@ module dummy_dcache ( end end WRITE_REQ: begin - if (wr_rdy) begin + if (reg_wr_rdy) begin addr_ok = 1; data_ok = 1; end From 5ee683ea6f10c1525f7997e31884351bc3cec649 Mon Sep 17 00:00:00 2001 From: Easton Man Date: Wed, 1 Jun 2022 20:27:41 +0800 Subject: [PATCH 41/41] fix: fix dispatch flushed during stall --- src/vsrc/pipeline/2_dispatch/dispatch.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vsrc/pipeline/2_dispatch/dispatch.sv b/src/vsrc/pipeline/2_dispatch/dispatch.sv index ff9c932..4cf4b04 100644 --- a/src/vsrc/pipeline/2_dispatch/dispatch.sv +++ b/src/vsrc/pipeline/2_dispatch/dispatch.sv @@ -152,10 +152,10 @@ module dispatch #( always_ff @(posedge clk or negedge rst_n) begin : dispatch_ff if (!rst_n) begin exe_o[i] <= 0; - end else if (flush) begin - exe_o[i] <= 0; end else if (stall) begin // Do nothing, hold output + end else if (flush) begin + exe_o[i] <= 0; end else if (issue_valid[i]) begin // Pass through to EXE