Merge branch 'inscache' of ssh://forgejo@git.insignificance.de:42122/slederer/Tridora-CPU.git

# Conflicts:
#	examples/benchmarks.results.text
This commit is contained in:
slederer 2025-04-13 23:21:38 +02:00
commit a060b65bb9
14 changed files with 168 additions and 109 deletions

View file

@ -216,3 +216,5 @@ set_property -dict {PACKAGE_PIN C2 IOSTANDARD LVCMOS33} [get_ports rst]
#set_property -dict { PACKAGE_PIN A15 IOSTANDARD LVCMOS33 } [get_ports { isns0v95_p }]; #IO_L8P_T1_AD10P_15 Sch=ad_p[10]
set_property BITSTREAM.GENERAL.COMPRESS True [current_design]
set_max_delay -from [get_pins vgafb0/display_timings_inst/o_vblank_reg/C] -to [get_pins vgafb0/vblank_xfer_reg/D] 3.000

View file

@ -17,7 +17,9 @@ module cpu_clkgen(
.CLKFBOUT_PHASE(0.0), // Phase offset in degrees of CLKFB (-360.000-360.000).
.CLKIN1_PERIOD(10.0), // Input clock period in ns to ps resolution (i.e. 33.333 is 30 MHz).
// CLKOUT0_DIVIDE - CLKOUT6_DIVIDE: Divide amount for each CLKOUT (1-128)
.CLKOUT0_DIVIDE_F(12.0), // Divide amount for CLKOUT0 (1.000-128.000).
// CPU Clock: 12.0 = 83.33MHz CPU Clock, 333.33MHz Memory Clock
// 13.0 = 76.92MHz CPU Clock, 307.69MHz Memory Clock
.CLKOUT0_DIVIDE_F(13.0), // Divide amount for CLKOUT0 (1.000-128.000).
.CLKOUT1_DIVIDE(5),
.CLKOUT2_DIVIDE(40), // 40 = 25MHz pixel clock (should be 25.175MHz per spec) for 640x480
//.CLKOUT2_DIVIDE(25), // 25 = 40MHz pixel clock for 800x600

View file

@ -8,6 +8,7 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
input wire [WIDTH-1:0] mem_write_data,
input wire mem_read_enable,
input wire mem_write_enable,
input wire mem_read_ins,
output wire mem_wait,
input wire rst_n,
@ -105,33 +106,38 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
.sys_rst (rst_n)
);
// reg [DRAM_DATA_WIDTH-1:0] read_cache;
// reg [ADDR_WIDTH-1:0] cached_addr;
// wire cache_hit = cached_addr == mem_addr;
// wire [DRAM_DATA_WIDTH-1:0] read_data_wrapper = cache_hit ? read_cache : app_rd_data;
(*KEEP*) reg [DRAM_DATA_WIDTH-1:0] ins_cache;
(*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] cached_addr;
(*KEEP*) wire cache_hit = mem_read_enable && mem_read_ins && (cached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]);
reg [WIDTH-1:0] read_buf;
reg read_inprogress = 0;
wire dram_read_enable = mem_read_enable && !cache_hit;
assign app_rd_data_end = 1'b1;
//assign app_wdf_mask = 16'b1111111111111100;
// addresses on the memory interface are aligned to 16 bytes
// and 28 bits wide (=256MB)
assign app_addr = { mem_addr[DRAM_ADDR_WIDTH:4], 4'b0000 };
//assign app_addr = { 28'b0 };
// select a word from the 128 bits transferred by the dram controller
// according to the lower bits of the address (ignoring bits 1:0)
wire [WIDTH-1:0] read_word;
wire [1:0] word_sel = mem_addr[3:2];
assign read_word = word_sel == 3'b11 ? app_rd_data[31:0] :
wire [WIDTH-1:0] read_word =
word_sel == 3'b11 ? app_rd_data[31:0] :
word_sel == 3'b10 ? app_rd_data[63:32] :
word_sel == 3'b01 ? app_rd_data[95:64] :
app_rd_data[127:96];
assign mem_read_data = app_rd_data_valid ? read_word : read_buf;
wire [WIDTH-1:0] read_cached_word =
word_sel == 3'b11 ? ins_cache[31:0] :
word_sel == 3'b10 ? ins_cache[63:32] :
word_sel == 3'b01 ? ins_cache[95:64] :
ins_cache[127:96];
(*KEEP*) assign mem_read_data = cache_hit ? read_cached_word :
app_rd_data_valid ? read_word : read_buf;
// set the write mask according to the lower bits of the address
// (ignoring bit 0)
@ -145,21 +151,34 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
assign app_wdf_end = mem_write_enable & write_ready;
assign app_wdf_data = { {4{mem_write_data}} };
assign mem_wait = (mem_read_enable & ~read_inprogress) |
assign mem_wait = (dram_read_enable & ~read_inprogress) |
(mem_write_enable & (~app_wdf_rdy | ~app_rdy)) |
(read_inprogress & ~app_rd_data_valid);
assign app_en = (mem_read_enable & ~read_inprogress) |
assign app_en = (dram_read_enable & ~read_inprogress) |
(mem_write_enable & write_ready);
assign app_cmd = mem_read_enable ? CMD_READ : CMD_WRITE;
assign app_cmd = dram_read_enable ? CMD_READ : CMD_WRITE;
always @(posedge dram_front_clk)
begin
if(mem_read_enable & ~read_inprogress & app_rdy)
if(dram_read_enable && mem_read_ins && app_rd_data_valid)
begin
ins_cache <= app_rd_data;
cached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4];
end
end
always @(posedge dram_front_clk)
begin
if(dram_read_enable & ~read_inprogress & app_rdy)
read_inprogress <= 1;
if(read_inprogress & app_rd_data_valid)
read_inprogress <= 0;
if(mem_read_enable & app_rd_data_valid)
if(dram_read_enable & app_rd_data_valid)
read_buf <= mem_read_data;
else
if (mem_read_enable & cache_hit)
read_buf <= read_cached_word;
end
endmodule

View file

@ -39,12 +39,12 @@
<Controller number="0">
<MemoryDevice>DDR3_SDRAM/Components/MT41K128M16XX-15E</MemoryDevice>
<TimePeriod>3000</TimePeriod>
<TimePeriod>3300</TimePeriod>
<VccAuxIO>1.8V</VccAuxIO>
<PHYRatio>4:1</PHYRatio>
<InputClkFreq>83.333</InputClkFreq>
<InputClkFreq>75.757</InputClkFreq>
<UIExtraClocks>0</UIExtraClocks>
<MMCM_VCO>666</MMCM_VCO>
<MMCM_VCO>606</MMCM_VCO>
<MMCMClkOut0> 1.000</MMCMClkOut0>
<MMCMClkOut1>1</MMCMClkOut1>
<MMCMClkOut2>1</MMCMClkOut2>

View file

@ -39,12 +39,12 @@
<Controller number="0">
<MemoryDevice>DDR3_SDRAM/Components/MT41K128M16XX-15E</MemoryDevice>
<TimePeriod>3000</TimePeriod>
<TimePeriod>3250</TimePeriod>
<VccAuxIO>1.8V</VccAuxIO>
<PHYRatio>4:1</PHYRatio>
<InputClkFreq>83.333</InputClkFreq>
<InputClkFreq>76.923</InputClkFreq>
<UIExtraClocks>0</UIExtraClocks>
<MMCM_VCO>666</MMCM_VCO>
<MMCM_VCO>615</MMCM_VCO>
<MMCMClkOut0> 1.000</MMCMClkOut0>
<MMCMClkOut1>1</MMCMClkOut1>
<MMCMClkOut2>1</MMCMClkOut2>

View file

@ -107,7 +107,7 @@ module sdspi(
tx_fifo_empty
);
fifo #(.ADDR_WIDTH(8)) rx_fifo(clk, reset,
fifo #(.ADDR_WIDTH(10)) rx_fifo(clk, reset,
rx_fifo_wr_en, rx_fifo_rd_en,
rx_shifter, rx_fifo_out,
rx_fifo_full,

View file

@ -11,20 +11,14 @@ module stackcpu #(parameter ADDR_WIDTH = 32, WIDTH = 32,
output reg [ADDR_WIDTH-1:0] addr,
input wire [WIDTH-1:0] data_in,
output wire read_enable,
output wire read_ins,
output wire [WIDTH-1:0] data_out,
output wire write_enable,
input wire mem_wait,
output wire led1,
output wire led2,
output wire led3,
output wire [WIDTH-1:0] debug_out1,
output wire [WIDTH-1:0] debug_out2,
output wire [WIDTH-1:0] debug_out3,
output wire [WIDTH-1:0] debug_out4,
output wire [WIDTH-1:0] debug_out5,
output wire [WIDTH-1:0] debug_out6
output wire led3
);
localparam EVAL_STACK_INDEX_WIDTH = 6;
@ -182,6 +176,8 @@ module stackcpu #(parameter ADDR_WIDTH = 32, WIDTH = 32,
assign mem_read_enable = (seq_state == FETCH) || (seq_state == EXEC && mem_read);
assign mem_write_enable = (seq_state == MEM && mem_write);
assign read_ins = (seq_state == FETCH) || (seq_state == DECODE);
initial
begin
PC <= 0; nPC <= 0; seq_state <= MEM;

View file

@ -3,7 +3,8 @@
// or as clk_1hz for debugging
`define clock cpuclk
`define clkfreq 83333333
//`define clkfreq 83333333
`define clkfreq 76923076
//`define clock clk
//`define clkfreq 100000000
//`define clock clk_1hz
@ -67,10 +68,11 @@ module top(
localparam ADDR_WIDTH = 32, WIDTH = 32,
ROMADDR_WIDTH = 11, IOADDR_WIDTH = 11, IOADDR_SEL = 4;
wire [ADDR_WIDTH-1:0] mem_addr;
(* KEEP *) wire [ADDR_WIDTH-1:0] mem_addr;
wire [WIDTH-1:0] mem_read_data;
wire [WIDTH-1:0] mem_write_data;
(* KEEP *) wire mem_wait;
assign led0 = mem_wait;
(* KEEP *) wire mem_read_enable;
(* KEEP *) wire mem_write_enable;
@ -80,14 +82,6 @@ module top(
wire irq;
// assign led0 = mem_wait;
wire [WIDTH-1:0] debug_data1, debug_data2,
debug_data3, debug_data4,
debug_data5, debug_data6;
assign led0 = debug_data6[0];
wire cpuclk, cpuclk_locked;
wire dram_refclk200;
wire pixclk;
@ -97,9 +91,11 @@ module top(
wire [ADDR_WIDTH-1:0] dram_addr;
wire [WIDTH-1:0] dram_read_data, dram_write_data;
wire dram_read_enable, dram_write_enable, dram_wait;
(* KEEP *) wire dram_read_ins;
dram_bridge dram_bridge0 (dram_addr,
dram_read_data, dram_write_data, dram_read_enable, dram_write_enable, dram_wait,
dram_read_data, dram_write_data, dram_read_enable, dram_write_enable,
dram_read_ins, dram_wait,
rst, cpuclk, dram_refclk200,
ddr3_dq, ddr3_dqs_n, ddr3_dqs_p, ddr3_addr,
ddr3_ba, ddr3_ras_n, ddr3_cas_n, ddr3_we_n,
@ -254,15 +250,10 @@ module top(
stackcpu cpu0(.clk(`clock), .rst(rst), .irq(irq),
.addr(mem_addr),
.data_in(mem_read_data), .read_enable(mem_read_enable),
.read_ins(dram_read_ins),
.data_out(mem_write_data), .write_enable(mem_write_enable),
.mem_wait(mem_wait),
.led1(led1), .led2(led2), .led3(led3),
.debug_out1(debug_data1),
.debug_out2(debug_data2),
.debug_out3(debug_data3),
.debug_out4(debug_data4),
.debug_out5(debug_data5),
.debug_out6(debug_data6));
.led1(led1), .led2(led2), .led3(led3));
// Interrupt Controller
irqctrl irqctrl0(`clock, irq_in, irqc_cs, mem_write_enable,

View file

@ -62,10 +62,11 @@ module display_timings #(
// o_scanline: high for one tick at the start of each visible scanline
assign o_scanline = (o_sy >= VA_STA) && (o_sy <= VA_END) && (o_sx == H_STA);
// set vblank at end of frame, clear at start
always @(posedge i_pix_clk)
begin
if(o_frame) o_vblank <= 1;
else if (o_de) o_vblank <= 0;
if(o_sy == VA_END) o_vblank <= 1;
else if (o_sy == -1) o_vblank <= 0;
end
always @ (posedge i_pix_clk)
@ -175,6 +176,7 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
wire scanline; // scanline start
wire vblank; // vertical blank
reg vblank_buf; // vertical blank in cpu clock domain
reg vblank_xfer; // vertical blank clock domain crossing
display_timings #( // 640x480 800x600 1280x720 1920x1080
`ifdef RES_1024_768
@ -233,7 +235,7 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
always @(posedge pix_clk) frame_d <= frame;
always @(posedge cpu_clk) vblank_buf <= vblank;
always @(posedge cpu_clk) { vblank_buf, vblank_xfer } <= { vblank_xfer, vblank };
always @(posedge cpu_clk)
begin