tridoracpu: implement data cache

This commit is contained in:
slederer 2025-09-15 23:02:22 +02:00
parent b2c2e8dc0c
commit 278f90a464
3 changed files with 138 additions and 38 deletions

View file

@ -45,6 +45,7 @@ Running benchmarks.prog
exp() 10K 00:00:29 exp() 10K 00:00:29
cos() 10K 00:00:06 cos() 10K 00:00:06
--------------------------------------
Arty-A7-35T Arty-A7-35T
76.92MHz, 64KB SRAM, 256MB DRAM 76.92MHz, 64KB SRAM, 256MB DRAM
running in DRAM (except corelib, stdlib, runtime) running in DRAM (except corelib, stdlib, runtime)
@ -68,7 +69,7 @@ Running benchmarks.prog
exp() 10K 00:00:32 exp() 10K 00:00:32
cos() 10K 00:00:06 cos() 10K 00:00:06
--------------------------------------
Arty-A7-35T Arty-A7-35T
76.92MHz, 64KB SRAM, 256MB DRAM, 16B instruction cache 76.92MHz, 64KB SRAM, 256MB DRAM, 16B instruction cache
running in DRAM (except corelib, stdlib, runtime) running in DRAM (except corelib, stdlib, runtime)
@ -91,3 +92,57 @@ Running benchmarks.prog
array copy 128k 1K 00:00:48 array copy 128k 1K 00:00:48
exp() 10K 00:00:32 exp() 10K 00:00:32
cos() 10K 00:00:06 cos() 10K 00:00:06
--------------------------------------
Arty-A7-35T
76.92MHz, 64KB SRAM, 256MB DRAM,
16B instruction cache, 16B write-through (wt) data cache
running in DRAM (except corelib, stdlib, runtime)
Running benchmarks.prog
empty loop 10M 00:00:07
write variable 10M 00:00:17
read variable 10M 00:00:20
integer addition 10M 00:00:20
real addition 1M 00:00:28
integer multiplication 1M 00:01:11
real multiplication 1M 00:00:59
integer division 1M 00:01:36
real division 1M 00:01:05
string indexing 1M 00:00:39
string iteration 1M 00:00:19
new/dispose 1k 1M 00:00:19
new/dispose 128k 1M 00:00:19
array copy 1k 10K 00:00:03
array copy 128k 1K 00:00:39
exp() 10K 00:00:26
cos() 10K 00:00:05
--------------------------------------
Arty-A7-35T
76.92MHz, 64KB SRAM, 256MB DRAM,
16B instruction cache, 16B write-back (wb) data cache
running in DRAM (except corelib, stdlib, runtime)
Running benchmarks.prog
empty loop 10M 00:00:04
write variable 10M 00:00:11
read variable 10M 00:00:18
integer addition 10M 00:00:18
real addition 1M 00:00:27
integer multiplication 1M 00:00:49
real multiplication 1M 00:00:58
integer division 1M 00:01:06
real division 1M 00:01:04
string indexing 1M 00:00:36
string iteration 1M 00:00:19
new/dispose 1k 1M 00:00:18
new/dispose 128k 1M 00:00:18
array copy 1k 10K 00:00:03
array copy 128k 1K 00:00:39
exp() 10K 00:00:25
cos() 10K 00:00:05

View file

@ -107,8 +107,14 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
); );
(*KEEP*) reg [DRAM_DATA_WIDTH-1:0] ins_cache; (*KEEP*) reg [DRAM_DATA_WIDTH-1:0] ins_cache;
(*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] cached_addr; (*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] icached_addr;
(*KEEP*) wire cache_hit = mem_read_enable && mem_read_ins && (cached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]); (*KEEP*) wire icache_hit = mem_read_enable && mem_read_ins && (icached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]);
// Single-line data cache: holds one DRAM_DATA_WIDTH-bit line as returned on app_rd_data.
(*KEEP*) reg [DRAM_DATA_WIDTH-1:0] d_cache;
// Tag of the line held in d_cache: the address bits above the 4 low (in-line offset) bits.
// NOTE(review): no separate valid flag is visible here; a hit is decided by the tag compare alone.
(*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] dcached_addr;
// High for a non-instruction read whose line address matches the cached tag.
// Because it is gated by mem_read_enable, it is never high on a cycle where only a write is requested.
(*KEEP*) wire dcache_hit = mem_read_enable && !mem_read_ins && (dcached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]);
// Hit in either the instruction cache or the data cache.
wire cache_hit = icache_hit | dcache_hit;
reg [WIDTH-1:0] read_buf; reg [WIDTH-1:0] read_buf;
reg read_inprogress = 0; reg read_inprogress = 0;
@ -125,25 +131,32 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
wire [1:0] word_sel = mem_addr[3:2]; wire [1:0] word_sel = mem_addr[3:2];
wire [WIDTH-1:0] read_word = wire [WIDTH-1:0] read_word =
word_sel == 3'b11 ? app_rd_data[31:0] : word_sel == 2'b11 ? app_rd_data[31:0] :
word_sel == 3'b10 ? app_rd_data[63:32] : word_sel == 2'b10 ? app_rd_data[63:32] :
word_sel == 3'b01 ? app_rd_data[95:64] : word_sel == 2'b01 ? app_rd_data[95:64] :
app_rd_data[127:96]; app_rd_data[127:96];
wire [WIDTH-1:0] read_cached_word = wire [WIDTH-1:0] read_icached_word =
word_sel == 3'b11 ? ins_cache[31:0] : word_sel == 2'b11 ? ins_cache[31:0] :
word_sel == 3'b10 ? ins_cache[63:32] : word_sel == 2'b10 ? ins_cache[63:32] :
word_sel == 3'b01 ? ins_cache[95:64] : word_sel == 2'b01 ? ins_cache[95:64] :
ins_cache[127:96]; ins_cache[127:96];
(*KEEP*) assign mem_read_data = cache_hit ? read_cached_word : wire [WIDTH-1:0] read_dcached_word =
word_sel == 2'b11 ? d_cache[31:0] :
word_sel == 2'b10 ? d_cache[63:32] :
word_sel == 2'b01 ? d_cache[95:64] :
d_cache[127:96];
(*KEEP*) assign mem_read_data = icache_hit ? read_icached_word :
dcache_hit ? read_dcached_word :
app_rd_data_valid ? read_word : read_buf; app_rd_data_valid ? read_word : read_buf;
// set the write mask according to the lower bits of the address // set the write mask according to the lower bits of the address
// (ignoring bit 0) // (ignoring bit 0)
assign app_wdf_mask = word_sel == 3'b11 ? 16'b1111111111110000 : assign app_wdf_mask = word_sel == 2'b11 ? 16'b1111111111110000 :
word_sel == 3'b10 ? 16'b1111111100001111 : word_sel == 2'b10 ? 16'b1111111100001111 :
word_sel == 3'b01 ? 16'b1111000011111111 : word_sel == 2'b01 ? 16'b1111000011111111 :
16'b0000111111111111 ; 16'b0000111111111111 ;
wire write_ready = mem_write_enable & app_wdf_rdy & app_rdy; wire write_ready = mem_write_enable & app_wdf_rdy & app_rdy;
@ -152,7 +165,7 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
assign app_wdf_data = { {4{mem_write_data}} }; assign app_wdf_data = { {4{mem_write_data}} };
assign mem_wait = (dram_read_enable & ~read_inprogress) | assign mem_wait = (dram_read_enable & ~read_inprogress) |
(mem_write_enable & (~app_wdf_rdy | ~app_rdy)) | (mem_write_enable & ~dcache_hit & (~app_wdf_rdy | ~app_rdy)) |
(read_inprogress & ~app_rd_data_valid); (read_inprogress & ~app_rd_data_valid);
assign app_en = (dram_read_enable & ~read_inprogress) | assign app_en = (dram_read_enable & ~read_inprogress) |
@ -160,25 +173,58 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
assign app_cmd = dram_read_enable ? CMD_READ : CMD_WRITE; assign app_cmd = dram_read_enable ? CMD_READ : CMD_WRITE;
/* set instruction cache */
always @(posedge dram_front_clk) always @(posedge dram_front_clk)
begin begin
if(dram_read_enable && mem_read_ins && app_rd_data_valid) if(dram_read_enable && mem_read_ins && app_rd_data_valid)
begin begin
ins_cache <= app_rd_data; ins_cache <= app_rd_data;
cached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4]; icached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4];
end end
end end
/* Data cache maintenance: fill the single cache line on a completed data read */
/* and keep it coherent with writes that target the same line. */
always @(posedge dram_front_clk)
begin
/* fill: a DRAM data read (not an instruction fetch) has returned valid data, */
/* so latch the whole line together with its line address (tag) */
if(dram_read_enable && !mem_read_ins && app_rd_data_valid)
begin
d_cache <= app_rd_data;
dcached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4];
end
/* alternative policy (disabled): invalidate on write */
/* a write to the cached line would set the tag to all ones so the next read misses */
// if(mem_write_enable && dcached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4])
// dcached_addr <= {DRAM_ADDR_WIDTH-4{1'b1}};
/* active policy: update the cached line on a write hit (called "write-back" by the author) */
/* NOTE(review): no dirty flag or eviction write-back is visible in this block - confirm that */
/* every write still reaches DRAM through the write path outside this block */
// the word slice is chosen by word_sel with the same mapping as the read multiplexers;
// if a fill and a write hit coincide, this later nonblocking assignment wins for that slice
if(mem_write_enable && dcached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4])
begin
case(word_sel)
2'b11: d_cache[31:0] <= mem_write_data;
2'b10: d_cache[63:32] <= mem_write_data;
2'b01: d_cache[95:64] <= mem_write_data;
2'b00: d_cache[127:96] <= mem_write_data;
endcase
end
end
/* transfer read data, either from cache or from DRAM */
always @(posedge dram_front_clk) always @(posedge dram_front_clk)
begin begin
if(dram_read_enable & ~read_inprogress & app_rdy) if(dram_read_enable & ~read_inprogress & app_rdy)
read_inprogress <= 1; read_inprogress <= 1;
if(read_inprogress & app_rd_data_valid) if(read_inprogress & app_rd_data_valid)
read_inprogress <= 0; read_inprogress <= 0;
if(dram_read_enable & app_rd_data_valid) if(dram_read_enable & app_rd_data_valid)
read_buf <= mem_read_data; read_buf <= mem_read_data;
else else
if (mem_read_enable & cache_hit) if (mem_read_enable & icache_hit)
read_buf <= read_cached_word; read_buf <= read_icached_word;
else
if (mem_read_enable & dcache_hit)
read_buf <= read_dcached_word;
end end
endmodule endmodule

View file

@ -349,15 +349,22 @@
</Simulator> </Simulator>
</Simulators> </Simulators>
<Runs Version="1" Minor="22"> <Runs Version="1" Minor="22">
<Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Vivado Synthesis Defaults" AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true"> <Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Higher performance designs, resource sharing is turned off, the global fanout guide is set to a lower number, FSM extraction forced to one-hot, LUT combining is disabled, equivalent registers are preserved, SRL are inferred with a larger threshold" AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true">
<Strategy Version="1" Minor="2"> <Strategy Version="1" Minor="2">
<StratHandle Name="Vivado Synthesis Defaults" Flow="Vivado Synthesis 2020"> <StratHandle Name="Flow_PerfOptimized_high" Flow="Vivado Synthesis 2024">
<Desc>Vivado Synthesis Defaults</Desc> <Desc>Higher performance designs, resource sharing is turned off, the global fanout guide is set to a lower number, FSM extraction forced to one-hot, LUT combining is disabled, equivalent registers are preserved, SRL are inferred with a larger threshold</Desc>
</StratHandle> </StratHandle>
<Step Id="synth_design"/> <Step Id="synth_design">
<Option Id="Directive">7</Option>
<Option Id="FsmExtraction">1</Option>
<Option Id="KeepEquivalentRegisters">1</Option>
<Option Id="NoCombineLuts">1</Option>
<Option Id="ResourceSharing">2</Option>
<Option Id="ShregMinSize">5</Option>
</Step>
</Strategy> </Strategy>
<GeneratedRun Dir="$PRUNDIR" File="gen_run.xml"/> <GeneratedRun Dir="$PRUNDIR" File="gen_run.xml"/>
<ReportStrategy Name="Vivado Synthesis Default Reports" Flow="Vivado Synthesis 2020"/> <ReportStrategy Name="Vivado Synthesis Default Reports" Flow="Vivado Synthesis 2024"/>
<Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/> <Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/>
<RQSFiles/> <RQSFiles/>
</Run> </Run>
@ -371,26 +378,18 @@
<Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/> <Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/>
<RQSFiles/> <RQSFiles/>
</Run> </Run>
<Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Best predicted directive for place_design." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true"> <Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Default settings for Implementation." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true">
<Strategy Version="1" Minor="2"> <Strategy Version="1" Minor="2">
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024"> <StratHandle Name="Vivado Implementation Defaults" Flow="Vivado Implementation 2024">
<Desc>Best predicted directive for place_design.</Desc> <Desc>Default settings for Implementation.</Desc>
</StratHandle> </StratHandle>
<Step Id="init_design"/> <Step Id="init_design"/>
<Step Id="opt_design"> <Step Id="opt_design"/>
<Option Id="Directive">0</Option>
</Step>
<Step Id="power_opt_design"/> <Step Id="power_opt_design"/>
<Step Id="place_design"> <Step Id="place_design"/>
<Option Id="Directive">20</Option>
</Step>
<Step Id="post_place_power_opt_design"/> <Step Id="post_place_power_opt_design"/>
<Step Id="phys_opt_design"> <Step Id="phys_opt_design"/>
<Option Id="Directive">2</Option> <Step Id="route_design"/>
</Step>
<Step Id="route_design">
<Option Id="Directive">1</Option>
</Step>
<Step Id="post_route_phys_opt_design"/> <Step Id="post_route_phys_opt_design"/>
<Step Id="write_bitstream"> <Step Id="write_bitstream">
<Option Id="BinFile">1</Option> <Option Id="BinFile">1</Option>