tridoracpu: implement data cache
This commit is contained in:
parent
b2c2e8dc0c
commit
278f90a464
3 changed files with 138 additions and 38 deletions
|
|
@ -45,6 +45,7 @@ Running benchmarks.prog
|
|||
exp() 10K 00:00:29
|
||||
cos() 10K 00:00:06
|
||||
|
||||
--------------------------------------
|
||||
Arty-A7-35T
|
||||
76.92MHz, 64KB SRAM, 256MB DRAM
|
||||
running in DRAM (except corelib, stdlib, runtime)
|
||||
|
|
@ -68,7 +69,7 @@ Running benchmarks.prog
|
|||
exp() 10K 00:00:32
|
||||
cos() 10K 00:00:06
|
||||
|
||||
|
||||
--------------------------------------
|
||||
Arty-A7-35T
|
||||
76.92MHz, 64KB SRAM, 256MB DRAM, 16B instruction cache
|
||||
running in DRAM (except corelib, stdlib, runtime)
|
||||
|
|
@ -91,3 +92,57 @@ Running benchmarks.prog
|
|||
array copy 128k 1K 00:00:48
|
||||
exp() 10K 00:00:32
|
||||
cos() 10K 00:00:06
|
||||
|
||||
--------------------------------------
|
||||
Arty-A7-35T
|
||||
76.92MHz, 64KB SRAM, 256MB DRAM,
|
||||
16B instruction cache, 16B write-through data cache
|
||||
running in DRAM (except corelib, stdlib, runtime)
|
||||
|
||||
Running benchmarks.prog
|
||||
empty loop 10M 00:00:07
|
||||
write variable 10M 00:00:17
|
||||
read variable 10M 00:00:20
|
||||
integer addition 10M 00:00:20
|
||||
real addition 1M 00:00:28
|
||||
integer multiplication 1M 00:01:11
|
||||
real multiplication 1M 00:00:59
|
||||
integer division 1M 00:01:36
|
||||
real division 1M 00:01:05
|
||||
string indexing 1M 00:00:39
|
||||
string iteration 1M 00:00:19
|
||||
new/dispose 1k 1M 00:00:19
|
||||
new/dispose 128k 1M 00:00:19
|
||||
array copy 1k 10K 00:00:03
|
||||
array copy 128k 1K 00:00:39
|
||||
exp() 10K 00:00:26
|
||||
cos() 10K 00:00:05
|
||||
|
||||
|
||||
|
||||
--------------------------------------
|
||||
Arty-A7-35T
|
||||
76.92MHz, 64KB SRAM, 256MB DRAM,
|
||||
16B instruction cache, 16B write-back data cache
|
||||
running in DRAM (except corelib, stdlib, runtime)
|
||||
|
||||
Running benchmarks.prog
|
||||
empty loop 10M 00:00:04
|
||||
write variable 10M 00:00:11
|
||||
read variable 10M 00:00:18
|
||||
integer addition 10M 00:00:18
|
||||
real addition 1M 00:00:27
|
||||
integer multiplication 1M 00:00:49
|
||||
real multiplication 1M 00:00:58
|
||||
integer division 1M 00:01:06
|
||||
real division 1M 00:01:04
|
||||
string indexing 1M 00:00:36
|
||||
string iteration 1M 00:00:19
|
||||
new/dispose 1k 1M 00:00:18
|
||||
new/dispose 128k 1M 00:00:18
|
||||
array copy 1k 10K 00:00:03
|
||||
array copy 128k 1K 00:00:39
|
||||
exp() 10K 00:00:25
|
||||
cos() 10K 00:00:05
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -107,8 +107,14 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
|
|||
);
|
||||
|
||||
(*KEEP*) reg [DRAM_DATA_WIDTH-1:0] ins_cache;
|
||||
(*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] cached_addr;
|
||||
(*KEEP*) wire cache_hit = mem_read_enable && mem_read_ins && (cached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]);
|
||||
(*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] icached_addr;
|
||||
(*KEEP*) wire icache_hit = mem_read_enable && mem_read_ins && (icached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]);
|
||||
|
||||
(*KEEP*) reg [DRAM_DATA_WIDTH-1:0] d_cache;
|
||||
(*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] dcached_addr;
|
||||
(*KEEP*) wire dcache_hit = mem_read_enable && !mem_read_ins && (dcached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]);
|
||||
|
||||
wire cache_hit = icache_hit | dcache_hit;
|
||||
|
||||
reg [WIDTH-1:0] read_buf;
|
||||
reg read_inprogress = 0;
|
||||
|
|
@ -125,25 +131,32 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
|
|||
wire [1:0] word_sel = mem_addr[3:2];
|
||||
|
||||
wire [WIDTH-1:0] read_word =
|
||||
word_sel == 3'b11 ? app_rd_data[31:0] :
|
||||
word_sel == 3'b10 ? app_rd_data[63:32] :
|
||||
word_sel == 3'b01 ? app_rd_data[95:64] :
|
||||
word_sel == 2'b11 ? app_rd_data[31:0] :
|
||||
word_sel == 2'b10 ? app_rd_data[63:32] :
|
||||
word_sel == 2'b01 ? app_rd_data[95:64] :
|
||||
app_rd_data[127:96];
|
||||
|
||||
wire [WIDTH-1:0] read_cached_word =
|
||||
word_sel == 3'b11 ? ins_cache[31:0] :
|
||||
word_sel == 3'b10 ? ins_cache[63:32] :
|
||||
word_sel == 3'b01 ? ins_cache[95:64] :
|
||||
wire [WIDTH-1:0] read_icached_word =
|
||||
word_sel == 2'b11 ? ins_cache[31:0] :
|
||||
word_sel == 2'b10 ? ins_cache[63:32] :
|
||||
word_sel == 2'b01 ? ins_cache[95:64] :
|
||||
ins_cache[127:96];
|
||||
|
||||
(*KEEP*) assign mem_read_data = cache_hit ? read_cached_word :
|
||||
wire [WIDTH-1:0] read_dcached_word =
|
||||
word_sel == 2'b11 ? d_cache[31:0] :
|
||||
word_sel == 2'b10 ? d_cache[63:32] :
|
||||
word_sel == 2'b01 ? d_cache[95:64] :
|
||||
d_cache[127:96];
|
||||
|
||||
(*KEEP*) assign mem_read_data = icache_hit ? read_icached_word :
|
||||
dcache_hit ? read_dcached_word :
|
||||
app_rd_data_valid ? read_word : read_buf;
|
||||
|
||||
// set the write mask according to the lower bits of the address
|
||||
// (word select uses address bits 3:2; bits 1:0 are ignored)
|
||||
assign app_wdf_mask = word_sel == 3'b11 ? 16'b1111111111110000 :
|
||||
word_sel == 3'b10 ? 16'b1111111100001111 :
|
||||
word_sel == 3'b01 ? 16'b1111000011111111 :
|
||||
assign app_wdf_mask = word_sel == 2'b11 ? 16'b1111111111110000 :
|
||||
word_sel == 2'b10 ? 16'b1111111100001111 :
|
||||
word_sel == 2'b01 ? 16'b1111000011111111 :
|
||||
16'b0000111111111111 ;
|
||||
|
||||
wire write_ready = mem_write_enable & app_wdf_rdy & app_rdy;
|
||||
|
|
@ -152,7 +165,7 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
|
|||
assign app_wdf_data = { {4{mem_write_data}} };
|
||||
|
||||
assign mem_wait = (dram_read_enable & ~read_inprogress) |
|
||||
(mem_write_enable & (~app_wdf_rdy | ~app_rdy)) |
|
||||
(mem_write_enable & ~dcache_hit & (~app_wdf_rdy | ~app_rdy)) |
|
||||
(read_inprogress & ~app_rd_data_valid);
|
||||
|
||||
assign app_en = (dram_read_enable & ~read_inprogress) |
|
||||
|
|
@ -160,25 +173,58 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32)
|
|||
assign app_cmd = dram_read_enable ? CMD_READ : CMD_WRITE;
|
||||
|
||||
|
||||
/* set instruction cache */
|
||||
always @(posedge dram_front_clk)
|
||||
begin
|
||||
if(dram_read_enable && mem_read_ins && app_rd_data_valid)
|
||||
begin
|
||||
ins_cache <= app_rd_data;
|
||||
cached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4];
|
||||
icached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4];
|
||||
end
|
||||
end
|
||||
|
||||
/* set data cache */
|
||||
always @(posedge dram_front_clk)
|
||||
begin
|
||||
if(dram_read_enable && !mem_read_ins && app_rd_data_valid)
|
||||
begin
|
||||
d_cache <= app_rd_data;
|
||||
dcached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4];
|
||||
end
|
||||
|
||||
/* write-through cache - invalidate on write */
|
||||
/* invalidate data cache on write */
|
||||
// if(mem_write_enable && dcached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4])
|
||||
// dcached_addr <= {DRAM_ADDR_WIDTH-4{1'b1}};
|
||||
|
||||
/* write-back cache - update cache on write */
|
||||
// write back to data cache on mem_write
|
||||
if(mem_write_enable && dcached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4])
|
||||
begin
|
||||
case(word_sel)
|
||||
2'b11: d_cache[31:0] <= mem_write_data;
|
||||
2'b10: d_cache[63:32] <= mem_write_data;
|
||||
2'b01: d_cache[95:64] <= mem_write_data;
|
||||
2'b00: d_cache[127:96] <= mem_write_data;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
/* transfer read data, either from cache or from DRAM */
|
||||
always @(posedge dram_front_clk)
|
||||
begin
|
||||
if(dram_read_enable & ~read_inprogress & app_rdy)
|
||||
read_inprogress <= 1;
|
||||
if(read_inprogress & app_rd_data_valid)
|
||||
read_inprogress <= 0;
|
||||
|
||||
if(dram_read_enable & app_rd_data_valid)
|
||||
read_buf <= mem_read_data;
|
||||
else
|
||||
if (mem_read_enable & cache_hit)
|
||||
read_buf <= read_cached_word;
|
||||
if (mem_read_enable & icache_hit)
|
||||
read_buf <= read_icached_word;
|
||||
else
|
||||
if (mem_read_enable & dcache_hit)
|
||||
read_buf <= read_dcached_word;
|
||||
end
|
||||
endmodule
|
||||
|
|
|
|||
|
|
@ -349,15 +349,22 @@
|
|||
</Simulator>
|
||||
</Simulators>
|
||||
<Runs Version="1" Minor="22">
|
||||
<Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Vivado Synthesis Defaults" AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true">
|
||||
<Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Higher performance designs, resource sharing is turned off, the global fanout guide is set to a lower number, FSM extraction forced to one-hot, LUT combining is disabled, equivalent registers are preserved, SRL are inferred with a larger threshold" AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true">
|
||||
<Strategy Version="1" Minor="2">
|
||||
<StratHandle Name="Vivado Synthesis Defaults" Flow="Vivado Synthesis 2020">
|
||||
<Desc>Vivado Synthesis Defaults</Desc>
|
||||
<StratHandle Name="Flow_PerfOptimized_high" Flow="Vivado Synthesis 2024">
|
||||
<Desc>Higher performance designs, resource sharing is turned off, the global fanout guide is set to a lower number, FSM extraction forced to one-hot, LUT combining is disabled, equivalent registers are preserved, SRL are inferred with a larger threshold</Desc>
|
||||
</StratHandle>
|
||||
<Step Id="synth_design"/>
|
||||
<Step Id="synth_design">
|
||||
<Option Id="Directive">7</Option>
|
||||
<Option Id="FsmExtraction">1</Option>
|
||||
<Option Id="KeepEquivalentRegisters">1</Option>
|
||||
<Option Id="NoCombineLuts">1</Option>
|
||||
<Option Id="ResourceSharing">2</Option>
|
||||
<Option Id="ShregMinSize">5</Option>
|
||||
</Step>
|
||||
</Strategy>
|
||||
<GeneratedRun Dir="$PRUNDIR" File="gen_run.xml"/>
|
||||
<ReportStrategy Name="Vivado Synthesis Default Reports" Flow="Vivado Synthesis 2020"/>
|
||||
<ReportStrategy Name="Vivado Synthesis Default Reports" Flow="Vivado Synthesis 2024"/>
|
||||
<Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/>
|
||||
<RQSFiles/>
|
||||
</Run>
|
||||
|
|
@ -371,26 +378,18 @@
|
|||
<Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/>
|
||||
<RQSFiles/>
|
||||
</Run>
|
||||
<Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Best predicted directive for place_design." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true">
|
||||
<Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Default settings for Implementation." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true">
|
||||
<Strategy Version="1" Minor="2">
|
||||
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024">
|
||||
<Desc>Best predicted directive for place_design.</Desc>
|
||||
<StratHandle Name="Vivado Implementation Defaults" Flow="Vivado Implementation 2024">
|
||||
<Desc>Default settings for Implementation.</Desc>
|
||||
</StratHandle>
|
||||
<Step Id="init_design"/>
|
||||
<Step Id="opt_design">
|
||||
<Option Id="Directive">0</Option>
|
||||
</Step>
|
||||
<Step Id="opt_design"/>
|
||||
<Step Id="power_opt_design"/>
|
||||
<Step Id="place_design">
|
||||
<Option Id="Directive">20</Option>
|
||||
</Step>
|
||||
<Step Id="place_design"/>
|
||||
<Step Id="post_place_power_opt_design"/>
|
||||
<Step Id="phys_opt_design">
|
||||
<Option Id="Directive">2</Option>
|
||||
</Step>
|
||||
<Step Id="route_design">
|
||||
<Option Id="Directive">1</Option>
|
||||
</Step>
|
||||
<Step Id="phys_opt_design"/>
|
||||
<Step Id="route_design"/>
|
||||
<Step Id="post_route_phys_opt_design"/>
|
||||
<Step Id="write_bitstream">
|
||||
<Option Id="BinFile">1</Option>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue