diff --git a/.gitignore b/.gitignore index b7537cf..930fb01 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ pcomp/sdis tridoraemu/tridoraemu **/tridoracpu.cache/ **/tridoracpu.hw/ +**/tridoracpu.gen/ **/tridoracpu.ip_user_files/ **/tridoracpu.runs/ *.log diff --git a/examples/benchmarks.results.text b/examples/benchmarks.results.text index 23da7c0..5ae8a13 100644 --- a/examples/benchmarks.results.text +++ b/examples/benchmarks.results.text @@ -91,4 +91,3 @@ Running benchmarks.prog array copy 128k 1K 00:00:48 exp() 10K 00:00:32 cos() 10K 00:00:06 - diff --git a/lib/corelib.s b/lib/corelib.s index 57f35a8..6970971 100644 --- a/lib/corelib.s +++ b/lib/corelib.s @@ -588,13 +588,19 @@ DIVU_END: ; wait approx. 1 millisecond ; -; 83.333 MHz Clock, three instructions a 4 cycles -; 83333 / 12 = 6944.4166 -; works only if executed without wait states (i.e. -; from BRAM/SRAM) +; the ROM at address 4 +; contains the cpu clock freq in KHz + .EQU CLK_KHZ_ADDR 4 WAIT1MSEC: - LOADCP 6944 + LOADC CLK_KHZ_ADDR + LOADI + ; divide by 16 + SHR + SHR + SHR + SHR WAIT1LOOP: + INC 0 ; NOP to make the loop 16 cycles long DEC 1 DUP CBRANCH.NZ WAIT1LOOP diff --git a/lib/rommon.s b/lib/rommon.s index a967e4b..a1034c5 100644 --- a/lib/rommon.s +++ b/lib/rommon.s @@ -7,8 +7,16 @@ .EQU UART_REG 2048 .EQU MON_ADDR 64512 + .EQU CLK_KHZ 76923 + BRANCH 2 ; the very first instruction is not ; executed correctly + BRANCH MON_START ; branch over constant + +CLK_KHZ_ADDR: + .WORD CLK_KHZ ; to calibrate the delay loop + +MON_START: LOADCP 65020 ; initialise FP and RP registers STOREREG FP LOADCP 65024 @@ -782,13 +790,17 @@ COPY_BLK1: ; wait approx. 1 millisecond ; -; 83.333 MHz Clock, three instructions a 4 cycles -; 83333 / 12 = 6944.4166 -; works only if executed without wait states (i.e. -; from BRAM/SRAM) WAIT1MSEC: - LOADCP 6944 + ; get clock freq in khz + LOADC CLK_KHZ_ADDR + LOADI + ; divide by 16 + SHR + SHR + SHR + SHR WAIT1LOOP: + INC 0 ; NOP to make loop 16 cycles long DEC 1 DUP CBRANCH.NZ WAIT1LOOP @@ -798,7 +810,7 @@ WAIT1LOOP: %include "sdcardboot.s" .CPOOL MESSAGE: - .BYTE 13,10,"ROM Monitor v3.0.3", 13, 10, + .BYTE 13,10,"ROM Monitor v3.1.0", 13, 10, "Set A)ddress D)eposit eX)amine L)oad G)o B)oot",13,10,0 PROMPT2: .BYTE "]> ",0 diff --git a/tridoracpu/tridoracpu.srcs/Arty-A7-35-Master.xdc b/tridoracpu/tridoracpu.srcs/Arty-A7-35-Master.xdc index c618478..7c62767 100644 --- a/tridoracpu/tridoracpu.srcs/Arty-A7-35-Master.xdc +++ b/tridoracpu/tridoracpu.srcs/Arty-A7-35-Master.xdc @@ -216,3 +216,5 @@ set_property -dict {PACKAGE_PIN C2 IOSTANDARD LVCMOS33} [get_ports rst] #set_property -dict { PACKAGE_PIN A15 IOSTANDARD LVCMOS33 } [get_ports { isns0v95_p }]; #IO_L8P_T1_AD10P_15 Sch=ad_p[10] set_property BITSTREAM.GENERAL.COMPRESS True [current_design] + +set_max_delay -from [get_pins vgafb0/display_timings_inst/o_vblank_reg/C] -to [get_pins vgafb0/vblank_xfer_reg/D] 3.000 diff --git a/tridoracpu/tridoracpu.srcs/cpuclk.v b/tridoracpu/tridoracpu.srcs/cpuclk.v index 613d85e..a214cc4 100644 --- a/tridoracpu/tridoracpu.srcs/cpuclk.v +++ b/tridoracpu/tridoracpu.srcs/cpuclk.v @@ -17,7 +17,9 @@ module cpu_clkgen( .CLKFBOUT_PHASE(0.0), // Phase offset in degrees of CLKFB (-360.000-360.000). .CLKIN1_PERIOD(10.0), // Input clock period in ns to ps resolution (i.e. 33.333 is 30 MHz). // CLKOUT0_DIVIDE - CLKOUT6_DIVIDE: Divide amount for each CLKOUT (1-128) - .CLKOUT0_DIVIDE_F(12.0), // Divide amount for CLKOUT0 (1.000-128.000). + // CPU Clock: 12.0 = 83.33MHz CPU Clock, 333.33MHz Memory Clock + // 13.0 = 76.92MHz CPU Clock, 307.69MHz Memory Clock + .CLKOUT0_DIVIDE_F(13.0), // Divide amount for CLKOUT0 (1.000-128.000). .CLKOUT1_DIVIDE(5), .CLKOUT2_DIVIDE(40), // 40 = 25MHz pixel clock (should be 25.175MHz per spec) for 640x480 //.CLKOUT2_DIVIDE(25), // 25 = 40MHz pixel clock for 800x600 diff --git a/tridoracpu/tridoracpu.srcs/dram_bridge.v b/tridoracpu/tridoracpu.srcs/dram_bridge.v index 102a8cf..c3f948a 100644 --- a/tridoracpu/tridoracpu.srcs/dram_bridge.v +++ b/tridoracpu/tridoracpu.srcs/dram_bridge.v @@ -8,6 +8,7 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32) input wire [WIDTH-1:0] mem_write_data, input wire mem_read_enable, input wire mem_write_enable, + input wire mem_read_ins, output wire mem_wait, input wire rst_n, @@ -105,33 +106,38 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32) .sys_rst (rst_n) ); -// reg [DRAM_DATA_WIDTH-1:0] read_cache; -// reg [ADDR_WIDTH-1:0] cached_addr; -// wire cache_hit = cached_addr == mem_addr; -// wire [DRAM_DATA_WIDTH-1:0] read_data_wrapper = cache_hit ? read_cache : app_rd_data; + (*KEEP*) reg [DRAM_DATA_WIDTH-1:0] ins_cache; + (*KEEP*) reg [DRAM_ADDR_WIDTH-1:4] cached_addr; + (*KEEP*) wire cache_hit = mem_read_enable && mem_read_ins && (cached_addr == mem_addr[DRAM_ADDR_WIDTH-1:4]); reg [WIDTH-1:0] read_buf; reg read_inprogress = 0; + wire dram_read_enable = mem_read_enable && !cache_hit; assign app_rd_data_end = 1'b1; - //assign app_wdf_mask = 16'b1111111111111100; // addresses on the memory interface are aligned to 16 bytes // and 28 bits wide (=256MB) assign app_addr = { mem_addr[DRAM_ADDR_WIDTH:4], 4'b0000 }; - //assign app_addr = { 28'b0 }; // select a word from the 128 bits transferred by the dram controller // according to the lower bits of the address (ignoring bits 1:0) - wire [WIDTH-1:0] read_word; wire [1:0] word_sel = mem_addr[3:2]; - assign read_word = word_sel == 3'b11 ? app_rd_data[31:0] : + wire [WIDTH-1:0] read_word = + word_sel == 3'b11 ? app_rd_data[31:0] : word_sel == 3'b10 ? app_rd_data[63:32] : word_sel == 3'b01 ? app_rd_data[95:64] : app_rd_data[127:96]; - assign mem_read_data = app_rd_data_valid ? read_word : read_buf; + wire [WIDTH-1:0] read_cached_word = + word_sel == 3'b11 ? ins_cache[31:0] : + word_sel == 3'b10 ? ins_cache[63:32] : + word_sel == 3'b01 ? ins_cache[95:64] : + ins_cache[127:96]; + + (*KEEP*) assign mem_read_data = cache_hit ? read_cached_word : + app_rd_data_valid ? read_word : read_buf; // set the write mask according to the lower bits of the address // (ignoring bit 0) @@ -145,21 +151,34 @@ module dram_bridge #(ADDR_WIDTH = 32, WIDTH = 32) assign app_wdf_end = mem_write_enable & write_ready; assign app_wdf_data = { {4{mem_write_data}} }; - assign mem_wait = (mem_read_enable & ~read_inprogress) | + assign mem_wait = (dram_read_enable & ~read_inprogress) | (mem_write_enable & (~app_wdf_rdy | ~app_rdy)) | (read_inprogress & ~app_rd_data_valid); - assign app_en = (mem_read_enable & ~read_inprogress) | + assign app_en = (dram_read_enable & ~read_inprogress) | (mem_write_enable & write_ready); - assign app_cmd = mem_read_enable ? CMD_READ : CMD_WRITE; + assign app_cmd = dram_read_enable ? CMD_READ : CMD_WRITE; + always @(posedge dram_front_clk) begin - if(mem_read_enable & ~read_inprogress & app_rdy) + if(dram_read_enable && mem_read_ins && app_rd_data_valid) + begin + ins_cache <= app_rd_data; + cached_addr <= mem_addr[DRAM_ADDR_WIDTH-1:4]; + end + end + + always @(posedge dram_front_clk) + begin + if(dram_read_enable & ~read_inprogress & app_rdy) read_inprogress <= 1; if(read_inprogress & app_rd_data_valid) read_inprogress <= 0; - if(mem_read_enable & app_rd_data_valid) + if(dram_read_enable & app_rd_data_valid) read_buf <= mem_read_data; + else + if (mem_read_enable & cache_hit) + read_buf <= read_cached_word; end endmodule diff --git a/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_a.prj b/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_a.prj index b263a25..58ff963 100644 --- a/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_a.prj +++ b/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_a.prj @@ -39,12 +39,12 @@ DDR3_SDRAM/Components/MT41K128M16XX-15E - 3000 + 3300 1.8V 4:1 - 83.333 + 75.757 0 - 666 + 606 1.000 1 1 diff --git a/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_b.prj b/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_b.prj index b263a25..99d2e0c 100644 --- a/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_b.prj +++ b/tridoracpu/tridoracpu.srcs/mig_dram_0/mig_b.prj @@ -39,12 +39,12 @@ DDR3_SDRAM/Components/MT41K128M16XX-15E - 3000 + 3250 1.8V 4:1 - 83.333 + 76.923 0 - 666 + 615 1.000 1 1 diff --git a/tridoracpu/tridoracpu.srcs/sdspi.v b/tridoracpu/tridoracpu.srcs/sdspi.v index 7141288..9f5c84d 100644 --- a/tridoracpu/tridoracpu.srcs/sdspi.v +++ b/tridoracpu/tridoracpu.srcs/sdspi.v @@ -107,7 +107,7 @@ module sdspi( tx_fifo_empty ); - fifo #(.ADDR_WIDTH(8)) rx_fifo(clk, reset, + fifo #(.ADDR_WIDTH(10)) rx_fifo(clk, reset, rx_fifo_wr_en, rx_fifo_rd_en, rx_shifter, rx_fifo_out, rx_fifo_full, diff --git a/tridoracpu/tridoracpu.srcs/stackcpu.v b/tridoracpu/tridoracpu.srcs/stackcpu.v index c65ae4e..33b58ec 100644 --- a/tridoracpu/tridoracpu.srcs/stackcpu.v +++ b/tridoracpu/tridoracpu.srcs/stackcpu.v @@ -11,20 +11,14 @@ module stackcpu #(parameter ADDR_WIDTH = 32, WIDTH = 32, output reg [ADDR_WIDTH-1:0] addr, input wire [WIDTH-1:0] data_in, output wire read_enable, + output wire read_ins, output wire [WIDTH-1:0] data_out, output wire write_enable, input wire mem_wait, output wire led1, output wire led2, - output wire led3, - - output wire [WIDTH-1:0] debug_out1, - output wire [WIDTH-1:0] debug_out2, - output wire [WIDTH-1:0] debug_out3, - output wire [WIDTH-1:0] debug_out4, - output wire [WIDTH-1:0] debug_out5, - output wire [WIDTH-1:0] debug_out6 + output wire led3 ); localparam EVAL_STACK_INDEX_WIDTH = 6; @@ -182,6 +176,8 @@ module stackcpu #(parameter ADDR_WIDTH = 32, WIDTH = 32, assign mem_read_enable = (seq_state == FETCH) || (seq_state == EXEC && mem_read); assign mem_write_enable = (seq_state == MEM && mem_write); + assign read_ins = (seq_state == FETCH) || (seq_state == DECODE); + initial begin PC <= 0; nPC <= 0; seq_state <= MEM; diff --git a/tridoracpu/tridoracpu.srcs/top.v b/tridoracpu/tridoracpu.srcs/top.v index e79d611..00066fe 100644 --- a/tridoracpu/tridoracpu.srcs/top.v +++ b/tridoracpu/tridoracpu.srcs/top.v @@ -3,7 +3,8 @@ // or as clk_1hz for debugging `define clock cpuclk -`define clkfreq 83333333 +//`define clkfreq 83333333 +`define clkfreq 76923076 //`define clock clk //`define clkfreq 100000000 //`define clock clk_1hz @@ -67,10 +68,11 @@ module top( localparam ADDR_WIDTH = 32, WIDTH = 32, ROMADDR_WIDTH = 11, IOADDR_WIDTH = 11, IOADDR_SEL = 4; - wire [ADDR_WIDTH-1:0] mem_addr; + (* KEEP *) wire [ADDR_WIDTH-1:0] mem_addr; wire [WIDTH-1:0] mem_read_data; wire [WIDTH-1:0] mem_write_data; (* KEEP *) wire mem_wait; + assign led0 = mem_wait; (* KEEP *) wire mem_read_enable; (* KEEP *) wire mem_write_enable; @@ -80,14 +82,6 @@ module top( wire irq; - // assign led0 = mem_wait; - - wire [WIDTH-1:0] debug_data1, debug_data2, - debug_data3, debug_data4, - debug_data5, debug_data6; - - assign led0 = debug_data6[0]; - wire cpuclk, cpuclk_locked; wire dram_refclk200; wire pixclk; @@ -97,9 +91,11 @@ module top( wire [ADDR_WIDTH-1:0] dram_addr; wire [WIDTH-1:0] dram_read_data, dram_write_data; wire dram_read_enable, dram_write_enable, dram_wait; + (* KEEP *) wire dram_read_ins; dram_bridge dram_bridge0 (dram_addr, - dram_read_data, dram_write_data, dram_read_enable, dram_write_enable, dram_wait, + dram_read_data, dram_write_data, dram_read_enable, dram_write_enable, + dram_read_ins, dram_wait, rst, cpuclk, dram_refclk200, ddr3_dq, ddr3_dqs_n, ddr3_dqs_p, ddr3_addr, ddr3_ba, ddr3_ras_n, ddr3_cas_n, ddr3_we_n, @@ -254,15 +250,10 @@ module top( stackcpu cpu0(.clk(`clock), .rst(rst), .irq(irq), .addr(mem_addr), .data_in(mem_read_data), .read_enable(mem_read_enable), + .read_ins(dram_read_ins), .data_out(mem_write_data), .write_enable(mem_write_enable), .mem_wait(mem_wait), - .led1(led1), .led2(led2), .led3(led3), - .debug_out1(debug_data1), - .debug_out2(debug_data2), - .debug_out3(debug_data3), - .debug_out4(debug_data4), - .debug_out5(debug_data5), - .debug_out6(debug_data6)); + .led1(led1), .led2(led2), .led3(led3)); // Interrupt Controller irqctrl irqctrl0(`clock, irq_in, irqc_cs, mem_write_enable, diff --git a/tridoracpu/tridoracpu.srcs/vgafb.v b/tridoracpu/tridoracpu.srcs/vgafb.v index 4e8d668..37c1376 100644 --- a/tridoracpu/tridoracpu.srcs/vgafb.v +++ b/tridoracpu/tridoracpu.srcs/vgafb.v @@ -62,10 +62,11 @@ module display_timings #( // o_scanline: high for one tick at the start of each visible scanline assign o_scanline = (o_sy >= VA_STA) && (o_sy <= VA_END) && (o_sx == H_STA); + // set vblank at end of frame, clear at start always @(posedge i_pix_clk) begin - if(o_frame) o_vblank <= 1; - else if (o_de) o_vblank <= 0; + if(o_sy == VA_END) o_vblank <= 1; + else if (o_sy == -1) o_vblank <= 0; end always @ (posedge i_pix_clk) @@ -175,6 +176,7 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( wire scanline; // scanline start wire vblank; // vertical blank reg vblank_buf; // vertical blank in cpu clock domain + reg vblank_xfer; // vertical blank clock domain crossing display_timings #( // 640x480 800x600 1280x720 1920x1080 `ifdef RES_1024_768 @@ -233,7 +235,7 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( always @(posedge pix_clk) frame_d <= frame; - always @(posedge cpu_clk) vblank_buf <= vblank; + always @(posedge cpu_clk) { vblank_buf, vblank_xfer } <= { vblank_xfer, vblank }; always @(posedge cpu_clk) begin diff --git a/tridoracpu/tridoracpu.xpr b/tridoracpu/tridoracpu.xpr index a4c6545..05c7b30 100644 --- a/tridoracpu/tridoracpu.xpr +++ b/tridoracpu/tridoracpu.xpr @@ -1,9 +1,10 @@ - - - + + + + - + - - + + @@ -188,7 +210,7 @@ - + @@ -201,7 +223,7 @@ - + @@ -224,28 +246,19 @@ - + - - - - - - - - - - - + @@ -272,9 +285,12 @@ - + @@ -295,9 +311,24 @@ + + + + + + + + + + @@ -317,8 +348,8 @@ - - + + @@ -328,17 +359,17 @@ - + - + - + - + @@ -364,9 +395,9 @@ - + - + @@ -377,14 +408,12 @@ - + - - - +