diff --git a/examples/graphbench.pas b/examples/graphbench.pas
new file mode 100644
index 0000000..327e72e
--- /dev/null
+++ b/examples/graphbench.pas
@@ -0,0 +1,108 @@
+(* graphbench: times PutPixel and DrawLine by drawing pseudo-random
+   points and lines, printing the elapsed time as hh:mm:ss. *)
+program graphbench;
+var starttime,endtime:DateTime;
+
+(* Print the benchmark name, padded to 20 columns, and record the start time. *)
+procedure startBench(name:string);
+begin
+    write(name:20, ' ');
+    starttime := GetTime;
+end;
+
+(* Record the end time and print the time elapsed since startBench as hh:mm:ss. *)
+procedure endBench;
+var secDelta, minDelta, hourDelta:integer;
+    (* Write i, preceded by a '0' if it is below 10. *)
+    procedure write2Digits(i:integer);
+    begin
+        if i < 10 then
+            write('0');
+        write(i);
+    end;
+begin
+    endTime := GetTime;
+
+    hourDelta := endtime.hours - starttime.hours;
+    minDelta := endtime.minutes - starttime.minutes;
+    secDelta := endtime.seconds - starttime.seconds;
+
+    (* borrow from the next larger unit when a difference is negative *)
+    if secDelta < 0 then
+    begin
+        secDelta := 60 + secDelta;
+        minDelta := minDelta - 1;
+    end;
+
+    if minDelta < 0 then
+    begin
+        minDelta := 60 + minDelta;
+        hourDelta := hourDelta - 1;
+    end;
+
+    (* the run crossed midnight; assumes hours is a 24-hour
+       time-of-day value - TODO confirm against GetTime *)
+    if hourDelta < 0 then
+        hourDelta := 24 + hourDelta;
+
+    write2Digits(hourDelta);
+    write(':'); write2Digits(minDelta);
+    write(':'); write2Digits(secDelta);
+    writeln;
+end;
+
+(* Return a cheap pseudo-random coordinate value.
+   random is masked to 0..511 and reduced by a single subtraction, so
+   the result is guaranteed to be below lessthan only if lessthan >= 256. *)
+function randint(lessthan:integer):integer;
+var r:integer;
+begin
+    r := random and 511;
+    if r >= lessthan then
+        r := r - lessthan;
+    randint := r;
+end;
+
+(* Draw count random lines, cycling the color through 1..15. *)
+procedure drawlines(count:integer);
+var i,col,x1,y1,x2,y2:integer;
+begin
+    col := 1;
+    for i := 1 to count do
+    begin
+        x1 := randint(500);
+        y1 := randint(400);
+        x2 := randint(500);
+        y2 := randint(400);
+        DrawLine(x1,y1,x2,y2,col);
+        col := col + 1;
+        if col > 15 then col := 1;
+    end;
+end;
+
+(* Draw count random pixels, cycling the color through 1..15. *)
+procedure drawpoints(count:integer);
+var i,col,x,y:integer;
+begin
+    col := 1;
+    for i := 1 to count do
+    begin
+        x := randint(500);
+        y := randint(400);
+        PutPixel(x,y,col);
+        col := col + 1;
+        if col > 15 then col := 1;
+    end;
+end;
+
+begin
+    InitGraphics;
+    startBench('200K points');
+    drawpoints(200000);
+    endBench;
+
+    InitGraphics;
+    startBench('10K lines');
+    drawlines(10000);
+    endBench;
+end.
diff --git a/lib/corelib.s b/lib/corelib.s
index a21b95c..93dc81f 100644
--- a/lib/corelib.s
+++ b/lib/corelib.s
@@ -706,108 +706,35 @@ CMPWORDS_XT2:
     .EQU FB_PS $90C
     .EQU FB_PD $910
     .EQU FB_CTL $914
-; set a pixel in fb memory
-; parameters: x,y - coordinates
-PUTPIXEL_1BPP:
-    ; calculate vmem address:
-    OVER ; duplicate x
-    ; divide x by 32
-    SHR
-    SHR
-    SHR
-    SHR
-    SHR
-    SWAP
-    ; multiply y by words per line
-    SHL 2
-    SHL 2
-    SHL
+    .EQU FB_SHIFTER $918
+    .EQU FB_SHIFTCOUNT $91C
+    .EQU FB_SHIFTERM $920
+    .EQU FB_SHIFTERSP $924
+    .EQU FB_MASKGEN $928
 
-    ADD ; add results together for vmem addr
+; draw a single pixel using the framebuffer shifter/mask registers
+; args: x, y, color (pushed in that order, color on top of stack)
+; NOTE(review): for color 0 the shifted pixel word is all zero, so
+; FB_SHIFTERM reads as all ones and the background pixel is kept
+; unchanged - confirm that color 0 is meant to be transparent here
 
-    DUP
-    LOADCP FB_WA
-    SWAP
-    STOREI ; store to framebuffer write addr register
-    DROP
-    LOADCP FB_RA ; and to framebuffer read addr register
-    SWAP
-    STOREI
-    DROP
-
-    ; x is now at top of stack
-    ; get bit value from x modulo 32
-    LOADC 31
-    AND
-    SHL 2 ; (x & 31) * 4 = offset into table
-    LOADCP INT_TO_PIX_TABLE
-    ADD
-    LOADI
-
-    LOADCP FB_IO
-    ; read old vmem value
-    LOADCP FB_IO
-    LOADI
-    ; or in new bit
-    OR
-    ; write new value
-    STOREI
-    DROP
-
-    RET
-
-INT_TO_PIX_TABLE:
-    .WORD %10000000_00000000_00000000_00000000
-    .WORD %01000000_00000000_00000000_00000000
-    .WORD %00100000_00000000_00000000_00000000
-    .WORD %00010000_00000000_00000000_00000000
-    .WORD %00001000_00000000_00000000_00000000
-    .WORD %00000100_00000000_00000000_00000000
-    .WORD %00000010_00000000_00000000_00000000
-    .WORD %00000001_00000000_00000000_00000000
-    .WORD %00000000_10000000_00000000_00000000
-    .WORD %00000000_01000000_00000000_00000000
-    .WORD %00000000_00100000_00000000_00000000
-    .WORD %00000000_00010000_00000000_00000000
-    .WORD %00000000_00001000_00000000_00000000
-    .WORD %00000000_00000100_00000000_00000000
-    .WORD %00000000_00000010_00000000_00000000
-    .WORD %00000000_00000001_00000000_00000000
-    .WORD %00000000_00000000_10000000_00000000
-    .WORD %00000000_00000000_01000000_00000000
-    .WORD %00000000_00000000_00100000_00000000
-    .WORD %00000000_00000000_00010000_00000000
-    .WORD %00000000_00000000_00001000_00000000
-    .WORD %00000000_00000000_00000100_00000000
-    .WORD %00000000_00000000_00000010_00000000
-    .WORD %00000000_00000000_00000001_00000000
-    .WORD %00000000_00000000_00000000_10000000
-    .WORD %00000000_00000000_00000000_01000000
-    .WORD %00000000_00000000_00000000_00100000
-    .WORD %00000000_00000000_00000000_00010000
-    .WORD %00000000_00000000_00000000_00001000
-    .WORD %00000000_00000000_00000000_00000100
-    .WORD %00000000_00000000_00000000_00000010
-    .WORD %00000000_00000000_00000000_00000001
-
-PUTMPIXEL:
-    LOADC 1
-; set a pixel in fb memory
-; parameters: x,y,color - coordinates, color value (0-15)
 PUTPIXEL:
 PUTPIXEL_4BPP:
 
     .EQU PUTPIXEL_X 0
     .EQU PUTPIXEL_Y 4
     .EQU PUTPIXEL_COLOR 8
-    .EQU PUTPIXEL_PIXPOS 12
+    .EQU PUTPIXEL_BPSAV 12
     .EQU PUTPIXEL_FS 16
 
     FPADJ -PUTPIXEL_FS
-
     STORE PUTPIXEL_COLOR
     STORE PUTPIXEL_Y
     STORE PUTPIXEL_X
+    LOADREG BP              ; save the caller's BP in the local frame
+    STORE PUTPIXEL_BPSAV
+    LOADC 0                 ; BP = 0 while the FB_* registers are accessed
+    STOREREG BP             ; with LOAD.B/STORE.B (presumably BP-relative)
 
     ; calculate vmem address: (x / 8) + (y * 80)
     LOAD PUTPIXEL_X
@@ -826,83 +753,36 @@ PUTPIXEL_4BPP:
 
     ADD ; add results together for vmem addr
 
-    LOADCP FB_WA
-    OVER
-    STOREI ; store to framebuffer write addr register
-    DROP
-    LOADCP FB_RA ; and to framebuffer read addr register
-    SWAP ; swap addr and value for STOREI
-    STOREI
-    DROP
-
-    LOAD PUTPIXEL_X
-    ; |0000.0000|0000.0000|0000.0000|0000.1111|
-    LOADC 7
-    AND ; calculate pixel position in word
-    LOADC 7
-    SWAP
-    SUB ; pixpos = 7 - (x & 7)
-    STORE PUTPIXEL_PIXPOS
+    DUP
+    STORE.B FB_WA ; set as write and read addresses
+    STORE.B FB_RA
+    ; create pixel data from color value in
+    ; leftmost pixel data bits (31-28)
+    LOADC 0
     LOAD PUTPIXEL_COLOR
-    LOAD PUTPIXEL_PIXPOS
-    SHR ; rcount = pixpos / 2
-ROTLOOP_:
-    DUP ; exit loop if rcount is 0
-    CBRANCH.Z ROTLOOP_END
-    SWAP ; pixel value is now on top of stack
-    BROT ; value = value << 8
-    SWAP ; rcount is now on top of stack
-    DEC 1 ; rcount = rcount - 1
-    BRANCH ROTLOOP_
-ROTLOOP_END:
-    DROP ; drop rcount
-    ; shifted pixel value is now at top of stack
-    LOAD PUTPIXEL_PIXPOS
-    LOADC 1
-    AND
-    CBRANCH.Z EVEN_PIXPOS
-    SHL 2 ; if pixpos is odd, shift by 4 bits
+    BPLC ; presumably places the color byte in the top byte (31-24)
     SHL 2
-EVEN_PIXPOS:
-    LOAD PUTPIXEL_X
-    ; get bit value from x modulo 8
-    LOADC 7
-    AND
-    SHL 2 ; (x & 7) * 4 = offset into table
-    LOADCP INT_TO_MASK_TABLE
-    ADD
-    LOADI
+    SHL 2 ; two SHL 2 = shift left by 4 bits in total
+    STORE.B FB_SHIFTER ; store pixel into shifter
 
-    ; read old vmem value
-    LOADCP FB_IO
-    LOADI
-    ; mask bits
-    AND
-    ; or in shifted pixel value
-    OR
+    LOAD PUTPIXEL_X ; use x coord as shift count
+    STORE.B FB_SHIFTCOUNT ; writing triggers shifting
 
-    ; write new value
-    LOADCP FB_IO
-    SWAP
-    STOREI
-    DROP
+    LOAD.B FB_SHIFTERM ; get shift result as mask
+    LOAD.B FB_IO ; get background pixel data
+    AND ; remove bits for new pixel from bg
+
+    LOAD.B FB_SHIFTER ; load shifted pixel
+    OR ; OR in new pixel bits
+    STORE.B FB_IO ; write new pixel data word to vmem
+
+    LOAD PUTPIXEL_BPSAV ; restore the caller's BP
+    STOREREG BP
 
     FPADJ PUTPIXEL_FS
 
     RET
 
-    .CPOOL
-
-INT_TO_MASK_TABLE:
-    .WORD %00001111_11111111_11111111_11111111
-    .WORD %11110000_11111111_11111111_11111111
-    .WORD %11111111_00001111_11111111_11111111
-    .WORD %11111111_11110000_11111111_11111111
-    .WORD %11111111_11111111_00001111_11111111
-    .WORD %11111111_11111111_11110000_11111111
-    .WORD %11111111_11111111_11111111_00001111
-    .WORD %11111111_11111111_11111111_11110000
-
 ; draw a line between two points
 ; parameters: x0, y0, x1, y1, color
     .EQU DL_X0 0
diff --git a/tridoracpu/tridoracpu.srcs/vgafb.v b/tridoracpu/tridoracpu.srcs/vgafb.v
index 411e956..fd42627 100644
--- a/tridoracpu/tridoracpu.srcs/vgafb.v
+++ b/tridoracpu/tridoracpu.srcs/vgafb.v
@@ -162,10 +162,12 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
     reg [4:0] acc_shift_count;
     reg acc_start_shift;
     reg [VMEM_DATA_WIDTH-1:0] acc_mask_in;
-    wire [VMEM_DATA_WIDTH-1:0] acc_mask_out;
-    wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask;
+    reg [VMEM_DATA_WIDTH-1:0] acc_mask_buf;
+    reg [VMEM_DATA_WIDTH-1:0] acc_shiftmask_buf;
+    wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask = acc_shiftmask_buf;
     wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH];
     wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0];
+
 `endif
 
     assign vmem_rd_en = rd_en;
@@ -176,9 +178,9 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
             (reg_sel == REG_CTL) ? status :
 `ifdef ENABLE_FB_ACCEL
             (reg_sel == REG_SHIFTER) ? acc_shifter_out_h:
-            (reg_sel == REG_SHIFTERM) ? acc_shifter_mask :
+            (reg_sel == REG_SHIFTERM) ? acc_shiftmask_buf :
             (reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l :
-            (reg_sel == REG_MASKGEN) ? acc_mask_out :
+            (reg_sel == REG_MASKGEN) ? acc_mask_buf :
 `endif
             32'hFFFFFFFF;
 
@@ -335,27 +337,36 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
             acc_mask_in <= wr_data;
     end
 
-    assign acc_mask_out = {
-        {4{|{acc_mask_in[31:28]}}},
-        {4{|{acc_mask_in[27:24]}}},
-        {4{|{acc_mask_in[23:20]}}},
-        {4{|{acc_mask_in[19:16]}}},
-        {4{|{acc_mask_in[15:12]}}},
-        {4{|{acc_mask_in[11:8]}}},
-        {4{|{acc_mask_in[7:4]}}},
-        {4{|{acc_mask_in[3:0]}}}
-    };
+    // mask output is buffered to avoid timing problems
+    always @(posedge cpu_clk)
+    begin
+        acc_mask_buf <= {
+            {4{~|{acc_mask_in[31:28]}}},
+            {4{~|{acc_mask_in[27:24]}}},
+            {4{~|{acc_mask_in[23:20]}}},
+            {4{~|{acc_mask_in[19:16]}}},
+            {4{~|{acc_mask_in[15:12]}}},
+            {4{~|{acc_mask_in[11:8]}}},
+            {4{~|{acc_mask_in[7:4]}}},
+            {4{~|{acc_mask_in[3:0]}}}
+        };
+    end
 
-    assign acc_shifter_mask = {
-        {4{|{acc_shifter_out_h[31:28]}}},
-        {4{|{acc_shifter_out_h[27:24]}}},
-        {4{|{acc_shifter_out_h[23:20]}}},
-        {4{|{acc_shifter_out_h[19:16]}}},
-        {4{|{acc_shifter_out_h[15:12]}}},
-        {4{|{acc_shifter_out_h[11:8]}}},
-        {4{|{acc_shifter_out_h[7:4]}}},
-        {4{|{acc_shifter_out_h[3:0]}}}
-    };
+    // mask for the shifted pixel word: a nibble reads 1111 where the
+    // nibble in acc_shifter_out_h is zero; registered with a non-blocking
+    // assignment so other logic never sees a mid-edge value
+    always @(posedge cpu_clk)
+    begin
+        acc_shiftmask_buf <= {
+            {4{~|{acc_shifter_out_h[31:28]}}},
+            {4{~|{acc_shifter_out_h[27:24]}}},
+            {4{~|{acc_shifter_out_h[23:20]}}},
+            {4{~|{acc_shifter_out_h[19:16]}}},
+            {4{~|{acc_shifter_out_h[15:12]}}},
+            {4{~|{acc_shifter_out_h[11:8]}}},
+            {4{~|{acc_shifter_out_h[7:4]}}},
+            {4{~|{acc_shifter_out_h[3:0]}}}
+        };
+    end
 `endif
 
     //
diff --git a/tridoracpu/tridoracpu.xpr b/tridoracpu/tridoracpu.xpr
index 4d21f83..a088319 100644
--- a/tridoracpu/tridoracpu.xpr
+++ b/tridoracpu/tridoracpu.xpr
@@ -358,9 +358,7 @@
-
- Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference.
-
+
@@ -384,9 +382,7 @@
-
- Best predicted directive for place_design.
-
+