diff --git a/examples/graphbench.pas b/examples/graphbench.pas deleted file mode 100644 index 327e72e..0000000 --- a/examples/graphbench.pas +++ /dev/null @@ -1,92 +0,0 @@ -program graphbench; -var starttime,endtime:DateTime; - -procedure startBench(name:string); -begin - write(name:20, ' '); - starttime := GetTime; -end; - -procedure endBench; -var secDelta, minDelta, hourDelta:integer; - procedure write2Digits(i:integer); - begin - if i < 10 then - write('0'); - write(i); - end; -begin - endTime := GetTime; - - hourDelta := endtime.hours - starttime.hours; - minDelta := endtime.minutes - starttime.minutes; - secDelta := endtime.seconds - starttime.seconds; - - if secDelta < 0 then - begin - secDelta := 60 + secDelta; - minDelta := minDelta - 1; - end; - - if minDelta < 0 then - begin - minDelta := 60 + minDelta; - hourDelta := hourDelta - 1; - end; - - write2Digits(hourDelta); - write(':'); write2Digits(minDelta); - write(':'); write2Digits(secDelta); - writeln; -end; - -function randint(lessthan:integer):integer; -var r:integer; -begin - r := random and 511; - if r >= lessthan then - r := r - lessthan; - randint := r; -end; - -procedure drawlines(count:integer); -var i,col,x1,y1,x2,y2:integer; -begin - col := 1; - for i := 1 to count do - begin - x1 := randint(500); - y1 := randint(400); - x2 := randint(500); - y2 := randint(400); - DrawLine(x1,y1,x2,y2,col); - col := col + 1; - if col > 15 then col := 1; - end; -end; - -procedure drawpoints(count:integer); -var i,col,x,y:integer; -begin - col := 1; - for i := 1 to count do - begin - x := randint(500); - y := randint(400); - PutPixel(x,y,col); - col := col + 1; - if col > 15 then col := 1; - end; -end; - -begin - InitGraphics; - startBench('200K points'); - drawpoints(200000); - endBench; - - InitGraphics; - startBench('10K lines'); - drawlines(10000); - endBench; -end. diff --git a/lib/corelib.s b/lib/corelib.s index 93dc81f..a21b95c 100644 --- a/lib/corelib.s +++ b/lib/corelib.s @@ -706,32 +706,108 @@ CMPWORDS_XT2: .EQU FB_PS $90C .EQU FB_PD $910 .EQU FB_CTL $914 - .EQU FB_SHIFTER $918 - .EQU FB_SHIFTCOUNT $91C - .EQU FB_SHIFTERM $920 - .EQU FB_SHIFTERSP $924 - .EQU FB_MASKGEN $928 +; set a pixel in fb memory +; parameters: x,y - coordinates +PUTPIXEL_1BPP: + ; calculate vmem address: + OVER ; duplicate x + ; divide x by 32 + SHR + SHR + SHR + SHR + SHR + SWAP + ; multiply y by words per line + SHL 2 + SHL 2 + SHL -; draw a single pixel -; args: x, y, color + ADD ; add results together for vmem addr + DUP + LOADCP FB_WA + SWAP + STOREI ; store to framebuffer write addr register + DROP + LOADCP FB_RA ; and to framebuffer read addr register + SWAP + STOREI + DROP + + ; x is now at top of stack + ; get bit value from x modulo 32 + LOADC 31 + AND + SHL 2 ; (x & 31) * 4 = offset into table + LOADCP INT_TO_PIX_TABLE + ADD + LOADI + + LOADCP FB_IO + ; read old vmem value + LOADCP FB_IO + LOADI + ; or in new bit + OR + ; write new value + STOREI + DROP + + RET + +INT_TO_PIX_TABLE: + .WORD %10000000_00000000_00000000_00000000 + .WORD %01000000_00000000_00000000_00000000 + .WORD %00100000_00000000_00000000_00000000 + .WORD %00010000_00000000_00000000_00000000 + .WORD %00001000_00000000_00000000_00000000 + .WORD %00000100_00000000_00000000_00000000 + .WORD %00000010_00000000_00000000_00000000 + .WORD %00000001_00000000_00000000_00000000 + .WORD %00000000_10000000_00000000_00000000 + .WORD %00000000_01000000_00000000_00000000 + .WORD %00000000_00100000_00000000_00000000 + .WORD %00000000_00010000_00000000_00000000 + .WORD %00000000_00001000_00000000_00000000 + .WORD %00000000_00000100_00000000_00000000 + .WORD %00000000_00000010_00000000_00000000 + .WORD %00000000_00000001_00000000_00000000 + .WORD %00000000_00000000_10000000_00000000 + .WORD %00000000_00000000_01000000_00000000 + .WORD %00000000_00000000_00100000_00000000 + .WORD %00000000_00000000_00010000_00000000 + .WORD %00000000_00000000_00001000_00000000 + .WORD %00000000_00000000_00000100_00000000 + .WORD %00000000_00000000_00000010_00000000 + .WORD %00000000_00000000_00000001_00000000 + .WORD %00000000_00000000_00000000_10000000 + .WORD %00000000_00000000_00000000_01000000 + .WORD %00000000_00000000_00000000_00100000 + .WORD %00000000_00000000_00000000_00010000 + .WORD %00000000_00000000_00000000_00001000 + .WORD %00000000_00000000_00000000_00000100 + .WORD %00000000_00000000_00000000_00000010 + .WORD %00000000_00000000_00000000_00000001 + +PUTMPIXEL: + LOADC 1 +; set a pixel in fb memory +; parameters: x,y,color - coordinates, color value (0-15) PUTPIXEL: PUTPIXEL_4BPP: .EQU PUTPIXEL_X 0 .EQU PUTPIXEL_Y 4 .EQU PUTPIXEL_COLOR 8 - .EQU PUTPIXEL_BPSAV 12 + .EQU PUTPIXEL_PIXPOS 12 .EQU PUTPIXEL_FS 16 FPADJ -PUTPIXEL_FS + STORE PUTPIXEL_COLOR STORE PUTPIXEL_Y STORE PUTPIXEL_X - LOADREG BP - STORE PUTPIXEL_BPSAV - LOADC 0 - STOREREG BP ; calculate vmem address: (x / 8) + (y * 80) LOAD PUTPIXEL_X @@ -750,36 +826,83 @@ PUTPIXEL_4BPP: ADD ; add results together for vmem addr - DUP - STORE.B FB_WA ; set as write and read addresses - STORE.B FB_RA + LOADCP FB_WA + OVER + STOREI ; store to framebuffer write addr register + DROP + LOADCP FB_RA ; and to framebuffer read addr register + SWAP ; swap addr and value for STOREI + STOREI + DROP + + LOAD PUTPIXEL_X + ; |0000.0000|0000.0000|0000.0000|0000.1111| + LOADC 7 + AND ; calculate pixel position in word + LOADC 7 + SWAP + SUB ; pixpos = 7 - (x & 7) + STORE PUTPIXEL_PIXPOS - ; create pixel data from color value in - ; leftmost pixel data bits (31-28) - LOADC 0 LOAD PUTPIXEL_COLOR - BPLC + LOAD PUTPIXEL_PIXPOS + SHR ; rcount = pixpos / 2 +ROTLOOP_: + DUP ; exit loop if rcount is 0 + CBRANCH.Z ROTLOOP_END + SWAP ; pixel value is now on top of stack + BROT ; value = value << 8 + SWAP ; rcount is now on top of stack + DEC 1 ; rcount = rcount - 1 + BRANCH ROTLOOP_ +ROTLOOP_END: + DROP ; drop rcount + ; shifted pixel value is now at top of stack + LOAD PUTPIXEL_PIXPOS + LOADC 1 + AND + CBRANCH.Z EVEN_PIXPOS + SHL 2 ; if pixpos is odd, shift by 4 bits SHL 2 - SHL 2 - STORE.B FB_SHIFTER ; store pixel into shifter +EVEN_PIXPOS: + LOAD PUTPIXEL_X + ; get bit value from x modulo 8 + LOADC 7 + AND + SHL 2 ; (x & 7) * 4 = offset into table + LOADCP INT_TO_MASK_TABLE + ADD + LOADI - LOAD PUTPIXEL_X ; use x coord as shift count - STORE.B FB_SHIFTCOUNT ; writing triggers shifting + ; read old vmem value + LOADCP FB_IO + LOADI + ; mask bits + AND + ; or in shifted pixel value + OR - LOAD.B FB_SHIFTERM ; get shift result as mask - LOAD.B FB_IO ; get background pixel data - AND ; remove bits for new pixel from bg - - LOAD.B FB_SHIFTER ; load shifted pixel - OR ; OR in new pixel bits - STORE.B FB_IO ; write new pixel data word to vmem - - LOAD PUTPIXEL_BPSAV - STOREREG BP + ; write new value + LOADCP FB_IO + SWAP + STOREI + DROP FPADJ PUTPIXEL_FS RET + .CPOOL + +INT_TO_MASK_TABLE: + .WORD %00001111_11111111_11111111_11111111 + .WORD %11110000_11111111_11111111_11111111 + .WORD %11111111_00001111_11111111_11111111 + .WORD %11111111_11110000_11111111_11111111 + .WORD %11111111_11111111_00001111_11111111 + .WORD %11111111_11111111_11110000_11111111 + .WORD %11111111_11111111_11111111_00001111 + .WORD %11111111_11111111_11111111_11110000 + ; draw a line between two points ; parameters: x0, y0, x1, y1, color .EQU DL_X0 0 diff --git a/tridoracpu/tridoracpu.srcs/vgafb.v b/tridoracpu/tridoracpu.srcs/vgafb.v index fd42627..411e956 100644 --- a/tridoracpu/tridoracpu.srcs/vgafb.v +++ b/tridoracpu/tridoracpu.srcs/vgafb.v @@ -162,12 +162,10 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( reg [4:0] acc_shift_count; reg acc_start_shift; reg [VMEM_DATA_WIDTH-1:0] acc_mask_in; - reg [VMEM_DATA_WIDTH-1:0] acc_mask_buf; - reg [VMEM_DATA_WIDTH-1:0] acc_shiftmask_buf; - wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask = acc_shiftmask_buf; + wire [VMEM_DATA_WIDTH-1:0] acc_mask_out; + wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask; wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH]; wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0]; - `endif assign vmem_rd_en = rd_en; @@ -178,9 +176,9 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( (reg_sel == REG_CTL) ? status : `ifdef ENABLE_FB_ACCEL (reg_sel == REG_SHIFTER) ? acc_shifter_out_h: - (reg_sel == REG_SHIFTERM) ? acc_shiftmask_buf : + (reg_sel == REG_SHIFTERM) ? acc_shifter_mask : (reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l : - (reg_sel == REG_MASKGEN) ? acc_mask_buf : + (reg_sel == REG_MASKGEN) ? acc_mask_out : `endif 32'hFFFFFFFF; @@ -337,34 +335,27 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( acc_mask_in <= wr_data; end - // mask output is buffered to avoid timing problems - always @(posedge cpu_clk) - begin - acc_mask_buf <= { - {4{~|{acc_mask_in[31:28]}}}, - {4{~|{acc_mask_in[27:24]}}}, - {4{~|{acc_mask_in[23:20]}}}, - {4{~|{acc_mask_in[19:16]}}}, - {4{~|{acc_mask_in[15:12]}}}, - {4{~|{acc_mask_in[11:8]}}}, - {4{~|{acc_mask_in[7:4]}}}, - {4{~|{acc_mask_in[3:0]}}} - }; - end + assign acc_mask_out = { + {4{|{acc_mask_in[31:28]}}}, + {4{|{acc_mask_in[27:24]}}}, + {4{|{acc_mask_in[23:20]}}}, + {4{|{acc_mask_in[19:16]}}}, + {4{|{acc_mask_in[15:12]}}}, + {4{|{acc_mask_in[11:8]}}}, + {4{|{acc_mask_in[7:4]}}}, + {4{|{acc_mask_in[3:0]}}} + }; - always @(posedge cpu_clk) - begin - acc_shiftmask_buf = { - {4{~|{acc_shifter_out_h[31:28]}}}, - {4{~|{acc_shifter_out_h[27:24]}}}, - {4{~|{acc_shifter_out_h[23:20]}}}, - {4{~|{acc_shifter_out_h[19:16]}}}, - {4{~|{acc_shifter_out_h[15:12]}}}, - {4{~|{acc_shifter_out_h[11:8]}}}, - {4{~|{acc_shifter_out_h[7:4]}}}, - {4{~|{acc_shifter_out_h[3:0]}}} - }; - end + assign acc_shifter_mask = { + {4{|{acc_shifter_out_h[31:28]}}}, + {4{|{acc_shifter_out_h[27:24]}}}, + {4{|{acc_shifter_out_h[23:20]}}}, + {4{|{acc_shifter_out_h[19:16]}}}, + {4{|{acc_shifter_out_h[15:12]}}}, + {4{|{acc_shifter_out_h[11:8]}}}, + {4{|{acc_shifter_out_h[7:4]}}}, + {4{|{acc_shifter_out_h[3:0]}}} + }; `endif // diff --git a/tridoracpu/tridoracpu.xpr b/tridoracpu/tridoracpu.xpr index a088319..4d21f83 100644 --- a/tridoracpu/tridoracpu.xpr +++ b/tridoracpu/tridoracpu.xpr @@ -358,7 +358,9 @@ - + + Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference. + @@ -382,7 +384,9 @@ - + + Best predicted directive for place_design. +