Compare commits

...

3 commits

Author SHA1 Message Date
slederer
c119a2a5bb add line/points drawing benchmark 2026-01-31 17:26:13 +01:00
slederer
1e56251fc1 vgafb: buffer maskgen outputs to avoid timing problems 2026-01-31 17:24:36 +01:00
slederer
8900eb90be corelib: new putpixel routine using shifter/maskgen 2026-01-31 02:31:00 +01:00
4 changed files with 160 additions and 186 deletions

92
examples/graphbench.pas Normal file
View file

@ -0,0 +1,92 @@
(* graphbench: times a batch of PutPixel calls and a batch of DrawLine calls
   and prints the elapsed wall-clock time of each batch as hh:mm:ss. *)
program graphbench;
(* timestamps written by startBench and endBench and compared in endBench *)
var starttime,endtime:DateTime;
(* Begin a timed run: print the benchmark label, then store the current
   time in the global starttime. *)
procedure startBench(name:string);
begin
  (* label uses a field width of 20 and is followed by one separator blank *)
  write(name:20);
  write(' ');
  starttime := GetTime;
end;
(* Finish a timed run: store the current time in the global endtime and
   print the time elapsed since startBench as hh:mm:ss, followed by a
   newline.
   The difference is computed field by field with borrows, including a
   wrap of the hour field by 24 so that a run crossing midnight is still
   printed correctly. Runs of 24 hours or longer cannot be represented. *)
procedure endBench;
var secDelta, minDelta, hourDelta:integer;

  (* write i with a leading zero if it is a single digit *)
  procedure write2Digits(i:integer);
  begin
    if i < 10 then
      write('0');
    write(i);
  end;

begin
  endtime := GetTime;

  hourDelta := endtime.hours - starttime.hours;
  minDelta := endtime.minutes - starttime.minutes;
  secDelta := endtime.seconds - starttime.seconds;

  (* borrow one minute if the seconds went negative *)
  if secDelta < 0 then
  begin
    secDelta := 60 + secDelta;
    minDelta := minDelta - 1;
  end;

  (* borrow one hour if the minutes went negative *)
  if minDelta < 0 then
  begin
    minDelta := 60 + minDelta;
    hourDelta := hourDelta - 1;
  end;

  (* the clock wrapped past midnight between start and end *)
  if hourDelta < 0 then
    hourDelta := hourDelta + 24;

  write2Digits(hourDelta);
  write(':'); write2Digits(minDelta);
  write(':'); write2Digits(secDelta);
  writeln;
end;
(* Return a pseudo-random integer r with 0 <= r < lessthan.
   The raw random value is masked to 9 bits (0..511) and then reduced by
   repeated subtraction, so the result is always below lessthan for any
   positive bound. Because of the 9-bit mask, values of 512 and above are
   never produced, and the distribution is not uniform: small results are
   more likely.
   For lessthan <= 0 there is no valid range; the masked value is returned
   unchanged. *)
function randint(lessthan:integer):integer;
var r:integer;
begin
  r := random and 511;
  if lessthan > 0 then
    (* a single subtraction is only enough when lessthan >= 256,
       so keep subtracting until r is in range *)
    while r >= lessthan do
      r := r - lessthan;
  randint := r;
end;
(* Draw count lines between random end points, x taken from randint(500)
   and y from randint(400), cycling the color through 1..15. *)
procedure drawlines(count:integer);
var n,color,xa,ya,xb,yb:integer;
begin
  color := 1;
  for n := 1 to count do
  begin
    (* the coordinates are fetched in a fixed order so that the sequence
       of random numbers consumed stays the same *)
    xa := randint(500);
    ya := randint(400);
    xb := randint(500);
    yb := randint(400);
    DrawLine(xa,ya,xb,yb,color);

    (* advance to the next color, wrapping from 15 back to 1 *)
    if color >= 15 then
      color := 1
    else
      color := color + 1;
  end;
end;
(* Plot count pixels at random positions, x taken from randint(500) and
   y from randint(400), cycling the color through 1..15. *)
procedure drawpoints(count:integer);
var n,color,px,py:integer;
begin
  color := 1;
  for n := 1 to count do
  begin
    (* x is fetched before y so that the sequence of random numbers
       consumed stays the same *)
    px := randint(500);
    py := randint(400);
    PutPixel(px,py,color);

    (* advance to the next color, wrapping from 15 back to 1 *)
    if color >= 15 then
      color := 1
    else
      color := color + 1;
  end;
end;
begin
  (* benchmark 1: single pixels *)
  InitGraphics;
  startBench('200K points');
  drawpoints(200000);
  endBench;

  (* benchmark 2: lines; InitGraphics is called again before the run *)
  InitGraphics;
  startBench('10K lines');
  drawlines(10000);
  endBench;
end.

View file

@ -706,108 +706,32 @@ CMPWORDS_XT2:
.EQU FB_PS $90C
.EQU FB_PD $910
.EQU FB_CTL $914
; set a pixel in fb memory
; parameters: x,y - coordinates
PUTPIXEL_1BPP:
; calculate vmem address:
OVER ; duplicate x
; divide x by 32
SHR
SHR
SHR
SHR
SHR
SWAP
; multiply y by words per line
SHL 2
SHL 2
SHL
.EQU FB_SHIFTER $918
.EQU FB_SHIFTCOUNT $91C
.EQU FB_SHIFTERM $920
.EQU FB_SHIFTERSP $924
.EQU FB_MASKGEN $928
ADD ; add results together for vmem addr
; draw a single pixel
; args: x, y, color
DUP
LOADCP FB_WA
SWAP
STOREI ; store to framebuffer write addr register
DROP
LOADCP FB_RA ; and to framebuffer read addr register
SWAP
STOREI
DROP
; x is now at top of stack
; get bit value from x modulo 32
LOADC 31
AND
SHL 2 ; (x & 31) * 4 = offset into table
LOADCP INT_TO_PIX_TABLE
ADD
LOADI
LOADCP FB_IO
; read old vmem value
LOADCP FB_IO
LOADI
; or in new bit
OR
; write new value
STOREI
DROP
RET
INT_TO_PIX_TABLE:
.WORD %10000000_00000000_00000000_00000000
.WORD %01000000_00000000_00000000_00000000
.WORD %00100000_00000000_00000000_00000000
.WORD %00010000_00000000_00000000_00000000
.WORD %00001000_00000000_00000000_00000000
.WORD %00000100_00000000_00000000_00000000
.WORD %00000010_00000000_00000000_00000000
.WORD %00000001_00000000_00000000_00000000
.WORD %00000000_10000000_00000000_00000000
.WORD %00000000_01000000_00000000_00000000
.WORD %00000000_00100000_00000000_00000000
.WORD %00000000_00010000_00000000_00000000
.WORD %00000000_00001000_00000000_00000000
.WORD %00000000_00000100_00000000_00000000
.WORD %00000000_00000010_00000000_00000000
.WORD %00000000_00000001_00000000_00000000
.WORD %00000000_00000000_10000000_00000000
.WORD %00000000_00000000_01000000_00000000
.WORD %00000000_00000000_00100000_00000000
.WORD %00000000_00000000_00010000_00000000
.WORD %00000000_00000000_00001000_00000000
.WORD %00000000_00000000_00000100_00000000
.WORD %00000000_00000000_00000010_00000000
.WORD %00000000_00000000_00000001_00000000
.WORD %00000000_00000000_00000000_10000000
.WORD %00000000_00000000_00000000_01000000
.WORD %00000000_00000000_00000000_00100000
.WORD %00000000_00000000_00000000_00010000
.WORD %00000000_00000000_00000000_00001000
.WORD %00000000_00000000_00000000_00000100
.WORD %00000000_00000000_00000000_00000010
.WORD %00000000_00000000_00000000_00000001
PUTMPIXEL:
LOADC 1
; set a pixel in fb memory
; parameters: x,y,color - coordinates, color value (0-15)
PUTPIXEL:
PUTPIXEL_4BPP:
.EQU PUTPIXEL_X 0
.EQU PUTPIXEL_Y 4
.EQU PUTPIXEL_COLOR 8
.EQU PUTPIXEL_PIXPOS 12
.EQU PUTPIXEL_BPSAV 12
.EQU PUTPIXEL_FS 16
FPADJ -PUTPIXEL_FS
STORE PUTPIXEL_COLOR
STORE PUTPIXEL_Y
STORE PUTPIXEL_X
LOADREG BP
STORE PUTPIXEL_BPSAV
LOADC 0
STOREREG BP
; calculate vmem address: (x / 8) + (y * 80)
LOAD PUTPIXEL_X
@ -826,83 +750,36 @@ PUTPIXEL_4BPP:
ADD ; add results together for vmem addr
LOADCP FB_WA
OVER
STOREI ; store to framebuffer write addr register
DROP
LOADCP FB_RA ; and to framebuffer read addr register
SWAP ; swap addr and value for STOREI
STOREI
DROP
LOAD PUTPIXEL_X
; |0000.0000|0000.0000|0000.0000|0000.1111|
LOADC 7
AND ; calculate pixel position in word
LOADC 7
SWAP
SUB ; pixpos = 7 - (x & 7)
STORE PUTPIXEL_PIXPOS
DUP
STORE.B FB_WA ; set as write and read addresses
STORE.B FB_RA
; create pixel data from color value in
; leftmost pixel data bits (31-28)
LOADC 0
LOAD PUTPIXEL_COLOR
LOAD PUTPIXEL_PIXPOS
SHR ; rcount = pixpos / 2
ROTLOOP_:
DUP ; exit loop if rcount is 0
CBRANCH.Z ROTLOOP_END
SWAP ; pixel value is now on top of stack
BROT ; value = value << 8
SWAP ; rcount is now on top of stack
DEC 1 ; rcount = rcount - 1
BRANCH ROTLOOP_
ROTLOOP_END:
DROP ; drop rcount
; shifted pixel value is now at top of stack
LOAD PUTPIXEL_PIXPOS
LOADC 1
AND
CBRANCH.Z EVEN_PIXPOS
SHL 2 ; if pixpos is odd, shift by 4 bits
BPLC
SHL 2
EVEN_PIXPOS:
LOAD PUTPIXEL_X
; get bit value from x modulo 8
LOADC 7
AND
SHL 2 ; (x & 7) * 4 = offset into table
LOADCP INT_TO_MASK_TABLE
ADD
LOADI
SHL 2
STORE.B FB_SHIFTER ; store pixel into shifter
; read old vmem value
LOADCP FB_IO
LOADI
; mask bits
AND
; or in shifted pixel value
OR
LOAD PUTPIXEL_X ; use x coord as shift count
STORE.B FB_SHIFTCOUNT ; writing triggers shifting
; write new value
LOADCP FB_IO
SWAP
STOREI
DROP
LOAD.B FB_SHIFTERM ; get shift result as mask
LOAD.B FB_IO ; get background pixel data
AND ; remove bits for new pixel from bg
LOAD.B FB_SHIFTER ; load shifted pixel
OR ; OR in new pixel bits
STORE.B FB_IO ; write new pixel data word to vmem
LOAD PUTPIXEL_BPSAV
STOREREG BP
FPADJ PUTPIXEL_FS
RET
.CPOOL
INT_TO_MASK_TABLE:
.WORD %00001111_11111111_11111111_11111111
.WORD %11110000_11111111_11111111_11111111
.WORD %11111111_00001111_11111111_11111111
.WORD %11111111_11110000_11111111_11111111
.WORD %11111111_11111111_00001111_11111111
.WORD %11111111_11111111_11110000_11111111
.WORD %11111111_11111111_11111111_00001111
.WORD %11111111_11111111_11111111_11110000
; draw a line between two points
; parameters: x0, y0, x1, y1, color
.EQU DL_X0 0

View file

@ -162,10 +162,12 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
reg [4:0] acc_shift_count;
reg acc_start_shift;
reg [VMEM_DATA_WIDTH-1:0] acc_mask_in;
wire [VMEM_DATA_WIDTH-1:0] acc_mask_out;
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask;
reg [VMEM_DATA_WIDTH-1:0] acc_mask_buf;
reg [VMEM_DATA_WIDTH-1:0] acc_shiftmask_buf;
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask = acc_shiftmask_buf;
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH];
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0];
`endif
assign vmem_rd_en = rd_en;
@ -176,9 +178,9 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
(reg_sel == REG_CTL) ? status :
`ifdef ENABLE_FB_ACCEL
(reg_sel == REG_SHIFTER) ? acc_shifter_out_h:
(reg_sel == REG_SHIFTERM) ? acc_shifter_mask :
(reg_sel == REG_SHIFTERM) ? acc_shiftmask_buf :
(reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l :
(reg_sel == REG_MASKGEN) ? acc_mask_out :
(reg_sel == REG_MASKGEN) ? acc_mask_buf :
`endif
32'hFFFFFFFF;
@ -335,27 +337,34 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
acc_mask_in <= wr_data;
end
assign acc_mask_out = {
{4{|{acc_mask_in[31:28]}}},
{4{|{acc_mask_in[27:24]}}},
{4{|{acc_mask_in[23:20]}}},
{4{|{acc_mask_in[19:16]}}},
{4{|{acc_mask_in[15:12]}}},
{4{|{acc_mask_in[11:8]}}},
{4{|{acc_mask_in[7:4]}}},
{4{|{acc_mask_in[3:0]}}}
};
// mask output is buffered to avoid timing problems
// On every rising edge of cpu_clk, acc_mask_buf is loaded with a mask
// derived from acc_mask_in: for each 4-bit group of acc_mask_in the
// corresponding 4 bits of the mask are all 1 if the group is zero
// (reduction NOR, ~|) and all 0 if any bit of the group is set.
always @(posedge cpu_clk)
begin
acc_mask_buf <= {
{4{~|{acc_mask_in[31:28]}}},
{4{~|{acc_mask_in[27:24]}}},
{4{~|{acc_mask_in[23:20]}}},
{4{~|{acc_mask_in[19:16]}}},
{4{~|{acc_mask_in[15:12]}}},
{4{~|{acc_mask_in[11:8]}}},
{4{~|{acc_mask_in[7:4]}}},
{4{~|{acc_mask_in[3:0]}}}
};
end
assign acc_shifter_mask = {
{4{|{acc_shifter_out_h[31:28]}}},
{4{|{acc_shifter_out_h[27:24]}}},
{4{|{acc_shifter_out_h[23:20]}}},
{4{|{acc_shifter_out_h[19:16]}}},
{4{|{acc_shifter_out_h[15:12]}}},
{4{|{acc_shifter_out_h[11:8]}}},
{4{|{acc_shifter_out_h[7:4]}}},
{4{|{acc_shifter_out_h[3:0]}}}
};
// Registered mask derived from the upper word of the shifter output.
// On every rising edge of cpu_clk, each 4-bit group of acc_shifter_out_h
// yields 4 mask bits that are all 1 if the group is zero (reduction NOR)
// and all 0 if any bit of the group is set.
// A nonblocking assignment is used, as for acc_mask_buf: the register is
// read outside this block, and a blocking assignment in a clocked block
// would make the value seen by other logic in simulation depend on
// process evaluation order.
always @(posedge cpu_clk)
begin
	acc_shiftmask_buf <= {
		{4{~|{acc_shifter_out_h[31:28]}}},
		{4{~|{acc_shifter_out_h[27:24]}}},
		{4{~|{acc_shifter_out_h[23:20]}}},
		{4{~|{acc_shifter_out_h[19:16]}}},
		{4{~|{acc_shifter_out_h[15:12]}}},
		{4{~|{acc_shifter_out_h[11:8]}}},
		{4{~|{acc_shifter_out_h[7:4]}}},
		{4{~|{acc_shifter_out_h[3:0]}}}
	};
end
`endif
//

View file

@ -358,9 +358,7 @@
<Runs Version="1" Minor="22">
<Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true">
<Strategy Version="1" Minor="2">
<StratHandle Name="Flow_AlternateRoutability" Flow="Vivado Synthesis 2024">
<Desc>Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference.</Desc>
</StratHandle>
<StratHandle Name="Flow_AlternateRoutability" Flow="Vivado Synthesis 2024"/>
<Step Id="synth_design">
<Option Id="Directive">3</Option>
<Option Id="NoCombineLuts">1</Option>
@ -384,9 +382,7 @@
</Run>
<Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Best predicted directive for place_design." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true">
<Strategy Version="1" Minor="2">
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024">
<Desc>Best predicted directive for place_design.</Desc>
</StratHandle>
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024"/>
<Step Id="init_design"/>
<Step Id="opt_design">
<Option Id="Directive">0</Option>