Compare commits

..

3 commits

Author SHA1 Message Date
slederer
c119a2a5bb add line/points drawing benchmark 2026-01-31 17:26:13 +01:00
slederer
1e56251fc1 vgafb: buffer maskgen outputs to avoid timing problems 2026-01-31 17:24:36 +01:00
slederer
8900eb90be corelib: new putpixel routine using shifter/maskgen 2026-01-31 02:31:00 +01:00
4 changed files with 160 additions and 186 deletions

92
examples/graphbench.pas Normal file
View file

@ -0,0 +1,92 @@
(* graphbench: times the PutPixel and DrawLine graphics routines
   and prints the elapsed wall-clock time for each run *)
program graphbench;
(* timestamps written by startBench (starttime) and endBench (endtime) *)
var starttime,endtime:DateTime;
(* Begin a benchmark run: print the benchmark name in a 20 character
   wide field followed by a blank, then record the current time in
   the global starttime. The line is completed later by endBench. *)
procedure startBench(name:string);
begin
  write(name:20);
  write(' ');
  (* take the timestamp last so printing the label is not measured *)
  starttime := GetTime;
end;
(* Finish a benchmark run: record the current time in the global
   endtime and print the time elapsed since startBench as hh:mm:ss,
   followed by a newline.

   The difference is computed field by field with borrows from the
   next larger unit. Runs of 24 hours or more cannot be represented. *)
procedure endBench;
var secDelta, minDelta, hourDelta:integer;

  (* print i, preceded by a '0' if it is less than 10 *)
  procedure write2Digits(i:integer);
  begin
    if i < 10 then
      write('0');
    write(i);
  end;

begin
  endTime := GetTime;

  hourDelta := endtime.hours - starttime.hours;
  minDelta := endtime.minutes - starttime.minutes;
  secDelta := endtime.seconds - starttime.seconds;

  (* borrow one minute if the seconds field went negative *)
  if secDelta < 0 then
  begin
    secDelta := 60 + secDelta;
    minDelta := minDelta - 1;
  end;

  (* borrow one hour if the minutes field went negative *)
  if minDelta < 0 then
  begin
    minDelta := 60 + minDelta;
    hourDelta := hourDelta - 1;
  end;

  (* the run crossed midnight: wrap around instead of printing a
     negative hour count
     (assumes hours is a 0..23 time-of-day value - TODO confirm) *)
  if hourDelta < 0 then
    hourDelta := 24 + hourDelta;

  write2Digits(hourDelta);
  write(':'); write2Digits(minDelta);
  write(':'); write2Digits(secDelta);
  writeln;
end;
(* Return a pseudo-random integer r with 0 <= r < lessthan.

   Only the low 9 bits of random are used, so the result never
   exceeds 511 even if lessthan is larger. The value is reduced by
   repeated subtraction, so the distribution is not uniform; that is
   good enough for generating benchmark coordinates.

   If lessthan is not positive there is no valid result and 0 is
   returned. *)
function randint(lessthan:integer):integer;
var r:integer;
begin
  (* always draw a number so the random sequence advances the same
     way regardless of the bound *)
  r := random and 511;

  if lessthan <= 0 then
    r := 0
  else
    (* a single subtraction is only enough for lessthan >= 256;
       keep reducing until the value is in range *)
    while r >= lessthan do
      r := r - lessthan;

  randint := r;
end;
(* Draw count lines between random end points, with x taken from
   randint(500) and y from randint(400). The color starts at 1 and
   cycles through 1..15. *)
procedure drawlines(count:integer);
var n,color,xa,ya,xb,yb:integer;
begin
  color := 1;
  for n := 1 to count do
  begin
    (* the coordinates are drawn in this fixed order so the random
       sequence is consumed the same way on every run *)
    xa := randint(500);
    ya := randint(400);
    xb := randint(500);
    yb := randint(400);

    DrawLine(xa,ya,xb,yb,color);

    (* advance to the next color, wrapping from 15 back to 1 *)
    if color >= 15 then
      color := 1
    else
      color := color + 1;
  end;
end;
(* Set count pixels at random positions, with x taken from
   randint(500) and y from randint(400). The color starts at 1 and
   cycles through 1..15. *)
procedure drawpoints(count:integer);
var n,color,px,py:integer;
begin
  color := 1;
  for n := 1 to count do
  begin
    (* x is drawn before y so the random sequence is consumed in a
       fixed order *)
    px := randint(500);
    py := randint(400);

    PutPixel(px,py,color);

    (* advance to the next color, wrapping from 15 back to 1 *)
    if color >= 15 then
      color := 1
    else
      color := color + 1;
  end;
end;
(* main program: run the pixel benchmark, then the line benchmark.
   InitGraphics is called before each run (presumably to start from
   a freshly initialized display - confirm against the graphics
   library). *)
begin
  (* benchmark 1: 200000 single pixels *)
  InitGraphics;
  startBench('200K points');
  drawpoints(200000);
  endBench;

  (* benchmark 2: 10000 lines *)
  InitGraphics;
  startBench('10K lines');
  drawlines(10000);
  endBench;
end.

View file

@ -706,108 +706,32 @@ CMPWORDS_XT2:
.EQU FB_PS $90C .EQU FB_PS $90C
.EQU FB_PD $910 .EQU FB_PD $910
.EQU FB_CTL $914 .EQU FB_CTL $914
; set a pixel in fb memory .EQU FB_SHIFTER $918
; parameters: x,y - coordinates .EQU FB_SHIFTCOUNT $91C
PUTPIXEL_1BPP: .EQU FB_SHIFTERM $920
; calculate vmem address: .EQU FB_SHIFTERSP $924
OVER ; duplicate x .EQU FB_MASKGEN $928
; divide x by 32
SHR
SHR
SHR
SHR
SHR
SWAP
; multiply y by words per line
SHL 2
SHL 2
SHL
ADD ; add results together for vmem addr ; draw a single pixel
; args: x, y, color
DUP
LOADCP FB_WA
SWAP
STOREI ; store to framebuffer write addr register
DROP
LOADCP FB_RA ; and to framebuffer read addr register
SWAP
STOREI
DROP
; x is now at top of stack
; get bit value from x modulo 32
LOADC 31
AND
SHL 2 ; (x & 31) * 4 = offset into table
LOADCP INT_TO_PIX_TABLE
ADD
LOADI
LOADCP FB_IO
; read old vmem value
LOADCP FB_IO
LOADI
; or in new bit
OR
; write new value
STOREI
DROP
RET
INT_TO_PIX_TABLE:
.WORD %10000000_00000000_00000000_00000000
.WORD %01000000_00000000_00000000_00000000
.WORD %00100000_00000000_00000000_00000000
.WORD %00010000_00000000_00000000_00000000
.WORD %00001000_00000000_00000000_00000000
.WORD %00000100_00000000_00000000_00000000
.WORD %00000010_00000000_00000000_00000000
.WORD %00000001_00000000_00000000_00000000
.WORD %00000000_10000000_00000000_00000000
.WORD %00000000_01000000_00000000_00000000
.WORD %00000000_00100000_00000000_00000000
.WORD %00000000_00010000_00000000_00000000
.WORD %00000000_00001000_00000000_00000000
.WORD %00000000_00000100_00000000_00000000
.WORD %00000000_00000010_00000000_00000000
.WORD %00000000_00000001_00000000_00000000
.WORD %00000000_00000000_10000000_00000000
.WORD %00000000_00000000_01000000_00000000
.WORD %00000000_00000000_00100000_00000000
.WORD %00000000_00000000_00010000_00000000
.WORD %00000000_00000000_00001000_00000000
.WORD %00000000_00000000_00000100_00000000
.WORD %00000000_00000000_00000010_00000000
.WORD %00000000_00000000_00000001_00000000
.WORD %00000000_00000000_00000000_10000000
.WORD %00000000_00000000_00000000_01000000
.WORD %00000000_00000000_00000000_00100000
.WORD %00000000_00000000_00000000_00010000
.WORD %00000000_00000000_00000000_00001000
.WORD %00000000_00000000_00000000_00000100
.WORD %00000000_00000000_00000000_00000010
.WORD %00000000_00000000_00000000_00000001
PUTMPIXEL:
LOADC 1
; set a pixel in fb memory
; parameters: x,y,color - coordinates, color value (0-15)
PUTPIXEL: PUTPIXEL:
PUTPIXEL_4BPP: PUTPIXEL_4BPP:
.EQU PUTPIXEL_X 0 .EQU PUTPIXEL_X 0
.EQU PUTPIXEL_Y 4 .EQU PUTPIXEL_Y 4
.EQU PUTPIXEL_COLOR 8 .EQU PUTPIXEL_COLOR 8
.EQU PUTPIXEL_PIXPOS 12 .EQU PUTPIXEL_BPSAV 12
.EQU PUTPIXEL_FS 16 .EQU PUTPIXEL_FS 16
FPADJ -PUTPIXEL_FS FPADJ -PUTPIXEL_FS
STORE PUTPIXEL_COLOR STORE PUTPIXEL_COLOR
STORE PUTPIXEL_Y STORE PUTPIXEL_Y
STORE PUTPIXEL_X STORE PUTPIXEL_X
LOADREG BP
STORE PUTPIXEL_BPSAV
LOADC 0
STOREREG BP
; calculate vmem address: (x / 8) + (y * 80) ; calculate vmem address: (x / 8) + (y * 80)
LOAD PUTPIXEL_X LOAD PUTPIXEL_X
@ -826,83 +750,36 @@ PUTPIXEL_4BPP:
ADD ; add results together for vmem addr ADD ; add results together for vmem addr
LOADCP FB_WA DUP
OVER STORE.B FB_WA ; set as write and read addresses
STOREI ; store to framebuffer write addr register STORE.B FB_RA
DROP
LOADCP FB_RA ; and to framebuffer read addr register
SWAP ; swap addr and value for STOREI
STOREI
DROP
LOAD PUTPIXEL_X
; |0000.0000|0000.0000|0000.0000|0000.1111|
LOADC 7
AND ; calculate pixel position in word
LOADC 7
SWAP
SUB ; pixpos = 7 - (x & 7)
STORE PUTPIXEL_PIXPOS
; create pixel data from color value in
; leftmost pixel data bits (31-28)
LOADC 0
LOAD PUTPIXEL_COLOR LOAD PUTPIXEL_COLOR
LOAD PUTPIXEL_PIXPOS BPLC
SHR ; rcount = pixpos / 2
ROTLOOP_:
DUP ; exit loop if rcount is 0
CBRANCH.Z ROTLOOP_END
SWAP ; pixel value is now on top of stack
BROT ; value = value << 8
SWAP ; rcount is now on top of stack
DEC 1 ; rcount = rcount - 1
BRANCH ROTLOOP_
ROTLOOP_END:
DROP ; drop rcount
; shifted pixel value is now at top of stack
LOAD PUTPIXEL_PIXPOS
LOADC 1
AND
CBRANCH.Z EVEN_PIXPOS
SHL 2 ; if pixpos is odd, shift by 4 bits
SHL 2 SHL 2
EVEN_PIXPOS: SHL 2
LOAD PUTPIXEL_X STORE.B FB_SHIFTER ; store pixel into shifter
; get bit value from x modulo 8
LOADC 7
AND
SHL 2 ; (x & 7) * 4 = offset into table
LOADCP INT_TO_MASK_TABLE
ADD
LOADI
; read old vmem value LOAD PUTPIXEL_X ; use x coord as shift count
LOADCP FB_IO STORE.B FB_SHIFTCOUNT ; writing triggers shifting
LOADI
; mask bits
AND
; or in shifted pixel value
OR
; write new value LOAD.B FB_SHIFTERM ; get shift result as mask
LOADCP FB_IO LOAD.B FB_IO ; get background pixel data
SWAP AND ; remove bits for new pixel from bg
STOREI
DROP LOAD.B FB_SHIFTER ; load shifted pixel
OR ; OR in new pixel bits
STORE.B FB_IO ; write new pixel data word to vmem
LOAD PUTPIXEL_BPSAV
STOREREG BP
FPADJ PUTPIXEL_FS FPADJ PUTPIXEL_FS
RET RET
.CPOOL
INT_TO_MASK_TABLE:
.WORD %00001111_11111111_11111111_11111111
.WORD %11110000_11111111_11111111_11111111
.WORD %11111111_00001111_11111111_11111111
.WORD %11111111_11110000_11111111_11111111
.WORD %11111111_11111111_00001111_11111111
.WORD %11111111_11111111_11110000_11111111
.WORD %11111111_11111111_11111111_00001111
.WORD %11111111_11111111_11111111_11110000
; draw a line between two points ; draw a line between two points
; parameters: x0, y0, x1, y1, color ; parameters: x0, y0, x1, y1, color
.EQU DL_X0 0 .EQU DL_X0 0

View file

@ -162,10 +162,12 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
reg [4:0] acc_shift_count; reg [4:0] acc_shift_count;
reg acc_start_shift; reg acc_start_shift;
reg [VMEM_DATA_WIDTH-1:0] acc_mask_in; reg [VMEM_DATA_WIDTH-1:0] acc_mask_in;
wire [VMEM_DATA_WIDTH-1:0] acc_mask_out; reg [VMEM_DATA_WIDTH-1:0] acc_mask_buf;
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask; reg [VMEM_DATA_WIDTH-1:0] acc_shiftmask_buf;
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask = acc_shiftmask_buf;
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH]; wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH];
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0]; wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0];
`endif `endif
assign vmem_rd_en = rd_en; assign vmem_rd_en = rd_en;
@ -176,9 +178,9 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
(reg_sel == REG_CTL) ? status : (reg_sel == REG_CTL) ? status :
`ifdef ENABLE_FB_ACCEL `ifdef ENABLE_FB_ACCEL
(reg_sel == REG_SHIFTER) ? acc_shifter_out_h: (reg_sel == REG_SHIFTER) ? acc_shifter_out_h:
(reg_sel == REG_SHIFTERM) ? acc_shifter_mask : (reg_sel == REG_SHIFTERM) ? acc_shiftmask_buf :
(reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l : (reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l :
(reg_sel == REG_MASKGEN) ? acc_mask_out : (reg_sel == REG_MASKGEN) ? acc_mask_buf :
`endif `endif
32'hFFFFFFFF; 32'hFFFFFFFF;
@ -335,27 +337,34 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
acc_mask_in <= wr_data; acc_mask_in <= wr_data;
end end
assign acc_mask_out = { // mask output is buffered to avoid timing problems
{4{|{acc_mask_in[31:28]}}}, always @(posedge cpu_clk)
{4{|{acc_mask_in[27:24]}}}, begin
{4{|{acc_mask_in[23:20]}}}, acc_mask_buf <= {
{4{|{acc_mask_in[19:16]}}}, {4{~|{acc_mask_in[31:28]}}},
{4{|{acc_mask_in[15:12]}}}, {4{~|{acc_mask_in[27:24]}}},
{4{|{acc_mask_in[11:8]}}}, {4{~|{acc_mask_in[23:20]}}},
{4{|{acc_mask_in[7:4]}}}, {4{~|{acc_mask_in[19:16]}}},
{4{|{acc_mask_in[3:0]}}} {4{~|{acc_mask_in[15:12]}}},
{4{~|{acc_mask_in[11:8]}}},
{4{~|{acc_mask_in[7:4]}}},
{4{~|{acc_mask_in[3:0]}}}
}; };
end
assign acc_shifter_mask = { always @(posedge cpu_clk)
{4{|{acc_shifter_out_h[31:28]}}}, begin
{4{|{acc_shifter_out_h[27:24]}}}, acc_shiftmask_buf = {
{4{|{acc_shifter_out_h[23:20]}}}, {4{~|{acc_shifter_out_h[31:28]}}},
{4{|{acc_shifter_out_h[19:16]}}}, {4{~|{acc_shifter_out_h[27:24]}}},
{4{|{acc_shifter_out_h[15:12]}}}, {4{~|{acc_shifter_out_h[23:20]}}},
{4{|{acc_shifter_out_h[11:8]}}}, {4{~|{acc_shifter_out_h[19:16]}}},
{4{|{acc_shifter_out_h[7:4]}}}, {4{~|{acc_shifter_out_h[15:12]}}},
{4{|{acc_shifter_out_h[3:0]}}} {4{~|{acc_shifter_out_h[11:8]}}},
{4{~|{acc_shifter_out_h[7:4]}}},
{4{~|{acc_shifter_out_h[3:0]}}}
}; };
end
`endif `endif
// //

View file

@ -358,9 +358,7 @@
<Runs Version="1" Minor="22"> <Runs Version="1" Minor="22">
<Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true"> <Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true">
<Strategy Version="1" Minor="2"> <Strategy Version="1" Minor="2">
<StratHandle Name="Flow_AlternateRoutability" Flow="Vivado Synthesis 2024"> <StratHandle Name="Flow_AlternateRoutability" Flow="Vivado Synthesis 2024"/>
<Desc>Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference.</Desc>
</StratHandle>
<Step Id="synth_design"> <Step Id="synth_design">
<Option Id="Directive">3</Option> <Option Id="Directive">3</Option>
<Option Id="NoCombineLuts">1</Option> <Option Id="NoCombineLuts">1</Option>
@ -384,9 +382,7 @@
</Run> </Run>
<Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Best predicted directive for place_design." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true"> <Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Best predicted directive for place_design." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true">
<Strategy Version="1" Minor="2"> <Strategy Version="1" Minor="2">
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024"> <StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024"/>
<Desc>Best predicted directive for place_design.</Desc>
</StratHandle>
<Step Id="init_design"/> <Step Id="init_design"/>
<Step Id="opt_design"> <Step Id="opt_design">
<Option Id="Directive">0</Option> <Option Id="Directive">0</Option>