Compare commits
3 commits
042a18fc9b
...
c119a2a5bb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c119a2a5bb | ||
|
|
1e56251fc1 | ||
|
|
8900eb90be |
4 changed files with 160 additions and 186 deletions
92
examples/graphbench.pas
Normal file
92
examples/graphbench.pas
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
program graphbench;
|
||||
var starttime,endtime:DateTime;
|
||||
|
||||
(* Prints the benchmark label and records the start timestamp
   in the global variable starttime. *)
procedure startBench(name:string);
begin
    (* label is right-aligned in a 20 character field, followed by a blank *)
    write(name:20);
    write(' ');
    (* the timestamp is taken after printing the label *)
    starttime := GetTime;
end;
|
||||
|
||||
(* Records the end timestamp in the global variable endtime and prints
   the time elapsed since startBench as hh:mm:ss, followed by a newline.
   Only the hours, minutes and seconds fields are compared, so the
   measured interval must be shorter than 24 hours. *)
procedure endBench;
var secDelta, minDelta, hourDelta:integer;

    (* writes i with a leading zero if it is below 10 *)
    procedure write2Digits(i:integer);
    begin
        if i < 10 then
            write('0');
        write(i);
    end;

begin
    endTime := GetTime;

    hourDelta := endtime.hours - starttime.hours;
    minDelta := endtime.minutes - starttime.minutes;
    secDelta := endtime.seconds - starttime.seconds;

    (* borrow one minute if the seconds wrapped around *)
    if secDelta < 0 then
    begin
        secDelta := 60 + secDelta;
        minDelta := minDelta - 1;
    end;

    (* borrow one hour if the minutes wrapped around *)
    if minDelta < 0 then
    begin
        minDelta := 60 + minDelta;
        hourDelta := hourDelta - 1;
    end;

    (* the run crossed midnight: wrap the hour difference around
       instead of printing a negative number
       (assumes the hours field counts 0..23 - TODO confirm) *)
    if hourDelta < 0 then
        hourDelta := 24 + hourDelta;

    write2Digits(hourDelta);
    write(':'); write2Digits(minDelta);
    write(':'); write2Digits(secDelta);
    writeln;
end;
|
||||
|
||||
(* Returns a pseudo-random integer r with 0 <= r < lessthan.
   The raw random value is limited to 0..511 by masking, so results
   never exceed 511 regardless of lessthan. The reduction is done by
   repeated subtraction (at most a few iterations for the bounds used
   in this program) so that no division is needed.
   The distribution is not uniform.
   For lessthan <= 0 the masked value is returned unchanged. *)
function randint(lessthan:integer):integer;
var r:integer;
begin
    r := random and 511;
    (* a single subtraction is only sufficient for lessthan >= 256;
       loop so that smaller bounds are honoured as well *)
    if lessthan > 0 then
        while r >= lessthan do
            r := r - lessthan;
    randint := r;
end;
|
||||
|
||||
(* Draws count lines between random end points.
   x coordinates are taken from randint(500), y coordinates from
   randint(400). The color cycles through 1..15, starting at 1. *)
procedure drawlines(count:integer);
var n,c,xa,ya,xb,yb:integer;
begin
    c := 1;
    for n := 1 to count do
    begin
        (* keep the order of the randint calls: x, y, x, y *)
        xa := randint(500);
        ya := randint(400);
        xb := randint(500);
        yb := randint(400);
        DrawLine(xa,ya,xb,yb,c);

        (* advance to the next color, wrapping from 15 back to 1 *)
        if c >= 15 then
            c := 1
        else
            c := c + 1;
    end;
end;
|
||||
|
||||
(* Sets count pixels at random positions.
   x is taken from randint(500), y from randint(400).
   The color cycles through 1..15, starting at 1. *)
procedure drawpoints(count:integer);
var n,c,px,py:integer;
begin
    c := 1;
    for n := 1 to count do
    begin
        (* keep the order of the randint calls: x first, then y *)
        px := randint(500);
        py := randint(400);
        PutPixel(px,py,c);

        (* advance to the next color, wrapping from 15 back to 1 *)
        if c >= 15 then
            c := 1
        else
            c := c + 1;
    end;
end;
|
||||
|
||||
(* main program: runs each benchmark after a fresh InitGraphics
   and prints one result line per benchmark *)
begin
    (* benchmark 1: single pixels *)
    InitGraphics;
    startBench('200K points');
    drawpoints(200000);
    endBench;

    (* benchmark 2: lines *)
    InitGraphics;
    startBench('10K lines');
    drawlines(10000);
    endBench;
end.
|
||||
189
lib/corelib.s
189
lib/corelib.s
|
|
@ -706,108 +706,32 @@ CMPWORDS_XT2:
|
|||
.EQU FB_PS $90C
|
||||
.EQU FB_PD $910
|
||||
.EQU FB_CTL $914
|
||||
; set a pixel in fb memory
|
||||
; parameters: x,y - coordinates
|
||||
PUTPIXEL_1BPP:
|
||||
; calculate vmem address:
|
||||
OVER ; duplicate x
|
||||
; divide x by 32
|
||||
SHR
|
||||
SHR
|
||||
SHR
|
||||
SHR
|
||||
SHR
|
||||
SWAP
|
||||
; multiply y by words per line
|
||||
SHL 2
|
||||
SHL 2
|
||||
SHL
|
||||
.EQU FB_SHIFTER $918
|
||||
.EQU FB_SHIFTCOUNT $91C
|
||||
.EQU FB_SHIFTERM $920
|
||||
.EQU FB_SHIFTERSP $924
|
||||
.EQU FB_MASKGEN $928
|
||||
|
||||
ADD ; add results together for vmem addr
|
||||
; draw a single pixel
|
||||
; args: x, y, color
|
||||
|
||||
DUP
|
||||
LOADCP FB_WA
|
||||
SWAP
|
||||
STOREI ; store to framebuffer write addr register
|
||||
DROP
|
||||
LOADCP FB_RA ; and to framebuffer read addr register
|
||||
SWAP
|
||||
STOREI
|
||||
DROP
|
||||
|
||||
; x is now at top of stack
|
||||
; get bit value from x modulo 32
|
||||
LOADC 31
|
||||
AND
|
||||
SHL 2 ; (x & 31) * 4 = offset into table
|
||||
LOADCP INT_TO_PIX_TABLE
|
||||
ADD
|
||||
LOADI
|
||||
|
||||
LOADCP FB_IO
|
||||
; read old vmem value
|
||||
LOADCP FB_IO
|
||||
LOADI
|
||||
; or in new bit
|
||||
OR
|
||||
; write new value
|
||||
STOREI
|
||||
DROP
|
||||
|
||||
RET
|
||||
|
||||
INT_TO_PIX_TABLE:
|
||||
.WORD %10000000_00000000_00000000_00000000
|
||||
.WORD %01000000_00000000_00000000_00000000
|
||||
.WORD %00100000_00000000_00000000_00000000
|
||||
.WORD %00010000_00000000_00000000_00000000
|
||||
.WORD %00001000_00000000_00000000_00000000
|
||||
.WORD %00000100_00000000_00000000_00000000
|
||||
.WORD %00000010_00000000_00000000_00000000
|
||||
.WORD %00000001_00000000_00000000_00000000
|
||||
.WORD %00000000_10000000_00000000_00000000
|
||||
.WORD %00000000_01000000_00000000_00000000
|
||||
.WORD %00000000_00100000_00000000_00000000
|
||||
.WORD %00000000_00010000_00000000_00000000
|
||||
.WORD %00000000_00001000_00000000_00000000
|
||||
.WORD %00000000_00000100_00000000_00000000
|
||||
.WORD %00000000_00000010_00000000_00000000
|
||||
.WORD %00000000_00000001_00000000_00000000
|
||||
.WORD %00000000_00000000_10000000_00000000
|
||||
.WORD %00000000_00000000_01000000_00000000
|
||||
.WORD %00000000_00000000_00100000_00000000
|
||||
.WORD %00000000_00000000_00010000_00000000
|
||||
.WORD %00000000_00000000_00001000_00000000
|
||||
.WORD %00000000_00000000_00000100_00000000
|
||||
.WORD %00000000_00000000_00000010_00000000
|
||||
.WORD %00000000_00000000_00000001_00000000
|
||||
.WORD %00000000_00000000_00000000_10000000
|
||||
.WORD %00000000_00000000_00000000_01000000
|
||||
.WORD %00000000_00000000_00000000_00100000
|
||||
.WORD %00000000_00000000_00000000_00010000
|
||||
.WORD %00000000_00000000_00000000_00001000
|
||||
.WORD %00000000_00000000_00000000_00000100
|
||||
.WORD %00000000_00000000_00000000_00000010
|
||||
.WORD %00000000_00000000_00000000_00000001
|
||||
|
||||
PUTMPIXEL:
|
||||
LOADC 1
|
||||
; set a pixel in fb memory
|
||||
; parameters: x,y,color - coordinates, color value (0-15)
|
||||
PUTPIXEL:
|
||||
PUTPIXEL_4BPP:
|
||||
.EQU PUTPIXEL_X 0
|
||||
.EQU PUTPIXEL_Y 4
|
||||
.EQU PUTPIXEL_COLOR 8
|
||||
.EQU PUTPIXEL_PIXPOS 12
|
||||
.EQU PUTPIXEL_BPSAV 12
|
||||
.EQU PUTPIXEL_FS 16
|
||||
|
||||
FPADJ -PUTPIXEL_FS
|
||||
|
||||
STORE PUTPIXEL_COLOR
|
||||
STORE PUTPIXEL_Y
|
||||
STORE PUTPIXEL_X
|
||||
|
||||
LOADREG BP
|
||||
STORE PUTPIXEL_BPSAV
|
||||
LOADC 0
|
||||
STOREREG BP
|
||||
|
||||
; calculate vmem address: (x / 8) + (y * 80)
|
||||
LOAD PUTPIXEL_X
|
||||
|
|
@ -826,83 +750,36 @@ PUTPIXEL_4BPP:
|
|||
|
||||
ADD ; add results together for vmem addr
|
||||
|
||||
LOADCP FB_WA
|
||||
OVER
|
||||
STOREI ; store to framebuffer write addr register
|
||||
DROP
|
||||
LOADCP FB_RA ; and to framebuffer read addr register
|
||||
SWAP ; swap addr and value for STOREI
|
||||
STOREI
|
||||
DROP
|
||||
|
||||
LOAD PUTPIXEL_X
|
||||
; |0000.0000|0000.0000|0000.0000|0000.1111|
|
||||
LOADC 7
|
||||
AND ; calculate pixel position in word
|
||||
LOADC 7
|
||||
SWAP
|
||||
SUB ; pixpos = 7 - (x & 7)
|
||||
STORE PUTPIXEL_PIXPOS
|
||||
DUP
|
||||
STORE.B FB_WA ; set as write and read addresses
|
||||
STORE.B FB_RA
|
||||
|
||||
; create pixel data from color value in
|
||||
; leftmost pixel data bits (31-28)
|
||||
LOADC 0
|
||||
LOAD PUTPIXEL_COLOR
|
||||
LOAD PUTPIXEL_PIXPOS
|
||||
SHR ; rcount = pixpos / 2
|
||||
ROTLOOP_:
|
||||
DUP ; exit loop if rcount is 0
|
||||
CBRANCH.Z ROTLOOP_END
|
||||
SWAP ; pixel value is now on top of stack
|
||||
BROT ; value = value << 8
|
||||
SWAP ; rcount is now on top of stack
|
||||
DEC 1 ; rcount = rcount - 1
|
||||
BRANCH ROTLOOP_
|
||||
ROTLOOP_END:
|
||||
DROP ; drop rcount
|
||||
; shifted pixel value is now at top of stack
|
||||
LOAD PUTPIXEL_PIXPOS
|
||||
LOADC 1
|
||||
AND
|
||||
CBRANCH.Z EVEN_PIXPOS
|
||||
SHL 2 ; if pixpos is odd, shift by 4 bits
|
||||
BPLC
|
||||
SHL 2
|
||||
EVEN_PIXPOS:
|
||||
LOAD PUTPIXEL_X
|
||||
; get bit value from x modulo 8
|
||||
LOADC 7
|
||||
AND
|
||||
SHL 2 ; (x & 7) * 4 = offset into table
|
||||
LOADCP INT_TO_MASK_TABLE
|
||||
ADD
|
||||
LOADI
|
||||
SHL 2
|
||||
STORE.B FB_SHIFTER ; store pixel into shifter
|
||||
|
||||
; read old vmem value
|
||||
LOADCP FB_IO
|
||||
LOADI
|
||||
; mask bits
|
||||
AND
|
||||
; or in shifted pixel value
|
||||
OR
|
||||
LOAD PUTPIXEL_X ; use x coord as shift count
|
||||
STORE.B FB_SHIFTCOUNT ; writing triggers shifting
|
||||
|
||||
; write new value
|
||||
LOADCP FB_IO
|
||||
SWAP
|
||||
STOREI
|
||||
DROP
|
||||
LOAD.B FB_SHIFTERM ; get shift result as mask
|
||||
LOAD.B FB_IO ; get background pixel data
|
||||
AND ; remove bits for new pixel from bg
|
||||
|
||||
LOAD.B FB_SHIFTER ; load shifted pixel
|
||||
OR ; OR in new pixel bits
|
||||
STORE.B FB_IO ; write new pixel data word to vmem
|
||||
|
||||
LOAD PUTPIXEL_BPSAV
|
||||
STOREREG BP
|
||||
|
||||
FPADJ PUTPIXEL_FS
|
||||
RET
|
||||
|
||||
.CPOOL
|
||||
|
||||
INT_TO_MASK_TABLE:
|
||||
.WORD %00001111_11111111_11111111_11111111
|
||||
.WORD %11110000_11111111_11111111_11111111
|
||||
.WORD %11111111_00001111_11111111_11111111
|
||||
.WORD %11111111_11110000_11111111_11111111
|
||||
.WORD %11111111_11111111_00001111_11111111
|
||||
.WORD %11111111_11111111_11110000_11111111
|
||||
.WORD %11111111_11111111_11111111_00001111
|
||||
.WORD %11111111_11111111_11111111_11110000
|
||||
|
||||
; draw a line between two points
|
||||
; parameters: x0, y0, x1, y1, color
|
||||
.EQU DL_X0 0
|
||||
|
|
|
|||
|
|
@ -162,10 +162,12 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
|
|||
reg [4:0] acc_shift_count;
|
||||
reg acc_start_shift;
|
||||
reg [VMEM_DATA_WIDTH-1:0] acc_mask_in;
|
||||
wire [VMEM_DATA_WIDTH-1:0] acc_mask_out;
|
||||
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask;
|
||||
reg [VMEM_DATA_WIDTH-1:0] acc_mask_buf;
|
||||
reg [VMEM_DATA_WIDTH-1:0] acc_shiftmask_buf;
|
||||
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask = acc_shiftmask_buf;
|
||||
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH];
|
||||
wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0];
|
||||
|
||||
`endif
|
||||
|
||||
assign vmem_rd_en = rd_en;
|
||||
|
|
@ -176,9 +178,9 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
|
|||
(reg_sel == REG_CTL) ? status :
|
||||
`ifdef ENABLE_FB_ACCEL
|
||||
(reg_sel == REG_SHIFTER) ? acc_shifter_out_h:
|
||||
(reg_sel == REG_SHIFTERM) ? acc_shifter_mask :
|
||||
(reg_sel == REG_SHIFTERM) ? acc_shiftmask_buf :
|
||||
(reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l :
|
||||
(reg_sel == REG_MASKGEN) ? acc_mask_out :
|
||||
(reg_sel == REG_MASKGEN) ? acc_mask_buf :
|
||||
`endif
|
||||
32'hFFFFFFFF;
|
||||
|
||||
|
|
@ -335,27 +337,34 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) (
|
|||
acc_mask_in <= wr_data;
|
||||
end
|
||||
|
||||
assign acc_mask_out = {
|
||||
{4{|{acc_mask_in[31:28]}}},
|
||||
{4{|{acc_mask_in[27:24]}}},
|
||||
{4{|{acc_mask_in[23:20]}}},
|
||||
{4{|{acc_mask_in[19:16]}}},
|
||||
{4{|{acc_mask_in[15:12]}}},
|
||||
{4{|{acc_mask_in[11:8]}}},
|
||||
{4{|{acc_mask_in[7:4]}}},
|
||||
{4{|{acc_mask_in[3:0]}}}
|
||||
// Mask generator output register (buffered to avoid timing problems).
// For each of the eight 4-bit groups of acc_mask_in, the matching
// 4-bit group of acc_mask_buf is set to all ones when the input group
// is zero and to all zeros otherwise.
always @(posedge cpu_clk)
begin : acc_mask_buf_update
    integer px;
    for (px = 0; px < 8; px = px + 1)
        acc_mask_buf[px*4 +: 4] <= {4{~|acc_mask_in[px*4 +: 4]}};
end
|
||||
|
||||
assign acc_shifter_mask = {
|
||||
{4{|{acc_shifter_out_h[31:28]}}},
|
||||
{4{|{acc_shifter_out_h[27:24]}}},
|
||||
{4{|{acc_shifter_out_h[23:20]}}},
|
||||
{4{|{acc_shifter_out_h[19:16]}}},
|
||||
{4{|{acc_shifter_out_h[15:12]}}},
|
||||
{4{|{acc_shifter_out_h[11:8]}}},
|
||||
{4{|{acc_shifter_out_h[7:4]}}},
|
||||
{4{|{acc_shifter_out_h[3:0]}}}
|
||||
// Shifter mask output register.
// For each of the eight 4-bit groups in the upper word of the shifter
// output (acc_shifter_out_h), the matching 4-bit group of
// acc_shiftmask_buf is set to all ones when the group is zero and to
// all zeros otherwise.
// A non-blocking assignment is used because this is a clocked register
// that is read outside this block; a blocking assignment here can cause
// simulation races and simulation/synthesis mismatches.
always @(posedge cpu_clk)
begin
    acc_shiftmask_buf <= {
        {4{~|{acc_shifter_out_h[31:28]}}},
        {4{~|{acc_shifter_out_h[27:24]}}},
        {4{~|{acc_shifter_out_h[23:20]}}},
        {4{~|{acc_shifter_out_h[19:16]}}},
        {4{~|{acc_shifter_out_h[15:12]}}},
        {4{~|{acc_shifter_out_h[11:8]}}},
        {4{~|{acc_shifter_out_h[7:4]}}},
        {4{~|{acc_shifter_out_h[3:0]}}}
    };
end
|
||||
`endif
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -358,9 +358,7 @@
|
|||
<Runs Version="1" Minor="22">
|
||||
<Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/synth_1" IncludeInArchive="true" IsChild="false" AutoIncrementalDir="$PSRCDIR/utils_1/imports/synth_1" AutoRQSDir="$PSRCDIR/utils_1/imports/synth_1" ParallelReportGen="true">
|
||||
<Strategy Version="1" Minor="2">
|
||||
<StratHandle Name="Flow_AlternateRoutability" Flow="Vivado Synthesis 2024">
|
||||
<Desc>Performs optimizations which creates alternative logic technology mapping, including disabling LUT combining, forcing F7/F8/F9 to logic, increasing the threshold of shift register inference.</Desc>
|
||||
</StratHandle>
|
||||
<StratHandle Name="Flow_AlternateRoutability" Flow="Vivado Synthesis 2024"/>
|
||||
<Step Id="synth_design">
|
||||
<Option Id="Directive">3</Option>
|
||||
<Option Id="NoCombineLuts">1</Option>
|
||||
|
|
@ -384,9 +382,7 @@
|
|||
</Run>
|
||||
<Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7a35ticsg324-1L" ConstrsSet="constrs_1" Description="Best predicted directive for place_design." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" Dir="$PRUNDIR/impl_1" SynthRun="synth_1" IncludeInArchive="true" IsChild="false" GenFullBitstream="true" AutoIncrementalDir="$PSRCDIR/utils_1/imports/impl_1" LaunchOptions="-jobs 6 " AutoRQSDir="$PSRCDIR/utils_1/imports/impl_1" ParallelReportGen="true">
|
||||
<Strategy Version="1" Minor="2">
|
||||
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024">
|
||||
<Desc>Best predicted directive for place_design.</Desc>
|
||||
</StratHandle>
|
||||
<StratHandle Name="Performance_Auto_1" Flow="Vivado Implementation 2024"/>
|
||||
<Step Id="init_design"/>
|
||||
<Step Id="opt_design">
|
||||
<Option Id="Directive">0</Option>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue