diff --git a/LICENSE.md b/LICENSE.md index 3755dbb..6392510 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -4,7 +4,7 @@ All files, except where explicitly stated otherwise, are licensed according to t ------------------------------------------------------------------------------ -Copyright 2024 Sebastian Lederer +Copyright 2024-2026 Sebastian Lederer Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/doc/mem.md b/doc/mem.md index f7dbc2b..29177b2 100644 --- a/doc/mem.md +++ b/doc/mem.md @@ -22,11 +22,12 @@ The _BSEL_ and _BPLC_ instructions are designed to assist with accessing bytes w The byte ordering is big-endian. ## Accessing the I/O Area -The I/O area organizes memory slightly different. Here, pointing out individual bytes is not very useful, so the I/O controllers use register addresses with increments of one. In practice, there is only the VGA framebuffer controller which uses multiple registers. +The I/O area uses the same word addressing as the rest of memory, so the registers of the I/O controllers are located at address increments of four. In practice, only the VGA framebuffer controller and the audio controller use multiple registers. +For the other controllers, there is a single 32-bit register that is mirrored throughout the address range of the corresponding I/O slot. The individual I/O controllers each have a memory area of 128 bytes, so there is a maximum number of 16 I/O controllers. -Currently, only I/O slots 0-3 are being used. +Currently, only I/O slots 0-4 are being used. 
|I/O slot| Address | Controller | |--------|---------|------------| diff --git a/doc/tdraudio.md b/doc/tdraudio.md index 999ebfc..5d8b22f 100644 --- a/doc/tdraudio.md +++ b/doc/tdraudio.md @@ -10,12 +10,12 @@ For the first channel the register addresses are: |Address|Description| |-------|-----------| | $A00 | Control Register | -| $A01 | Clock Divider Register | -| $A02 | Amplitude Register | +| $A04 | Clock Divider Register | +| $A08 | Amplitude Register | -The register addresses for the second channel start at $A04, -the third channel at $A08 -and the fourth channel at $A0C. +The register addresses for the second channel start at $A10, +the third channel at $A20 +and the fourth channel at $A30. ## Reading the control register diff --git a/doc/vga.md b/doc/vga.md index b53f56d..76520f2 100644 --- a/doc/vga.md +++ b/doc/vga.md @@ -4,13 +4,16 @@ Registers |Name|Address|Description| |----|-------|-----------| |_FB_RA_ | $900 | Read Address | -|_FB_WA_ | $901 | Write Address | -| _FB_IO_ | $902 | I/O Register | -| _FB_PS_ | $903 | Palette Select | -| _FB_PD_ | $904 | Palette Data | -| _FB_CTL_ | $905 | Control Register | - - +|_FB_WA_ | $904 | Write Address | +| _FB_IO_ | $908 | I/O Register | +| _FB_PS_ | $90C | Palette Select | +| _FB_PD_ | $910 | Palette Data | +| _FB_CTL_ | $914 | Control Register | +| _FB_SHIFTER_ | $918 | Shift Assist Register | +| _FB_SHIFTCOUNT_ | $91C | Shift Count Register | +| _FB_SHIFTERM_ | $920 | Shifter Mask Register | +| _FB_SHIFTERSP_ | $924 | Shifter Spill Register | +| _FB_MASKGEN_ | $928 | Mask Generator Register | ## Pixel Data Pixel data is organized in 32-bit-words. With four bits per pixel, one word @@ -81,3 +84,54 @@ The control register contains status information. It can only be read. The _m_ field indicates the current graphics mode. At the time of writing, it is always 1 which denotes a 640x400x4 mode. The _vb_ bit is 1 when the video signal generator is in its vertical blank phase. 
+ +## Shift Assist Register +The *shift assist register* can be used to accelerate shifting pixel/bitmap data. +Writing a word of pixel data to this register initialises the shifting process. + +After writing to the shift count register (see below), reading the shift assist +register retrieves the shifted pixel data. + +A newly written word only takes effect once the shift count register is written; until then, reads still return the previous result. + +## Shift Count Register +Writing a number from 0-7 to the *shift count register* triggers shifting the +contents of the shift assist register. Pixel data is shifted to the right by +four bits (one pixel) per count. Bits 31-3 of the written value are ignored, so you can +directly write a horizontal screen coordinate to the register. + +This register cannot be read. + +## Shifter Mask Register +The *shifter mask register* contains the shifted pixel data converted into +a mask. See the *mask generator register* for an +explanation of the mask. + +## Shifter Spill Register +The *shifter spill register* contains the pixel data that has +been shifted out to the right. For example, if the shift count is two, +the spill register contains the two rightmost pixels (bits 7-0) of +the original pixel data, placed into the two topmost pixels (bits 31-24). + +The rest of the register is set to zero. + +## Mask Generator Register +The *mask generator register* creates a mask from pixel data. +For each four bits of a pixel, the corresponding four mask bits +are all set to one if the pixel value is not zero. + +This can be used to combine foreground and background pixel data, +treating a foreground pixel value of zero as transparent. + +Usually, the mask is inverted with a *NOT* instruction and then applied +to the background with an *AND* instruction, which clears all background +pixels that are set in the foreground, +before *ORing* foreground and background together. 
+ +Example in hexadecimal, each digit is a pixel: +| Pixel Data | Mask | +|------------|------| +| $00000000 | $00000000 | +| $00000001 | $0000000F | +| $0407000F | $0F0F000F | +| $1234ABC0 | $FFFFFFF0 | diff --git a/examples/fastfire.s b/examples/fastfire.s index f0e10e4..63ace51 100644 --- a/examples/fastfire.s +++ b/examples/fastfire.s @@ -123,11 +123,11 @@ FF_EXIT: ; framebuffer controller registers .EQU FB_RA $900 - .EQU FB_WA $901 - .EQU FB_IO $902 - .EQU FB_PS $903 - .EQU FB_PD $904 - .EQU FB_CTL $905 + .EQU FB_WA $904 + .EQU FB_IO $908 + .EQU FB_PS $90C + .EQU FB_PD $910 + .EQU FB_CTL $914 .EQU WORDS_PER_LINE 80 ; fire width in vmem words (strict left-to-right evaluation) diff --git a/examples/graphbench.pas b/examples/graphbench.pas new file mode 100644 index 0000000..9abbfba --- /dev/null +++ b/examples/graphbench.pas @@ -0,0 +1,125 @@ +program graphbench; +uses sprites; + +var starttime,endtime:DateTime; + spriteData:SpritePixels; + +procedure readSpriteData(filename:string); +var f:file; +begin + open(f,filename,ModeReadOnly); + seek(f,8); (* skip file header *) + read(f,spriteData); + close(f); +end; + +procedure startBench(name:string); +begin + write(name:20, ' '); + starttime := GetTime; +end; + +procedure endBench; +var secDelta, minDelta, hourDelta:integer; + procedure write2Digits(i:integer); + begin + if i < 10 then + write('0'); + write(i); + end; +begin + endTime := GetTime; + + hourDelta := endtime.hours - starttime.hours; + minDelta := endtime.minutes - starttime.minutes; + secDelta := endtime.seconds - starttime.seconds; + + if secDelta < 0 then + begin + secDelta := 60 + secDelta; + minDelta := minDelta - 1; + end; + + if minDelta < 0 then + begin + minDelta := 60 + minDelta; + hourDelta := hourDelta - 1; + end; + + write2Digits(hourDelta); + write(':'); write2Digits(minDelta); + write(':'); write2Digits(secDelta); + writeln; +end; + +function randint(lessthan:integer):integer; +var r:integer; +begin + r := random and 511; + if r >= 
lessthan then + r := r - lessthan; + randint := r; +end; + +procedure drawsprites(count:integer); +var i,col,x,y:integer; +begin + col := 1; + for i := 1 to count do + begin + x := randint(350); + y := randint(350); + PutSprite(x,y,spriteData); + col := col + 1; + if col > 15 then col := 1; + end; +end; + +procedure drawlines(count:integer); +var i,col,x1,y1,x2,y2:integer; +begin + col := 1; + for i := 1 to count do + begin + x1 := randint(500); + y1 := randint(400); + x2 := randint(500); + y2 := randint(400); + DrawLine(x1,y1,x2,y2,col); + col := col + 1; + if col > 15 then col := 1; + end; +end; + +procedure drawpoints(count:integer); +var i,col,x,y:integer; +begin + col := 1; + for i := 1 to count do + begin + x := randint(500); + y := randint(400); + PutPixel(x,y,col); + col := col + 1; + if col > 15 then col := 1; + end; +end; + +begin + readSpriteData('rocket.sprt'); + + InitGraphics; + startBench('points 200K'); + drawpoints(200000); + endBench; + + InitGraphics; + startBench('lines 10K'); + drawlines(10000); + endBench; + + InitGraphics; + startBench('sprites 50K'); + drawsprites(50000); + endBench; +end. 
diff --git a/examples/sprites.s b/examples/sprites.s index 3391339..5f50081 100644 --- a/examples/sprites.s +++ b/examples/sprites.s @@ -3,31 +3,16 @@ .EQU WORDS_PER_LINE 80 .EQU FB_RA $900 - .EQU FB_WA $901 - .EQU FB_IO $902 - .EQU FB_PS $903 - -; calculate mask for a word of pixels -; args: word of pixels with four bits per pixel -; returns: value that masks out all pixels that are set -CALC_MASK: - LOADC $F ; pixel mask -C_M_L0: - SWAP ; swap mask and pixels value - AND.S1.X2Y ; isolate one pixel, keep args - CBRANCH.Z C_M_L1 ; if pixel is zero, dont set mask bits - OVER ; copy current mask - OR ; or into pixels value -C_M_L1: - SWAP ; swap back, ToS is now mask bits - SHL 2 ; shift mask for next pixel to the left - SHL 2 - - DUP - CBRANCH.NZ C_M_L0 ; if mask is zero, we are done - DROP ; remove mask bits - NOT ; invert result - RET + .EQU FB_WA $904 + .EQU FB_IO $908 + .EQU FB_PS $90C + .EQU FB_PD $910 + .EQU FB_CTL $914 + .EQU FB_SHIFTER $918 + .EQU FB_SHIFTCOUNT $91C + .EQU FB_SHIFTERM $920 + .EQU FB_SHIFTERSP $924 + .EQU FB_MASKGEN $928 ; calculate vmem address from coordinates ; args: x,y @@ -67,13 +52,19 @@ CALC_VMEM_ADDR: .EQU PS_SHIFT_C 20 .EQU PS_SPILL 24 .EQU PS_STRIPE_C 28 - .EQU PS_FS 32 + .EQU PS_BPSAVE 32 + .EQU PS_FS 36 PUTSPRITE: FPADJ -PS_FS STORE PS_SPRITE_DATA STORE PS_Y STORE PS_X + LOADREG BP + STORE PS_BPSAVE + LOADC 0 + STOREREG BP + ; calculate vmem address LOAD PS_X LOAD PS_Y @@ -81,11 +72,6 @@ PUTSPRITE: CALL STORE PS_VMEM_ADDR - LOAD PS_X ; shift count = x mod 8 - LOADC 7 - AND - STORE PS_SHIFT_C - LOADC SPRITE_HEIGHT STORE PS_SPRITE_LINES @@ -93,12 +79,10 @@ PUTSPRITE: PS_LOOP1: ; set read and write address ; in the vga controller - LOADC FB_RA ; read address register LOAD PS_VMEM_ADDR - STOREI 1 ; use autoincrement to get to the next register - LOAD PS_VMEM_ADDR - STOREI - DROP + DUP + STORE.B FB_RA + STORE.B FB_WA LOAD PS_SPRITE_DATA ; address of sprite data DUP @@ -106,61 +90,20 @@ PS_LOOP1: STORE PS_SPRITE_DATA ; and store it 
again LOADI ; load word from orig. address + ; ------- one word of sprite pixels on stack - LOADC 0 - STORE PS_SPILL + STORE.B FB_SHIFTER + LOAD PS_X + STORE.B FB_SHIFTCOUNT - ; loop to shift pixel data to right - LOAD PS_SHIFT_C ; load shift count -PS_LOOP2: - DUP ; test it for zero - CBRANCH.Z PS_LOOP2_X + LOAD.B FB_SHIFTERM ; get shifted mask + NOT + LOAD.B FB_IO ; and background pixel data + AND ; remove foreground pixels - SWAP ; swap count with pixels - - ; save the pixel that is shifted out - LOADC $F ; mask the four bits - AND.S0 ; keep original value on stack - BROT ; and move them to MSB - BROT - BROT - SHL 2 - SHL 2 ; shift by 28 in total - - LOAD PS_SPILL ; load spill bits - SHR ; shift by four to make space - SHR - SHR - SHR - OR ; or with orig value - STORE PS_SPILL ; store new value - - SHR ; shift pixels right - SHR ; four bits per pixel - SHR - SHR - - SWAP ; swap back, count now ToS - DEC 1 - BRANCH PS_LOOP2 -PS_LOOP2_X: - DROP ; remove shift count, shifted pixels now in ToS - - DUP - LOADCP CALC_MASK ; calculate sprite mask for this word - CALL - - LOADCP FB_IO ; address of the i/o register - LOADI ; read word from video mem - - AND ; and word with mask - - OR ; OR sprite data with original pixels - - LOADCP FB_IO - SWAP - STOREI ; store result into i/o reg - DROP + LOAD.B FB_SHIFTER ; get shifted pixels + OR ; combine with background + STORE.B FB_IO ; store into vmem ; set counter for remaining stripes LOADC SPRITE_STRIPES - 1 @@ -170,8 +113,8 @@ PS_LOOP2_X: ; process spilled bits and next vertical stripe of sprite data ; PS_NEXT_STRIPE: - ; put spill bits on stack for later - LOAD PS_SPILL + ;use spill bits from first column + LOAD.B FB_SHIFTERSP LOAD PS_SPRITE_DATA ; address of sprite data DUP @@ -179,65 +122,21 @@ PS_NEXT_STRIPE: STORE PS_SPRITE_DATA ; and store it again LOADI ; load word from orig. 
address - ; reset spill bits - LOADC 0 - STORE PS_SPILL - - ; last spill bits are on ToS now - - ; shift pixel data to right - LOAD PS_SHIFT_C ; load shift count -PS_LOOP3: ; test it for zero + STORE.B FB_SHIFTER ; store into shifter + LOAD PS_X + STORE.B FB_SHIFTCOUNT ; shift stuff + LOAD.B FB_SHIFTER ; get shifted pixels + OR ; combine with spill bits (see above) DUP - CBRANCH.Z PS_LOOP3_X + STORE.B FB_MASKGEN ; store to mask reg to get new mask - SWAP ; swap count with pixels + LOAD.B FB_MASKGEN ; get mask for spill bits + shifted pixels + NOT + LOAD.B FB_IO ; get vmem data + AND ; remove foreground pixels from bg - ; save the pixel that is shifted out - LOADC $F ; mask the four bits - AND.S0 ; keep original value on stack - BROT ; and move them to MSB - BROT - BROT - SHL 2 - SHL 2 ; shift by 28 in total - - LOAD PS_SPILL ; load spill bits - SHR ; shift by four to make space - SHR - SHR - SHR - OR ; or with orig value - STORE PS_SPILL ; store new value - - SHR ; shift pixels right - SHR ; four bits per pixel - SHR - SHR - - SWAP ; swap back, count now ToS - DEC 1 - BRANCH PS_LOOP3 -PS_LOOP3_X: - DROP ; remove shift count, shifted pixels now in ToS - - OR ; or together with spill bits - - DUP - LOADCP CALC_MASK ; calculate sprite mask - CALL - - LOADCP FB_IO ; load original pixels - LOADI - - AND ; and with mask - - OR ; or together with original pixels - - LOADCP FB_IO - SWAP - STOREI - DROP + OR ; combine with shifted pixels + STORE.B FB_IO ; write to vmem LOAD PS_STRIPE_C ; decrement stripe count DEC 1 @@ -246,22 +145,19 @@ PS_LOOP3_X: CBRANCH.NZ PS_NEXT_STRIPE ; if non-zero, next stripe ; write spilled bits of the last stripe into next vmem word - LOAD PS_SPILL ; get spill bits + LOAD.B FB_SHIFTERSP ; get spill bits DUP - LOADCP CALC_MASK ; calculate sprite mask for spill bits - CALL + STORE.B FB_MASKGEN + LOAD.B FB_MASKGEN ; get sprite mask for spill bits + NOT - LOADCP FB_IO - LOADI ; load next vmem word + LOAD.B FB_IO ; load next vmem word AND ; apply 
sprite mask OR ; OR in spill bits - LOADCP FB_IO - SWAP ; swap pixels and addr - STOREI ; write back - DROP - + STORE.B FB_IO ; write to vmem + LOAD PS_SPRITE_LINES ; decrement lines count DEC 1 DUP @@ -275,7 +171,10 @@ PS_LOOP3_X: BRANCH PS_LOOP1 PS_L_XT: DROP - + + LOAD PS_BPSAVE + STOREREG BP + FPADJ PS_FS RET @@ -322,7 +221,7 @@ UD_S_L1: ; store vmem offset into write addr reg LOADCP FB_WA LOAD UD_S_OFFSET - STOREI 1 ; ugly but fast: reuse addr + STOREI 4 ; ugly but fast: reuse addr ; with postincrement to ; get to FB_IO for STOREI below diff --git a/lib/corelib.s b/lib/corelib.s index d147934..1ac12e9 100644 --- a/lib/corelib.s +++ b/lib/corelib.s @@ -701,113 +701,37 @@ CMPWORDS_XT2: ; --------- Graphics Library --------------- ; vga controller registers .EQU FB_RA $900 - .EQU FB_WA $901 - .EQU FB_IO $902 - .EQU FB_PS $903 - .EQU FB_PD $904 - .EQU FB_CTL $905 -; set a pixel in fb memory -; parameters: x,y - coordinates -PUTPIXEL_1BPP: - ; calculate vmem address: - OVER ; duplicate x - ; divide x by 32 - SHR - SHR - SHR - SHR - SHR - SWAP - ; multiply y by words per line - SHL 2 - SHL 2 - SHL + .EQU FB_WA $904 + .EQU FB_IO $908 + .EQU FB_PS $90C + .EQU FB_PD $910 + .EQU FB_CTL $914 + .EQU FB_SHIFTER $918 + .EQU FB_SHIFTCOUNT $91C + .EQU FB_SHIFTERM $920 + .EQU FB_SHIFTERSP $924 + .EQU FB_MASKGEN $928 - ADD ; add results together for vmem addr +; draw a single pixel +; args: x, y, color - DUP - LOADCP FB_WA - SWAP - STOREI ; store to framebuffer write addr register - DROP - LOADCP FB_RA ; and to framebuffer read addr register - SWAP - STOREI - DROP - - ; x is now at top of stack - ; get bit value from x modulo 32 - LOADC 31 - AND - SHL 2 ; (x & 31) * 4 = offset into table - LOADCP INT_TO_PIX_TABLE - ADD - LOADI - - LOADCP FB_IO - ; read old vmem value - LOADCP FB_IO - LOADI - ; or in new bit - OR - ; write new value - STOREI - DROP - - RET - -INT_TO_PIX_TABLE: - .WORD %10000000_00000000_00000000_00000000 - .WORD %01000000_00000000_00000000_00000000 - .WORD 
%00100000_00000000_00000000_00000000 - .WORD %00010000_00000000_00000000_00000000 - .WORD %00001000_00000000_00000000_00000000 - .WORD %00000100_00000000_00000000_00000000 - .WORD %00000010_00000000_00000000_00000000 - .WORD %00000001_00000000_00000000_00000000 - .WORD %00000000_10000000_00000000_00000000 - .WORD %00000000_01000000_00000000_00000000 - .WORD %00000000_00100000_00000000_00000000 - .WORD %00000000_00010000_00000000_00000000 - .WORD %00000000_00001000_00000000_00000000 - .WORD %00000000_00000100_00000000_00000000 - .WORD %00000000_00000010_00000000_00000000 - .WORD %00000000_00000001_00000000_00000000 - .WORD %00000000_00000000_10000000_00000000 - .WORD %00000000_00000000_01000000_00000000 - .WORD %00000000_00000000_00100000_00000000 - .WORD %00000000_00000000_00010000_00000000 - .WORD %00000000_00000000_00001000_00000000 - .WORD %00000000_00000000_00000100_00000000 - .WORD %00000000_00000000_00000010_00000000 - .WORD %00000000_00000000_00000001_00000000 - .WORD %00000000_00000000_00000000_10000000 - .WORD %00000000_00000000_00000000_01000000 - .WORD %00000000_00000000_00000000_00100000 - .WORD %00000000_00000000_00000000_00010000 - .WORD %00000000_00000000_00000000_00001000 - .WORD %00000000_00000000_00000000_00000100 - .WORD %00000000_00000000_00000000_00000010 - .WORD %00000000_00000000_00000000_00000001 - -PUTMPIXEL: - LOADC 1 -; set a pixel in fb memory -; parameters: x,y,color - coordinates, color value (0-15) PUTPIXEL: PUTPIXEL_4BPP: .EQU PUTPIXEL_X 0 .EQU PUTPIXEL_Y 4 .EQU PUTPIXEL_COLOR 8 - .EQU PUTPIXEL_PIXPOS 12 + .EQU PUTPIXEL_BPSAV 12 .EQU PUTPIXEL_FS 16 FPADJ -PUTPIXEL_FS - STORE PUTPIXEL_COLOR STORE PUTPIXEL_Y STORE PUTPIXEL_X + LOADREG BP + STORE PUTPIXEL_BPSAV + LOADC 0 + STOREREG BP ; calculate vmem address: (x / 8) + (y * 80) LOAD PUTPIXEL_X @@ -826,82 +750,56 @@ PUTPIXEL_4BPP: ADD ; add results together for vmem addr - LOADCP FB_WA - OVER - STOREI ; store to framebuffer write addr register - DROP - LOADCP FB_RA ; and to framebuffer 
read addr register - SWAP ; swap addr and value for STOREI - STOREI - DROP - - LOAD PUTPIXEL_X - ; |0000.0000|0000.0000|0000.0000|0000.1111| - LOADC 7 - AND ; calculate pixel position in word - LOADC 7 - SWAP - SUB ; pixpos = 7 - (x & 7) - STORE PUTPIXEL_PIXPOS + DUP + STORE.B FB_WA ; set as write and read addresses + STORE.B FB_RA LOAD PUTPIXEL_COLOR - LOAD PUTPIXEL_PIXPOS - SHR ; rcount = pixpos / 2 -ROTLOOP_: - DUP ; exit loop if rcount is 0 - CBRANCH.Z ROTLOOP_END - SWAP ; pixel value is now on top of stack - BROT ; value = value << 8 - SWAP ; rcount is now on top of stack - DEC 1 ; rcount = rcount - 1 - BRANCH ROTLOOP_ -ROTLOOP_END: - DROP ; drop rcount - ; shifted pixel value is now at top of stack - LOAD PUTPIXEL_PIXPOS - LOADC 1 - AND - CBRANCH.Z EVEN_PIXPOS - SHL 2 ; if pixpos is odd, shift by 4 bits + CBRANCH.Z PUTPX_CLR ; color 0 is special case + + ; create pixel data from color value in + ; leftmost pixel data bits (31-28) + LOADC 0 + LOAD PUTPIXEL_COLOR + BPLC SHL 2 -EVEN_PIXPOS: - LOAD PUTPIXEL_X - ; get bit value from x modulo 8 - LOADC 7 - AND - SHL 2 ; (x & 7) * 4 = offset into table - LOADCP INT_TO_MASK_TABLE - ADD - LOADI + SHL 2 + STORE.B FB_SHIFTER ; store pixel into shifter - ; read old vmem value - LOADCP FB_IO - LOADI - ; mask bits - AND - ; or in shifted pixel value - OR + LOAD PUTPIXEL_X ; use x coord as shift count + STORE.B FB_SHIFTCOUNT ; writing triggers shifting - ; write new value - LOADCP FB_IO - SWAP - STOREI - DROP + LOAD.B FB_SHIFTERM ; get shift result as mask + NOT ; invert to get background mask + LOAD.B FB_IO ; get background pixel data + AND ; remove bits for new pixel from bg + + LOAD.B FB_SHIFTER ; load shifted pixel + OR ; OR in new pixel bits + STORE.B FB_IO ; write new pixel data word to vmem + +PUTPX_XT: + LOAD PUTPIXEL_BPSAV + STOREREG BP FPADJ PUTPIXEL_FS RET - .CPOOL +PUTPX_CLR: + LOADCP $F0000000 ; mask for leftmost pixel + STORE.B FB_SHIFTER ; shift accordingly + LOAD PUTPIXEL_X + STORE.B FB_SHIFTCOUNT + + LOAD.B 
FB_SHIFTER ; get shifted value + NOT ; invert for real mask + LOAD.B FB_IO ; get background pixels + AND ; clear pixel with mask + STORE.B FB_IO ; no need to OR in new pixel, just store to vmem + + BRANCH PUTPX_XT + -INT_TO_MASK_TABLE: - .WORD %00001111_11111111_11111111_11111111 - .WORD %11110000_11111111_11111111_11111111 - .WORD %11111111_00001111_11111111_11111111 - .WORD %11111111_11110000_11111111_11111111 - .WORD %11111111_11111111_00001111_11111111 - .WORD %11111111_11111111_11110000_11111111 - .WORD %11111111_11111111_11111111_00001111 - .WORD %11111111_11111111_11111111_11110000 ; draw a line between two points ; parameters: x0, y0, x1, y1, color diff --git a/lib/pcmaudio.s b/lib/pcmaudio.s index 530f52f..ebe812a 100644 --- a/lib/pcmaudio.s +++ b/lib/pcmaudio.s @@ -11,9 +11,9 @@ START_PCMAUDIO: LOADCP _DIV CALL - LOADC AUDIO_BASE + 1 + LOADC AUDIO_BASE + 4 SWAP ; put clock divider on ToS - STOREI 1 + STOREI 4 LOADCP 32768 ; set amplitude to biased 0 STOREI DROP @@ -95,7 +95,7 @@ PLAY1_L0: AND CBRANCH.NZ PLAY1_L0 ; loop if fifo is full - LOADC AUDIO_BASE+2 ; store amplitude value + LOADC AUDIO_BASE+8 ; store amplitude value SWAP STOREI DROP @@ -207,7 +207,7 @@ SMPLQ_I_B: LOADCP $FFFF AND - LOADC AUDIO_BASE+2 + LOADC AUDIO_BASE+8 SWAP STOREI ; write sample, keep addr @@ -281,7 +281,7 @@ SMPLQ_I_END1: DROP ; set amplitude out to zero (biased) - LOADC AUDIO_BASE+2 + LOADC AUDIO_BASE+8 LOADCP 32768 STOREI DROP diff --git a/tridoracpu/tridoracpu.srcs/top.v b/tridoracpu/tridoracpu.srcs/top.v index 0dc3346..bf3bea8 100644 --- a/tridoracpu/tridoracpu.srcs/top.v +++ b/tridoracpu/tridoracpu.srcs/top.v @@ -137,7 +137,7 @@ module top( assign fb_wr_data = mem_write_data; vgafb vgafb0(`clock, pixclk, rst, - mem_addr[3:0], fb_rd_data, fb_wr_data, + mem_addr[5:2], fb_rd_data, fb_wr_data, fb_rd_en, fb_wr_en, VGA_HS_O, VGA_VS_O, VGA_R, VGA_G, VGA_B); `endif @@ -247,7 +247,7 @@ module top( assign tdraudio_wr_data = mem_write_data; tdraudio tdraudio0(`clock, ~rst, - 
mem_addr[6:0], + mem_addr[8:2], tdraudio_rd_data, tdraudio_wr_data, tdraudio_rd_en, diff --git a/tridoracpu/tridoracpu.srcs/vgafb.v b/tridoracpu/tridoracpu.srcs/vgafb.v index f87e514..49dad2d 100644 --- a/tridoracpu/tridoracpu.srcs/vgafb.v +++ b/tridoracpu/tridoracpu.srcs/vgafb.v @@ -1,6 +1,9 @@ `timescale 1ns / 1ps `default_nettype none +// enable shifter/masker registers +`define ENABLE_FB_ACCEL + // Project F: Display Timings // (C)2019 Will Green, Open Source Hardware released under the MIT License // Learn more at https://projectf.io @@ -126,6 +129,14 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( localparam REG_PAL_SLOT = 3; localparam REG_PAL_DATA = 4; localparam REG_CTL = 5; +`ifdef ENABLE_FB_ACCEL + localparam REG_SHIFTER = 6; + localparam REG_SHIFTCOUNT = 7; + localparam REG_SHIFTERM = 8; + localparam REG_SHIFTERSP = 09; + localparam REG_MASKGEN = 10; +`endif + localparam COLOR_WIDTH = 12; localparam PALETTE_WIDTH = 4; @@ -145,12 +156,32 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( wire pix_rd; wire [VMEM_DATA_WIDTH-1:0] status; +`ifdef ENABLE_FB_ACCEL + reg [VMEM_DATA_WIDTH-1:0] acc_shifter_in; + reg [(VMEM_DATA_WIDTH*2)-1:0] acc_shifter_out; + reg [4:0] acc_shift_count; + reg acc_start_shift; + reg [VMEM_DATA_WIDTH-1:0] acc_mask_in; + reg [VMEM_DATA_WIDTH-1:0] acc_mask_buf; + reg [VMEM_DATA_WIDTH-1:0] acc_shiftmask_buf; + wire [VMEM_DATA_WIDTH-1:0] acc_shifter_mask = acc_shiftmask_buf; + wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_h = acc_shifter_out[(VMEM_DATA_WIDTH*2)-1:VMEM_DATA_WIDTH]; + wire [VMEM_DATA_WIDTH-1:0] acc_shifter_out_l = acc_shifter_out[VMEM_DATA_WIDTH-1:0]; + + `endif + assign vmem_rd_en = rd_en; assign vmem_wr_en = (reg_sel == REG_VMEM) && wr_en; assign rd_data = (reg_sel == REG_VMEM) ? vmem_rd_data : (reg_sel == REG_RD_ADDR) ? cpu_rd_addr : (reg_sel == REG_WR_ADDR) ? cpu_wr_addr : (reg_sel == REG_CTL) ? status : +`ifdef ENABLE_FB_ACCEL + (reg_sel == REG_SHIFTER) ? 
acc_shifter_out_h: (reg_sel == REG_SHIFTERM) ? acc_shiftmask_buf : (reg_sel == REG_SHIFTERSP) ? acc_shifter_out_l : (reg_sel == REG_MASKGEN) ? acc_mask_buf : + `endif 32'hFFFFFFFF; wire [VMEM_ADDR_WIDTH-1:0] cpu_addr = vmem_wr_en ? cpu_wr_addr : cpu_rd_addr; @@ -271,6 +302,74 @@ module vgafb #(VMEM_ADDR_WIDTH = 15, VMEM_DATA_WIDTH = 32) ( if(rd_en && reg_sel == REG_VMEM) cpu_rd_addr <= cpu_rd_addr + 1; // auto-increment read addr on read end +`ifdef ENABLE_FB_ACCEL + // + // shifter/masker registers + // + always @(posedge cpu_clk) + begin + if(wr_en && reg_sel == REG_SHIFTER) + acc_shifter_in <= wr_data; + end + + always @(posedge cpu_clk) + begin + if(wr_en && reg_sel == REG_SHIFTCOUNT) + begin + acc_shift_count <= { wr_data[2:0], 2'b0}; + acc_start_shift <= 1; + end + + if(acc_start_shift) + acc_start_shift <= 0; + end + + always @(posedge cpu_clk) + begin + if (acc_start_shift) + acc_shifter_out <= {acc_shifter_in, {VMEM_DATA_WIDTH{1'b0}}} >> acc_shift_count; + end + + // mask register + always @(posedge cpu_clk) + begin + if (wr_en && reg_sel == REG_MASKGEN) + acc_mask_in <= wr_data; + end + + // mask output is buffered to avoid timing problems + always @(posedge cpu_clk) + begin + acc_mask_buf <= { + {4{|{acc_mask_in[31:28]}}}, + {4{|{acc_mask_in[27:24]}}}, + {4{|{acc_mask_in[23:20]}}}, + {4{|{acc_mask_in[19:16]}}}, + {4{|{acc_mask_in[15:12]}}}, + {4{|{acc_mask_in[11:8]}}}, + {4{|{acc_mask_in[7:4]}}}, + {4{|{acc_mask_in[3:0]}}} + }; + end + // shifter mask output is registered (non-blocking) like the mask output above + always @(posedge cpu_clk) + begin + acc_shiftmask_buf <= { + {4{|{acc_shifter_out_h[31:28]}}}, + {4{|{acc_shifter_out_h[27:24]}}}, + {4{|{acc_shifter_out_h[23:20]}}}, + {4{|{acc_shifter_out_h[19:16]}}}, + {4{|{acc_shifter_out_h[15:12]}}}, + {4{|{acc_shifter_out_h[11:8]}}}, + {4{|{acc_shifter_out_h[7:4]}}}, + {4{|{acc_shifter_out_h[3:0]}}} + }; + end +`endif + + // + // shifting pixels at pixel clock + // always @(posedge pix_clk) begin if(scanline || shift_count == MAX_SHIFT_COUNT) // before start of a line diff 
--git a/tridoracpu/tridoracpu.xpr b/tridoracpu/tridoracpu.xpr index a3dd3f6..5d8ff88 100644 --- a/tridoracpu/tridoracpu.xpr +++ b/tridoracpu/tridoracpu.xpr @@ -358,7 +358,9 @@ - + + Vivado Synthesis Defaults + @@ -378,7 +380,9 @@ - + + Default settings for Implementation. + diff --git a/utils/tdrimg.py b/utils/tdrimg.py index b7ce4cb..4eeaead 100644 --- a/utils/tdrimg.py +++ b/utils/tdrimg.py @@ -614,6 +614,7 @@ def create_image_with_stuff(imgfile): slotnr = putfile("../examples/benchmarks.pas", None , f, part, partstart, slotnr) slotnr = putfile("../examples/animate.pas", None , f, part, partstart, slotnr) + slotnr = putfile("../examples/graphbench.pas", None , f, part, partstart, slotnr) slotnr = putfile("../examples/sprites.inc", None , f, part, partstart, slotnr) slotnr = putfile("../examples/sprites.s", None , f, part, partstart, slotnr) slotnr = putfile("../examples/background.pict", None , f, part, partstart, slotnr)