7,22 → 7,25 |
; Optimized for KolibriOS, By Diamond |
; Assemble with |
; c:fasm firework.asm firework.kex |
; NOTE: Needs MMX & SSE |
; NOTE: Needs MMX & SSE, optionally AVX |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
; Executable header for the application image (32-bit code, origin 0).
; NOTE(review): both a 'MENUET00' and a 'MENUET01' signature with their
; respective fields appear below; only one header layout should remain in the
; file that is actually assembled.
use32 |
org 0x0 |
|
db 'MENUET00' ; 8 byte id |
dd 38 ; required os |
db 'MENUET01' ; 8 byte id |
dd 0x01 ; header version |
dd STARTAPP ; program start |
dd I_END ; program image size |
dd 0x100000 ; required amount of memory |
dd 0x00000000 ; reserved=no extended header |
dd E_END ; required amount of memory (E_END follows the uninitialized data) |
dd stacktop ; initial stack pointer (label after the rd 0x200 area), not a reserved field |
; NOTE(review): presumably the parameters / path pointers of the MENUET01
; layout, both unused here - confirm against the header specification.
dd 0, 0 |
|
include '../../../macros.inc' |
include "aspapi.inc" |
; Frame geometry: buffer is SCREEN_WIDTH*SCREEN_HEIGHT bytes, one byte per pixel.
; NOTE(review): the dimensions are defined twice (equ form and = form); only one
; pair should remain in the assembled file.
SCREEN_WIDTH equ 320 |
SCREEN_HEIGHT equ 200 |
SCREEN_WIDTH = 320 |
SCREEN_HEIGHT = 200 |
; SIMD selects which branch of the shade / blur / blur_right macros is assembled
; (SSE, AVX, AVX2 or AVX512).
SIMD equ SSE |
; SIMD_BYTES is the step by which edi advances per loop iteration in those
; macros, and the alignment applied to sub_mask and buffer.
; NOTE(review): it is not derived from SIMD; it has to be changed by hand to the
; register width of the selected branch (mm = 8, xmm = 16, ymm = 32, zmm = 64).
SIMD_BYTES = 8 |
; Rows must be a whole number of SIMD_BYTES chunks: the AVX branches of blur use
; aligned loads at edi - SCREEN_WIDTH.
assert SCREEN_WIDTH mod SIMD_BYTES = 0 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
; Global defines |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
35,54 → 38,62 |
; Particle record layout: each entry of the particles table is five dwords
; (x, y, x_speed, y_speed, color), so the color is the dword at byte offset 16
; and one record is 20 bytes.
COLOR_OFFSET = 16 |
PART_SIZE = 20 |
|
; Window definition sequence: begin redraw, query function 48/4, then define
; and draw the window with function 0.
; Registers written: eax, ebx, ecx, edx, edi.
; NOTE(review): two macro headers (draw_window and shade) precede this body.
macro draw_window |
macro shade |
{ |
local x, xsize, y, ysize, areacolor, caption |
x = 100 |
y = 70 |
xsize = SCREEN_WIDTH+9 |
ysize = SCREEN_HEIGHT+4 |
; High byte 0x54 carries window style/flag bits, low 24 bits the work area color.
areacolor = 0x54224466 |
caption = labelt |
mov eax, 12 ; function 12:tell os about windowdraw |
mov ebx, 1 ; 1, start of draw |
int 0x40 |
; DRAW WINDOW |
; Function 48, subfunction 4: the value returned in eax is added to the window
; height below (skin height according to the KolibriOS sysfuncs documentation).
mov eax, 48 |
mov ebx, 4 |
int 0x40 |
; ecx = [y start]*65536 + [y size], with the function 48/4 result included in the size
lea ecx, [y*65536+ysize+eax] |
xor eax, eax ; function 0 : define and draw window |
mov ebx, x*65536+xsize ; [x start] *65536 + [x size] |
mov edx, areacolor ; style bits in the high byte + color of work area RRGGBB |
; edi = address of the zero-terminated caption string
mov edi, caption |
int 0x40 |
; start_draw_window 100,70,SCREEN_WIDTH+9,SCREEN_HEIGHT+29,0x04224466,labelt;, 14;labellen-labelt |
; end_draw_window is not defined in this view - presumably it comes from
; aspapi.inc and issues the end-of-redraw call; confirm.
end_draw_window |
} |
|
; Fade pass: subtracts sub_mask from every byte of buffer with unsigned
; saturation, so each pixel value moves towards 0 and stops there.
; ecx = number of chunks in the frame, edi = running pointer into buffer.
; Registers written: ecx, edi and vector registers 0 and 1 of the selected set.
macro mmx_shade |
{ |
mov ecx, SCREEN_WIDTH*SCREEN_HEIGHT/8 |
local .lop |
if SIMD eq SSE |
; 8 bytes per iteration in the mm registers
mov ecx, SCREEN_WIDTH * SCREEN_HEIGHT / SIMD_BYTES |
mov edi,buffer |
movq mm1, [sub_mask] |
movq mm1, qword [sub_mask] |
.lop: |
movq mm0, [edi] |
psubusb mm0, mm1 |
movq [edi], mm0 |
add edi, 8 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX |
; 16 bytes per iteration in the xmm registers. The aligned moves need sub_mask
; and buffer aligned to 16, and SIMD_BYTES must be 16 for the stride and the
; size of sub_mask to match.
mov ecx, SCREEN_WIDTH * SCREEN_HEIGHT / SIMD_BYTES |
mov edi, buffer |
vmovdqa xmm1, xword [sub_mask] |
.lop: |
vmovdqa xmm0, [edi] |
vpsubusb xmm0, xmm0, xmm1 |
vmovdqa [edi], xmm0 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX2 |
; 32 bytes per iteration in the ymm registers (SIMD_BYTES must be 32)
mov ecx, SCREEN_WIDTH * SCREEN_HEIGHT / SIMD_BYTES |
mov edi, buffer |
vmovdqa ymm1, yword [sub_mask] |
.lop: |
vmovdqa ymm0, [edi] |
vpsubusb ymm0, ymm0, ymm1 |
vmovdqa [edi], ymm0 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX512 |
; 64 bytes per iteration in the zmm registers (SIMD_BYTES must be 64)
mov ecx, SCREEN_WIDTH * SCREEN_HEIGHT / SIMD_BYTES |
mov edi, buffer |
vmovdqa64 zmm1, zword [sub_mask] |
.lop: |
vmovdqa64 zmm0, [edi] |
vpsubusb zmm0, zmm0, zmm1 |
vmovdqa64 [edi], zmm0 |
add edi, SIMD_BYTES |
loop .lop |
end if |
} |
|
; Sets up the registers consumed by the blur / blur_right loops:
;   ecx = number of SIMD_BYTES chunks to process
;   edi = first byte to process
; The range starts one row plus one chunk into buffer and stops the same
; distance before its end, so the loads at edi - SCREEN_WIDTH and
; edi + SCREEN_WIDTH done by the blur loops stay inside buffer.
macro mmx_blur_prepare |
macro blur_prepare |
{ |
; fixed-number form (328 = 320 + 8)
mov ecx, (SCREEN_WIDTH*SCREEN_HEIGHT-330*2)/8 |
mov edi,buffer + 328 |
; same setup expressed through SCREEN_WIDTH and SIMD_BYTES
mov ecx, (SCREEN_WIDTH * SCREEN_HEIGHT - SCREEN_WIDTH * 2 - SIMD_BYTES*2) / SIMD_BYTES |
mov edi, buffer + SCREEN_WIDTH + SIMD_BYTES |
} |
|
; In-place blur over the range prepared by blur_prepare (edi = start, ecx =
; chunk count). For every byte the result is
;   avg( avg(cur, right), avg( avg(cur, left), avg(up, down) ) )
; where left/right are edi-1 / edi+1 and up/down are edi -/+ SCREEN_WIDTH.
; The result is stored back to [edi], so later iterations read neighbours that
; have already been blurred.
macro mmx_blur |
macro blur |
{ |
local .lop |
if SIMD eq SSE |
.lop: |
movq mm0, [edi] |
movq mm1, [edi+1] |
; NOTE(review): the loads of mm2..mm5 are not visible in this view (hunk gap).
93,19 → 104,65 |
|
pavgb mm0, mm1 ; mm0 = avg(cur,cur+1) |
pavgb mm3, mm2 ; mm3 = avg(cur,cur-1) |
pavgb mm4, mm5 ; mm4 = avg(cur+320,cur-320) |
pavgb mm3, mm4 ; mm3 = avg(avg(cur,cur-1),avg(cur+320,cur-320)) |
pavgb mm4, mm5 ; mm4 = avg(cur+width,cur-width) |
pavgb mm3, mm4 ; mm3 = avg(avg(cur,cur-1),avg(cur+width,cur-width)) |
pavgb mm0, mm3 ; mm0 = avg(avg(cur,cur+1), mm3) |
|
movq [edi], mm0 |
add edi, 8 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX |
.lop: |
; xmm0 = cur, xmm1 = copy of cur, xmm2 = row above (aligned loads)
vmovdqa xmm0, [edi] |
vmovdqa xmm1, xmm0 |
vmovdqa xmm2, [edi - SCREEN_WIDTH] |
|
; xmm0 = avg(cur, right); xmm1 = avg(cur, left); xmm2 = avg(up, down);
; then xmm1 = avg(xmm1, xmm2) and xmm0 = avg(xmm0, xmm1)
vpavgb xmm0, xmm0, [edi + 1] |
vpavgb xmm1, xmm1, [edi - 1] |
vpavgb xmm2, xmm2, [edi + SCREEN_WIDTH] |
vpavgb xmm1, xmm1, xmm2 |
vpavgb xmm0, xmm0, xmm1 |
|
vmovdqa [edi], xmm0 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX2 |
; same computation as the AVX branch, on ymm registers
.lop: |
vmovdqa ymm0, [edi] |
vmovdqa ymm1, ymm0 |
vmovdqa ymm2, [edi - SCREEN_WIDTH] |
|
vpavgb ymm0, ymm0, [edi + 1] |
vpavgb ymm1, ymm1, [edi - 1] |
vpavgb ymm2, ymm2, [edi + SCREEN_WIDTH] |
vpavgb ymm1, ymm1, ymm2 |
vpavgb ymm0, ymm0, ymm1 |
|
vmovdqa [edi], ymm0 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX512 |
; same computation as the AVX branch, on zmm registers
.lop: |
vmovdqa64 zmm0, [edi] |
vmovdqa64 zmm1, zmm0 |
vmovdqa64 zmm2, [edi - SCREEN_WIDTH] |
|
vpavgb zmm0, zmm0, [edi + 1] |
vpavgb zmm1, zmm1, [edi - 1] |
vpavgb zmm2, zmm2, [edi + SCREEN_WIDTH] |
vpavgb zmm1, zmm1, zmm2 |
vpavgb zmm0, zmm0, zmm1 |
|
vmovdqa64 [edi], zmm0 |
add edi, SIMD_BYTES |
loop .lop |
end if |
} |
|
|
; In-place blur variant over the range prepared by blur_prepare (edi = start,
; ecx = chunk count). In the AVX/AVX2/AVX512 branches every byte becomes
;   avg( avg(cur, right), avg(below_right, below) )
; i.e. only the pixel itself, its right neighbour and the two pixels under them
; contribute.
macro mmx_blur_right |
macro blur_right |
{ |
local .lop |
if SIMD eq SSE |
.lop: |
movq mm0, [edi] |
movq mm1, [edi+1] |
; NOTE(review): the instructions that load mm2 / mm3 are not visible in this
; view (hunk gap).
115,8 → 172,39 |
pavgb mm3, mm2 |
pavgb mm0, mm3 |
movq [edi], mm0 |
add edi, 8 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX |
.lop: |
; xmm0 = cur (aligned load), xmm1 = below_right (unaligned load)
vmovdqa xmm0, [edi] |
vmovdqu xmm1, [edi + SCREEN_WIDTH + 1] |
; xmm2 = avg(cur, right); xmm3 = avg(below_right, below); xmm4 = avg(xmm2, xmm3)
vpavgb xmm2, xmm0, [edi + 1] |
vpavgb xmm3, xmm1, [edi + SCREEN_WIDTH] |
vpavgb xmm4, xmm2, xmm3 |
vmovdqa [edi], xmm4 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX2 |
; same computation as the AVX branch, on ymm registers
.lop: |
vmovdqa ymm0, [edi] |
vmovdqu ymm1, [edi + SCREEN_WIDTH + 1] |
vpavgb ymm2, ymm0, [edi + 1] |
vpavgb ymm3, ymm1, [edi + SCREEN_WIDTH] |
vpavgb ymm4, ymm2, ymm3 |
vmovdqa [edi], ymm4 |
add edi, SIMD_BYTES |
loop .lop |
else if SIMD eq AVX512 |
; same computation as the AVX branch, on zmm registers
.lop: |
vmovdqa64 zmm0, [edi] |
vmovdqu64 zmm1, [edi + SCREEN_WIDTH + 1] |
vpavgb zmm2, zmm0, [edi + 1] |
vpavgb zmm3, zmm1, [edi + SCREEN_WIDTH] |
vpavgb zmm4, zmm2, zmm3 |
vmovdqa64 [edi], zmm4 |
add edi, SIMD_BYTES |
loop .lop |
end if |
} |
|
STARTAPP: |
131,7 → 219,7 |
and eax, 0xFFFFFF |
jnz red_loop |
|
mov eax, 63*4*65536 |
mov eax, 63*4 SHL 16 |
@@: |
stosd |
stosd |
143,6 → 231,7 |
; mov edi,buffer |
xor eax, eax |
rep stosd |
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
; Main Functions |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
152,7 → 241,7 |
seed dd ? |
end virtual |
|
db 0x0f, 0x31 |
rdtsc |
push eax ; seed |
push 100*64 ; global_y |
push 160*64 ; global_x |
162,7 → 251,27 |
|
; Redraw path: refreshes proc_info, then defines and draws the window between
; the start-of-redraw and end-of-redraw calls. Execution continues into MAIN.
red: |
; Function 9 with slot -1 fills proc_info (information about the current
; thread per the KolibriOS sysfuncs documentation); MAIN tests
; proc_info.wnd_state afterwards.
mcall 9,proc_info,-1 |
draw_window |
x = 100 |
y = 70 |
xsize = SCREEN_WIDTH+9 |
ysize = SCREEN_HEIGHT+4 |
; High byte 0x54 carries window style/flag bits, low 24 bits the work area color.
areacolor = 0x54224466 |
mov eax, 12 ; function 12:tell os about windowdraw |
mov ebx, 1 ; 1, start of draw |
int 0x40 |
; Function 48, subfunction 4: the eax result is added to the window height
; below (skin height according to the KolibriOS sysfuncs documentation).
mov eax, 48 |
mov ebx, 4 |
int 0x40 |
; ecx = [y start]*65536 + [y size], with the function 48/4 result included in the size
lea ecx, [(y SHL 16) + ysize + eax] |
xor eax, eax ; function 0 : define and draw window |
mov ebx, (x SHL 16) + xsize ; [x start] *65536 + [x size] |
mov edx, areacolor ; style bits in the high byte + color of work area RRGGBB |
; edi = address of the zero-terminated window title
mov edi, window_title |
int 0x40 |
mov eax, 12 ; end of redraw |
mov ebx, 2 |
int 0x40 |
|
; Per-frame work: advance and plot the particles, fade and blur the frame
; buffer, then copy it to the window through the palette.
; Several stretches of this routine are not visible in this view (hunk gaps).
MAIN: |
; Skip the frame work when bit 0x04 of the window state is set
; (presumably the rolled-up state - confirm against the sysfuncs documentation).
test [proc_info.wnd_state], 0x04 |
jnz still |
177,11 → 286,11 |
|
; Bounds test on the particle position: eax = x, ebx = y (set in code not
; visible here). Positions closer than 5 pixels to any edge respawn the particle.
; NOTE(review): the lower-bound tests are unsigned (jb) while the upper-bound
; tests are signed (jge / jl). A negative coordinate is therefore not caught by
; either test and reaches .part_ok, where it is used as a store offset into
; buffer. An unsigned upper-bound test (jae / jb) would reject it; whether a
; negative value can occur depends on the hidden code that computes eax / ebx.
cmp eax, 5 |
jb .new_particle |
cmp eax, SCREEN_WIDTH-5;315 |
cmp eax, SCREEN_WIDTH - 5 |
jge .new_particle |
cmp ebx, 5 |
jb .new_particle |
cmp ebx, SCREEN_HEIGHT-5;195 |
cmp ebx, SCREEN_HEIGHT - 5 |
jl .part_ok |
|
.new_particle: |
189,26 → 298,16 |
jmp .advance_particles |
|
.part_ok: |
; mov edi, eax |
; add edi,buffer |
; mov eax, SCREEN_WIDTH |
; mul ebx |
; Plot: buffer[y*SCREEN_WIDTH + x] = low byte of the particle's color field
imul edi, ebx, SCREEN_WIDTH |
mov dl, [ebp+COLOR_OFFSET] |
mov [buffer+eax+edi], dl |
|
; mov eax, [ebp+X_OFFSET] |
; mov ebx, [ebp+Y_OFFSET] |
; add eax, [ebp+X_SPEED_OFFSET] |
; add ebx, [ebp+Y_SPEED_OFFSET] |
; mov [ebp+X_OFFSET], eax |
; mov [ebp+Y_OFFSET], ebx |
; Move: position += speed, for both axes
mov eax, [ebp+X_SPEED_OFFSET] |
add [ebp+X_OFFSET], eax |
mov eax, [ebp+Y_SPEED_OFFSET] |
add [ebp+Y_OFFSET], eax |
|
; Read the time stamp counter (0x0f, 0x31 is the rdtsc opcode) and increment
; the y speed only when its low 7 bits are all zero.
db 0x0f, 0x31 |
rdtsc |
and al, 0x7F |
jnz .dont_inc_y_speed |
inc dword [ebp+Y_SPEED_OFFSET] |
217,39 → 316,32 |
; Next particle record; ecx is the loop counter (initialised in code not
; visible here).
add ebp, PART_SIZE |
loop .advance_particles |
|
; Fade the whole frame buffer
mmx_shade |
shade |
; jmp .copy_buffer_to_video |
; Blur selection: while bit 23 (0x800000) of blur_right_flag is set use
; blur_right, otherwise blur. After blur the flag is advanced by 0x1000 only
; when bit 0 of the time stamp counter is clear; after blur_right it is always
; advanced.
mmx_blur_prepare |
blur_prepare |
test dword [blur_right_flag] , 0x800000 |
jnz .do_blur_right |
mmx_blur |
db 0x0f, 0x31 |
blur |
rdtsc |
and al, 1 |
jz .blur_ok |
jmp .dont_blur |
.do_blur_right: |
mmx_blur_right |
blur_right |
.blur_ok: |
add dword [blur_right_flag], 0x1000 |
.dont_blur: |
|
.copy_buffer_to_video: |
; mov eax, 18 ;@WAITVSYNC(); |
; mov ebx, 14 |
; int 0x40 |
|
; edx = [x]*65536 + [y] of the image inside the window: x = 5, y = value
; returned by function 48/4 (skin height per the sysfuncs documentation)
mov eax, 48 |
mov ebx, 4 |
int 0x40 |
lea edx, [5*65536+eax] |
mcall 48, 4 |
lea edx, [(5 SHL 16) + eax] |
|
; Function 65: draw the image at ebx with size ecx at position edx, with esi
; bits per pixel, the palette at edi and ebp = 0 (register roles per the
; KolibriOS sysfuncs documentation for function 65).
mov eax, 65 ;copyfard(0xA000,0,screen,0,16000); |
mov ebx, buffer;dword [screen] |
mov ecx, SCREEN_WIDTH*65536+SCREEN_HEIGHT ;ecx = w*65536+h |
; mov edx, 5*65536+25 ;edx = x*65536+y |
mov eax, 65 |
mov ebx, buffer |
mov ecx, (SCREEN_WIDTH SHL 16) + SCREEN_HEIGHT |
; esi = 8 (push/pop pair instead of mov esi, 8)
push 8 |
pop esi |
;mov esi, 8 |
mov edi, pal |
xor ebp, ebp |
int 0x40 |
280,21 → 372,12 |
|
; Button handler: the button code is not queried; execution falls through
; fail into close_app, so any button event terminates the program.
button: |
; we have only one button, close |
; mov eax, 17 ; Get pressed button code |
; int 0x40 |
; cmp ah, 1 ; Test x button |
; je close_app |
; jmp MAIN |
; fall through to close_app |
|
; fail: no handling of its own - it falls through and terminates the program.
fail: |
; Placeholder: nothing is done here before the program closes. |
close_app: |
mov eax,-1 ; close this program |
int 0x40 |
|
; Initialises the particle record at ebp (writes its Y_SPEED_OFFSET and
; COLOR_OFFSET fields in the visible part). The middle of the routine is not
; visible in this view (hunk gap).
init_particle: |
; Read the time stamp counter (0x0f, 0x31 is the rdtsc opcode); the code that
; follows up to .dont_re_init_globals runs only when its low 5 bits are all zero.
db 0x0f, 0x31 |
rdtsc |
and al, 0x1F |
jnz .dont_re_init_globals |
; init x |
333,17 → 416,9 |
;shl ax, 6 |
mov [ebp+Y_SPEED_OFFSET], eax |
; init color |
; mov ax, 255 |
;call rand |
;and ax, 0xFF |
; every particle starts with color value 255
mov [ebp+COLOR_OFFSET], dword 255;ax |
mov [ebp + COLOR_OFFSET], dword 255 |
ret |
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
; Misc. Functions |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|
|
rand: |
mov eax, [8+seed] |
imul eax, 214013 |
353,23 → 428,18 |
ret |
|
; DATA AREA |
; Initialized data up to I_END, uninitialized data up to E_END, then the stack.
; NOTE(review): several items appear twice below (title string, sub_mask,
; blur_right_flag, pal); only one definition of each should remain in the
; assembled file.
|
; Application Title |
labelt db 'Firework demo',0 |
;labelt db 'Matrix demo',0 |
|
;seed: dd 0 |
;global_x: dd 160*64 |
;global_y: dd 100*64 |
; sub_mask: value 1 in every byte; the shade macro subtracts it with unsigned
; saturation from each pixel of buffer.
sub_mask: dd 0x01010101, 0x01010101 |
window_title db 'Firework demo',0 |
; aligned and sized by SIMD_BYTES so the shade macro can load it with one vector move
align SIMD_BYTES |
sub_mask db SIMD_BYTES dup 0x01 |
; x, y, x_speed, y_speed, color |
particles: times NUM_PARTS dd 0, 0, 0, 0, 0 |
; Counter advanced by 0x1000 in MAIN; its bit 23 (0x800000) selects
; blur_right instead of blur.
blur_right_flag: dd 0 |
;include 'Dex.inc' |
blur_right_flag dd 0 |
; End of the initialized image (referenced from the header as the image size).
I_END: |
proc_info process_information |
; 256 palette entries of 4 bytes each, passed in edi to function 65
pal rb 256*4 ;dup(0) |
;pal dd 256 dup(0) |
;buffer rb 1024*64 |
align 16 |
pal rb 256 * 4 |
; frame buffer, one byte per pixel, aligned for the vector loops
align SIMD_BYTES |
buffer rb SCREEN_WIDTH*SCREEN_HEIGHT |
|
; End of the data; 0x200 dwords are reserved after it, and stacktop labels the
; end of that area.
E_END: |
rd 0x200 |
stacktop: |