;;
;;	second boot
;;
;;  program:
;;		Sakky
;;
;;	platform:
;;		AMD64
;;
;;	license:
;;		BSD License
;;
;;  to build:
;;      yasm -o boot2.bin -f bin boot2.nas
;;


;;	Don't touch
;;	Fixed physical addresses used by every stage of this loader.
;;	_cs_base     : value added to labels of this image whenever a flat
;;	               (linear) address is needed, e.g. in the far jumps.
;;	_base_PML4   : PML4 page; its address is loaded into CR3.
;;	_base_PDPT   : two consecutive PDPT pages (at +0x0000 and +0x1000).
;;	_base_PDT4GB : page-directory pages referenced from the PDPTs.
;;	_base_idt64  : base address stored in the long-mode IDTR image.
%define	_cs_base		0x00010000
%define	_base_PML4		0x00021000
%define	_base_PDPT		0x00022000
%define	_base_PDT4GB	0x00024000
%define	_base_idt64		0x00028000


[section .text]
[bits 16]

	;;	First instruction of the image: skip the helper routines below
	;;	and continue at _crt0 (presumably entered here by the IPL).
	jmp near _crt0


;;	_puts : print a zero-terminated string on the text screen
;;	in   : DS:SI -> string; byte 10 starts a new line, byte 0 ends it
;;	state: _text_vram (screen segment), _row/_col (cursor, updated)
;;	out  : SI just past the terminator; AX and CL are clobbered,
;;	       ES and DI are preserved
_puts:
	push es
	push di
	mov es,[cs:_text_vram]
psl0:
	;;	DI = (row*80 + col)*2 ; every cell is character + attribute
	mov cl,80
	mov al,[cs:_row]
	mul cl
	add al,[cs:_col]
	adc ah,0
	shl ax,1
	mov di,ax
psl1:
	lodsb
	test al,al			; end of string?
	jz psle
	cmp al,10			; line feed?
	je psl_lf
	stosb				; store the character ...
	inc di				; ... and skip its attribute byte
	inc byte [cs:_col]
	jmp psl1
psl_lf:
	;;	new line: column 0 of the next row, then recompute DI
	mov byte [cs:_col],0
	inc byte [cs:_row]
	jmp psl0
psle:
	pop di
	pop es
_ret_near:
	ret

;;	_PS2_wait_for_out : spin until bit 1 of port 0x64 reads 0.
;;	Port 0x60 is read on every pass; IN leaves the flags alone, so the
;;	branch still sees the result of the bit test.  Clobbers AL.
_PS2_wait_for_out:
	in al,0x64
	and al,2			; ZF=1 once bit 1 is clear
	in al,0x60
	jnz _PS2_wait_for_out
	ret

;;	puts_halt : print the message at CS:SI, then stop for good.
;;	forever   : interrupt-enabled halt loop (never returns).
puts_halt:
	push cs
	pop ds				; _puts reads the string through DS
	call _puts
forever:
	sti
.halt:
	hlt
	jmp short .halt			; woken by an interrupt: halt again

_crt0:
	;;	IPL signature check
	;;	Continue only when entered with AX = "oL" and CL = 1;
	;;	otherwise park the CPU in the halt loop.
	cmp ax,"oL"
	jnz short forever
	cmp cl,0x01
	jnz forever

	;;	386 check
	;;	Try to set FLAGS bits 12-13 and read them back; if both stay
	;;	clear the CPU is rejected.
	mov ax,03000h
	push ax
	popf
	pushf
	pop ax
	test ax,03000h
	jz short forever
	push byte 0		; reload FLAGS with 0 (DF and IF end up clear)
	popf


	;;	Use one segment for everything: SS = DS = CS.
	;;	The stack starts at _start_kernel and grows down into the
	;;	zero-filled area reserved in front of that label.
	mov ax,cs
	mov ss,ax
	mov esp,_start_kernel
	mov ds,ax




	;;	init FDBIOS
	;;	INT 13h with AH=0 (reset disk system), DL=0.
	xor ax,ax
	xor dx,dx
	int 0x13


	;;	get low memory size
	;;	INT 12h returns the size in AX; it is stored multiplied by 64
	;;	(KiB -> 16-byte paragraphs).
	int 12h
	shl ax,6
	mov [szLowMem],ax


	;;	Legacy A20 enable
	;;	Keyboard-controller sequence, compiled out (%if 0) and kept
	;;	only for reference.
%if 0
	call _PS2_wait_for_out
	mov al,0AEh
	out 064h,al
	call _PS2_wait_for_out
	mov al,0D1h
	out 064h,al
	call _PS2_wait_for_out
	mov al,0DFh
	out 060h,al
	call _PS2_wait_for_out
	mov al,0A8h
	out 064h,al
%endif


	;;	OADG A20 enable
	;;	Fast A20 through system control port 0x92: set bit 1 (A20).
	;;	Bit 0 of the same port requests a fast CPU reset when written
	;;	as 1, so it is always cleared in the value written back.
	in al,092h
	or al,2
	and al,0FEh
	out 092h,al


	;;	get SMAP
	;;	Walk the BIOS memory map (INT 15h, EAX=E820h) and record the
	;;	ranges of interest in the memstat fields:
	;;	  type 1 at 0x00100000          -> szMidMem (capped at 15 MiB,
	;;	                                   the excess goes to szHiMem)
	;;	  type 1 at 0x01000000          -> szHiMem
	;;	  type 1 at 0x1_00000000 (4 GiB)-> szExtMem (64-bit length)
	;;	  type 3 / type 4               -> acpiRec* / acpiNVS*
	;;	Only the low 32 bits of base/length are kept except for szExtMem.
	;;	A 20-byte entry buffer lives on the stack; ES:DI and DS:DI both
	;;	address it because ES = DS = SS = CS here.
	;;	EBX is the BIOS continuation value and must survive each pass.
	push cs
	pop es
	xor ebx,ebx
	sub esp,20
smap_cont:
	mov eax,0xE820
	mov edx,0x534D4150		; 'SMAP'
	mov edi,esp
	mov ecx,20
	int 0x15
	jc smap_end			; unsupported or end of list
	cmp eax,0x534D4150		; BIOS must echo 'SMAP' on success
	jnz smap_end
	mov al,[di+16]			; entry type
	cmp al,0x01
	jz smap_freemem
	cmp al,0x03
	jz smap_acpi_rec
	cmp al,0x04
	jz smap_acpi_nvs
	jmp smap_nomem

smap_freemem:
	mov eax,[di+4]			; base, high dword
	or eax,eax
	jnz smap_above_4G
	mov eax,[di]			; base, low dword
;	or eax,eax
;	jz smap_lomem
	cmp eax,0x00100000
	jz smap_protmem
	cmp eax,0x01000000
	jz smap_himem
	jmp smap_nomem

smap_protmem:
	;;	range starting at 1 MiB: at most 0x00F00000 bytes count as
	;;	"mid" memory, whatever is left is reported as "hi" memory
	mov eax,[di+8]
	mov ecx,0x00F00000
	cmp eax,ecx
	ja smap_prot_noisa
	mov [szMidMem],eax
	jmp smap_nomem
smap_prot_noisa:
	mov [szMidMem],ecx
	sub eax,ecx
	mov [szHiMem],eax
	jmp smap_nomem

smap_himem:
	mov eax,[di+8]
	mov [szHiMem],eax
	jmp smap_nomem

	;;	above 4G
smap_above_4G:
	cmp eax,1
	jnz smap_nomem	; high of 4GB=1
	mov eax,[di]
	or eax,eax		; low of 4GB=0
	jnz smap_nomem
	mov eax,[di+8]
	mov [szExtMem],eax
	mov eax,[di+12]
	mov [szExtMem+4],eax
	jmp smap_nomem

smap_acpi_rec:
	mov eax,[di]
	mov [acpiRecMemBase],eax
	mov eax,[di+8]
	mov [acpiRecMemSize],eax
	jmp smap_nomem
smap_acpi_nvs:
	mov eax,[di]
	mov [acpiNVSMemBase],eax
	mov eax,[di+8]
	mov [acpiNVSMemSize],eax
;	jmp smap_nomem
smap_nomem:
	or ebx,ebx			; continuation 0 = last entry
	jnz smap_cont
smap_end:
	;;	every exit path releases the entry buffer
	add esp,20


	;;  APM Check
	;;	INT 15h AX=5300h with BX=0. The version in AX is stored in
	;;	_apm_ver only when CF is clear, BX = 0x504D ("PM") and
	;;	AX >= 0x0100; otherwise _apm_ver keeps its initial 0.
	mov ax,05300h
	xor bx,bx
	int 015h
	jc init_noapmAT
	cmp bx,504Dh
	jnz init_noapmAT
	cmp ax,0100h
	jb init_noapmAT
	mov [_apm_ver],ax
init_noapmAT:


	;;	VGA setting
	;;	INT 10h AX=0003h: select video mode 3 (the text mode that
	;;	_puts writes to through _text_vram, 80 cells per row).
	mov ax,0x0003
	int 0x10
;	mov ax,0x1112
;	xor bl,bl
;	int 0x10


	;;  VM check
	;;	Bit 0 (PE) of the machine status word must be clear, i.e. the
	;;	loader must be running in real mode; otherwise report and halt.
	smsw ax
	and ax,1
	jz _no_inPM
	mov si,_not_inRM_msg
	jmp puts_halt
_no_inPM:


	;;  A20 check
	;;	Compare 256 dwords at 0000:0000 with the 256 dwords at
	;;	FFFF:0010 (linear 0x100000). With A20 masked the second range
	;;	wraps onto the first and every dword matches (ZF=1 after the
	;;	repeat), which is treated as an A20 failure.
	xor ax,ax
	mov ds,ax
	dec ax
	mov es,ax			; ES = 0xFFFF
	xor si,si
	mov di,16
	mov cx,256
	rep cmpsd			; repeats while the dwords are equal
	push cs				; restore DS = CS (PUSH/POP keep the flags)
	pop ds
	jnz _A20_ok
	mov si,a20_err_msg
	jmp puts_halt
_A20_ok:


	;;  486 check
	;;	Align SP to 4 bytes, disable interrupts, then try to toggle
	;;	EFLAGS bit 18 (0x40000, AC). If the bit cannot be changed the
	;;	CPU is rejected.
	and sp,byte 0xFC
	cli
	pushfd
	pop eax
	mov ecx,eax			; ECX = original EFLAGS
	mov edx,040000h
	xor eax,edx
	push eax
	popfd
	pushfd
	pop eax
	xor eax,ecx			; EAX = bits that actually changed
	test eax,edx
	jnz _486ok
	mov si,_no486_msg
	jmp puts_halt
_486ok:


	;;	LONG MODE Check
	;;	1. EFLAGS bit 21 (0x200000, ID) must be toggleable -> CPUID exists.
	;;	2. CPUID 0x80000000 must report a maximum leaf above 0x80000000.
	;;	3. CPUID 0x80000001 must have EDX bit 29 (long mode) set.
	pushfd
	pop eax
	mov ecx,eax
	mov edx,0200000h
	xor eax,edx
	push eax
	popfd
	pushfd
	pop eax
	xor eax,ecx
	test eax,edx
	jz no_long_mode
	mov eax,0x80000000
	cpuid
	cmp eax,0x80000000
	jbe no_long_mode
	mov eax,0x80000001
	cpuid
	bt edx,29
	jc long_mode_ok
no_long_mode:
	mov si,_no_amd64_msg
	jmp puts_halt
long_mode_ok:


	;;	ROM BUS CHECK
	;;	Write 0xCB (the far-return opcode) to FFFF:0000, flush the
	;;	caches and read it back. If the byte did NOT stick the area is
	;;	write protected and the check passes. If it did stick, the
	;;	hand-encoded "call far 0xFFFF:0x0000" below executes that far
	;;	return, after which the error message is printed and we halt.
	push byte -1
	pop ds				; DS = 0xFFFF
	xor bx,bx
	mov al,0xCB
	mov [bx],al
	wbinvd
	sti
	nop
	cmp [bx],al
	jnz mem_wp
	db 0x9A				; call far ptr16:16 ...
	dw 0,-1				; ... offset 0x0000, segment 0xFFFF
	mov si,rom_ram_msg
	jmp puts_halt
mem_wp:
	push cs
	pop ds				; back to DS = CS


	;;	MEMORY CHECK
	;;	1 MiB + szMidMem + szHiMem must be at least 64 MiB.
	mov eax,0x00100000
	add eax,[szMidMem]
	add eax,[szHiMem]
	cmp eax,64*1024*1024
	jae mem_ok
	mov si,no_mem_msg
	jmp puts_halt
mem_ok:


	;;	signature check
	;;	The first dword at _start_kernel must be 0x7F,'E','L','F'.
	mov eax,[_start_kernel]
	cmp eax,0x464C457F ; ELF
	jz sign_ok
	mov si,bad_kernel_image
	jmp puts_halt
sign_ok:


	;;	"now starting"
	;;	All checks passed: print the banner (DS = CS here).
	mov si,banner
	call _puts

	;; GDT
	;;	The first 8 bytes at __GDT hold the limit/base image for LGDT.
	lgdt [cs:__GDT]
	
	;; 
	;;	Publish the offset of the GDT in the memstat area.
	;;	NOTE(review): this is a 32-bit store; verify that gdt64_Base
	;;	reserves at least 4 bytes.
	mov eax, __GDT
	mov [gdt64_Base], eax

	;; 32bit Protected Mode
	cli
	mov eax,cr0
	or	eax,0x000050021	; AM/WP/NE/PE
	mov cr0,eax
	db 0xEB,0x00			; jmp short to the next instruction

	;;	Hand-encoded far jump with a 32-bit offset:
	;;	jmp 0x0020:(_cs_base+now_in_prot)
	db 0x66,0xEA
	dd _cs_base+now_in_prot
	dw 0x0020

[bits 32]
;;	now_in_prot : flat 32-bit protected-mode stage.
;;	Loads the flat data selector, builds the long-mode page tables at
;;	_base_PML4.._base_idt64, enables PAE/LME/paging and far-jumps to the
;;	64-bit code segment (selector 0x40).
;;
;;	Resulting tables (all page-directory entries are 2 MiB pages):
;;	  PML4[0]   -> PDPT0 (_base_PDPT)       PML4[511]  -> PDPT1 (_base_PDPT+0x1000)
;;	  PDPT0[0..3] -> PDT0..PDT3 (_base_PDT4GB + n*0x1000)
;;	  PDPT1[3] and PDPT1[511] -> PDT3
;;	  PDT0[0..7]     = physical 0..16 MiB            (identity mapping)
;;	  PDT3[504..511] = physical 0..16 MiB, GLOBAL    (high alias)
;;	  every other entry is zero (not present)
now_in_prot:
	cld				; all table writes below rely on DF=0
	mov eax,0x28			; 32-bit flat data selector
	mov ss,eax
	mov esp,0x10000
	mov ds,eax
	mov es,eax
	mov fs,eax
	mov gs,eax


%if 0
	mov edx,_cs_base+_start_kernel
	mov esi,edx
	mov edi,[edx+0x14]
	and edi,0xFFFFF000
	mov ecx,[edx+0x1C]
	rep movsb
	mov ecx,_cs_base+memstat
	jmp dword [edx+0x18]
%endif

	;;	init page table for OrangePekoe

	;;	PML4: entry 0 and entry 511, everything in between cleared
	mov edi,_base_PML4
	mov eax,_base_PDPT+7	; USER/RW/PRESENT
	stosd
	xor eax,eax
	mov ecx,0x7FA/2		; high dword of entry 0 + entries 1..510
	rep stosd
	mov eax,_base_PDPT+0x1007	; USER/RW/PRESENT
	stosd
	xor eax,eax
	stosd

	;;	PDPT0[0..2] -> PDT0..PDT2  (EDI = _base_PDPT here)
	mov edx,_base_PDT4GB+7	; USER/RW/PRESENT
	mov ecx,3
shl00:
	mov eax,edx
	stosd
	xor eax,eax
	stosd
	add edx,0x00001000
	loop shl00

	;;	PDPT0[3] -> PDT3, then clear up to PDPT1[3]
	mov eax,edx
	stosd
	xor eax,eax
	stosd
	mov ecx,0xFF8/4
	rep stosd
	;;	PDPT1[3] -> PDT3, then clear up to PDPT1[511]
	mov eax,edx
	stosd
	xor eax,eax
	stosd
	mov ecx,0x0FD8/4
	rep stosd
	;;	PDPT1[511] -> PDT3
	mov eax,edx
	stosd
	xor eax,eax
	stosd

	;;	PDT0[0..7]: identity-map the first 16 MiB  (EDI = _base_PDT4GB)
	mov edx,0x00000083	;; 2MB/KERNEL/RW/PRESENT
	mov ecx,8
loop_pdt:
	mov eax,edx
	stosd
	xor eax,eax
	stosd
	add edx,0x00200000
	loop loop_pdt
	;;	EAX is still 0: clear everything up to PDT3[504]
	mov ecx,0xFE0
	rep stosd
	;;	PDT3[504..511]: the same 16 MiB again, marked GLOBAL
	mov edx,0x00000183 ; GLOBAL/2MB/KERNEL/RW/PRESENT
	mov ecx,8
shl010:
	mov eax,edx
	stosd
	xor eax,eax
	stosd
	add edx,0x00200000
	loop shl010
;	xor eax,eax
;	stosd
;	stosd


	mov eax, cr4
	or eax,0x2A0		;OSFXSR/PGE/PAE
	mov cr4, eax

	mov ecx, 0xc0000080	; EFER
	rdmsr
	or eax,0x0901	; NXE/LME/SCE
	wrmsr

	mov eax, _base_PML4
	mov cr3, eax	; PDBR

	;;	Enter to Long Mode
	mov eax, cr0
	bts eax, 31		; PG
	mov cr0, eax
	db 0xEB,0x00		; jmp short to the next instruction

	;;	Hand-encoded far jump: jmp 0x0040:(_cs_base+start64).
	;;	The selector is a 16-bit field (ptr16:32), hence dw.
	db 0xEA
	dd _cs_base+start64
	dw 0x0040

;;	64bit mode
;;	start64 : load the IDTR, place every PT_LOAD segment of the ELF64
;;	image found at _start_kernel, then jump to the ELF entry point with
;;	RDI = address of memstat.
;;
;;	Registers:  r15 = ELF image base      r14 = current program header
;;	            r9  = e_phentsize         r10 = headers left to visit
;;	Elf64_Phdr: +0x00 p_type  +0x08 p_offset  +0x10 p_vaddr
;;	            +0x20 p_filesz  +0x28 p_memsz
[bits 64]
start64:
	cld				; string copies below run upwards
	;; 64bit IDT
	lidt [__IDT_64 wrt rip]

	;;	parse ELF program header
	lea r15,[_start_kernel wrt rip]
	mov r8,[r15+0x20]		; e_phoff
	movzx r9d,word [r15+0x36]	; e_phentsize
	movzx r10d,word [r15+0x38]	; e_phnum
	lea r14,[r15+r8]
	test r10d,r10d			; no program headers: nothing to load
	jz locate_kernel_done

locate_kernel_l00:
	mov eax,[r14]
	cmp eax,byte 0x01 ; PT_LOAD
	jnz locate_kernel_l99

	;;	1. clear the in-memory size of the segment (p_memsz), so the
	;;	   part not backed by file data reads as zero.
	;;	NOTE(review): the rounding adds 0xFFFF before masking to 4 KiB,
	;;	so up to ~64 KiB past the segment is cleared as well; kept as
	;;	in the original - confirm this slack is intended.
	mov rcx,[r14+0x28]		; p_memsz
	mov rdi,[r14+0x10]		; p_vaddr
	xor eax,eax
	lea ecx,[rcx+0xFFFF]
	and ecx,dword 0xFFFFF000
	shr ecx,3
	rep stosq
	;;	2. copy exactly p_filesz bytes from the image
	mov ecx,[r14+0x20]		; p_filesz
	mov rsi,[r14+0x08]		; p_offset
	mov rdi,[r14+0x10]		; p_vaddr
	lea rsi,[rsi+r15]
	rep movsb

locate_kernel_l99:
	lea r14,[r14+r9]
	dec r10d
	jnz locate_kernel_l00

locate_kernel_done:
	;;	jmp kernel
	lea rdi,[memstat wrt rip]
	mov rax,[r15+0x18]		; e_entry
	jmp rax


alignb 16
[section .data]

	;;	GDT
	;;	00 NULL
	;;	08 RESERVED
	;;	10 16bit CODE Loader
	;;	18 16bit DATA Loader
	;;	20 32bit CODE Flat Kernel
	;;	28 32bit DATA Flat Kernel
	;;	33 32bit CODE Flat User
	;;	3B 32bit DATA Flat User
	;;	40 64bit Kernel
	;;	4B 64bit User
	;;	50 64bit TSS
	;;
	;;	The null slot doubles as the operand of LGDT: limit 0x5F
	;;	(12 descriptors) followed by the base, i.e. the offset of
	;;	__GDT plus the upper half of _cs_base.
align 16
__GDT:
	dw 0x5F,__GDT,_cs_base/65536,0
	dw 0,0,0,0
	dw 0FFFFh,00000h
	db 001h,09Ah,000h,000h
	dw 0FFFFh,00000h
	db 001h,092h,000h,000h
	dw 0FFFFh,00000h
	db 000h,09Ah,0CFh,000h
	dw 0FFFFh,00000h
	db 000h,092h,0CFh,000h
	dw 0FFFFh,00000h
	db 000h,0FAh,0CFh,000h
	dw 0FFFFh,00000h
	db 000h,0F2h,0CFh,000h
	dw 0FFFFh,0,09A00h,000AFh
	dw 0FFFFh,0,0FA00h,000AFh
	dw 0,0,0,0
	dw 0,0,0,0

	;;	Operand of LIDT in long mode: 16-bit limit + 64-bit base.
__IDT_64:
	dw 0xFFF
	dq _base_idt64

	;;	memstat: results collected by the loader; its address is
	;;	handed to the kernel in RDI.
memstat:
acpiRecMemBase	dd 0
acpiRecMemSize	dd 0
acpiNVSMemBase	dd 0
acpiNVSMemSize	dd 0
szMidMem		dd 0
szHiMem			dd 0
szExtMem		dq 0
szLowMem		dw 0
_text_vram		dw 0xB800
_apm_ver		dw 0
_col			db 0
_row			db 0
	;;	Written with a 32-bit store (mov [gdt64_Base],eax), so the
	;;	field must be a dword; a word-sized field let the store run
	;;	into the first bytes of the data that follows.
gdt64_Base		dd 0
memstat_end:


;;	Zero-terminated messages for puts_halt / _puts.
;;	Byte 10 inside a string makes _puts start a new line.

;;	MEMORY CHECK failed (less than 64 MiB found)
no_mem_msg:
	db "NOT_ENOUGH_MEMORY",0

;;	A20 check failed (address wrap-around still active)
a20_err_msg:
	db "A20_HARDWARE_ERROR",0

;;	VM check failed (PE already set when the loader started)
_not_inRM_msg:
	db "VIRTUAL_MODE_ERROR",0

;;	486 check and long-mode check share one message
_no486_msg:
_no_amd64_msg:
	db "AMD64_FEATURE_IS_REQUIED",0

;;	ROM BUS CHECK failed (the byte written at FFFF:0000 stuck)
rom_ram_msg:
	db "BUS_ERROR (BIOS)",0

;;	The image at _start_kernel does not start with the ELF magic
bad_kernel_image:
	db "BAD_KERNEL_SIGNATURE_FOUND",0

;;	Printed once every check has passed
banner:
	db "System check done.",10
	db "Now enter to Longmode...",10,10,0


alignb 16

;;	256 zero bytes directly below _start_kernel. _crt0 loads ESP with
;;	_start_kernel, so the real-mode stack grows down into this area.
;	times 1024 db 0
	times 256 db 0
;;	End of the loader image. The loader expects the kernel ELF file to
;;	begin at this label (its first dword is checked for the ELF magic).
_strbuff:
_start_kernel:
