;; blob: 6bafae7f2da4e33eb9db22067753224533dacc0c [file] [log] [blame]
;; -*- fundamental -*-
;; -----------------------------------------------------------------------
;;
;; Copyright 1994-2008 H. Peter Anvin - All Rights Reserved
;; Copyright 2009 Intel Corporation; author: H. Peter Anvin
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;; Boston MA 02111-1307, USA; either version 2 of the License, or
;; (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------
;;
;; init16.asm
;;
;; Routine to initialize and to trampoline into 32-bit
;; protected memory. This code is derived from bcopy32.inc and
;; com32.inc in the main SYSLINUX distribution.
;;
%include '../version.gen'
; Real-mode segment this code copies itself to and then runs from
; (see start); CS_BASE is the same location as a linear address (0x8000).
MY_CS equ 0x0800 ; Segment address to use
CS_BASE equ (MY_CS << 4) ; Corresponding address
; Low memory bounce buffer
; (begins 0x1000 paragraphs = 64K above MY_CS)
BOUNCE_SEG equ (MY_CS+0x1000)
; When nonzero, enable_a20/disable_a20 call try_wbinvd and try_wbinvd
; itself is assembled; with 0 all of that is left out.
%define DO_WBINVD 0
; Declare every section, with its alignment, before first use.
section .rodata align=16
section .data align=16
section .bss align=16
section .stack align=16 nobits
; 512 bytes of real-mode stack. stack_end marks the top; start loads it
; into ESP, and it is also handed to the 32-bit code as the "RM size".
stack resb 512
stack_end equ $
;; -----------------------------------------------------------------------
;; Kernel image header
;; -----------------------------------------------------------------------
section .text ; Must be first in image
bits 16
; The first 512 bytes take the place of a boot sector: 497 bytes of
; command-line buffer followed by the legacy fields, which puts
; setup_sects at offset 0x1F1 and boot_flag at 0x1FE. Every offset in
; this header comes purely from the order and size of the fields, so do
; not insert, remove or resize anything here.
; (Layout matches the Linux x86 boot protocol setup header, version 2.03.)
cmdline times 497 db 0 ; We put the command line here
; Number of setup sectors; start copies (setup_sects+1) sectors.
; NOTE(review): zero here, so presumably patched when the image is built
; - confirm against the build scripts.
setup_sects db 0
root_flags dw 0
syssize dw 0
swap_dev dw 0
ram_size dw 0
vid_mode dw 0
root_dev dw 0
boot_flag dw 0xAA55
; Offset 0x200: entry point. The two-byte short jump skips the header
; fields that follow.
_start: jmp short start
db "HdrS" ; Header signature
dw 0x0203 ; Header version number
realmode_swtch dw 0, 0 ; default_switch, SETUPSEG
start_sys_seg dw 0x1000 ; obsolete
; Stored relative to offset 0x200, hence the subtraction
version_ptr dw memdisk_version-0x200 ; version string ptr
type_of_loader db 0 ; Filled in by boot loader
loadflags db 1 ; Please load high
setup_move_size dw 0 ; Unused
; Same address as pm_entry, which call32_call_start calls
code32_start dd 0x100000 ; 32-bit start address
ramdisk_image dd 0 ; Loaded ramdisk image address
ramdisk_size dd 0 ; Size of loaded ramdisk
bootsect_kludge dw 0, 0
heap_end_ptr dw 0
pad1 dw 0
; Linear address of the loader's command line; copy_cmdline reads it
; and copies the string into cmdline above.
cmd_line_ptr dd 0 ; Command line
ramdisk_max dd 0xffffffff ; Highest allowed ramdisk address
;
; These fields aren't real setup fields, they're poked in by the
; 32-bit code.
;
; They are read back (through CS) after init32 returns, to set up the
; registers for the jump into the loaded boot sector.
b_esdi dd 0 ; ES:DI for boot sector invocation
b_edx dd 0 ; EDX for boot sector invocation
b_sssp dd 0 ; SS:SP on boot sector invocation
b_csip dd 0 ; CS:IP on boot sector invocation
section .rodata
; NUL-terminated identification string that version_ptr points to.
; VERSION_STR and DATE are not defined in this file; presumably they
; come from the included version.gen.
memdisk_version:
db "MEMDISK ", VERSION_STR, " ", DATE, 0
;; -----------------------------------------------------------------------
;; End kernel image header
;; -----------------------------------------------------------------------
;
; Move ourselves down into memory to reduce the risk of conflicts;
; then canonicalize CS to match the other segments.
;
section .text
bits 16
; start: reached from the short jump at _start.
; Copies (setup_sects + 1) * 512 bytes from DS:0 to MY_CS:0, then
; switches DS, SS and finally CS to MY_CS so that execution continues
; in the copy. Assumes the loader entered with DS addressing the start
; of this image (the copy source is DS:0) - TODO confirm against loader.
; On exit: DS = ES = SS = CS = MY_CS, FS = 0, ESP = stack_end, DF clear.
start:
mov ax,MY_CS
mov es,ax
movzx cx,byte [setup_sects]
inc cx ; Add one for the boot sector
; One 512-byte sector is 128 dwords, hence the shift by 7
shl cx,7 ; Convert to dwords
xor si,si
xor di,di
mov fs,si ; fs <- 0
cld
rep movsd
; From here on all data accesses go to the relocated copy
mov ds,ax
mov ss,ax
mov esp,stack_end
; Far jump reloads CS, so .next runs from the relocated copy
jmp MY_CS:.next
.next:
;
; Copy the command line, if there is one
;
copy_cmdline:
		; Destination is ES:0, the cmdline buffer at the very start of
		; our own segment. ES and the cleared direction flag are left
		; over from the relocation code above.
		xor di,di
		mov eax,[cmd_line_ptr]		; Linear address supplied by the loader
		test eax,eax
		jz .terminate			; Null pointer: store an empty string
		; Split the linear address into GS:SI with SI in 0..15
		mov si,ax
		and si,0x000F
		shr eax,4
		mov gs,ax
		mov cx,496			; Leave room for the terminator
.next_char:
		gs lodsb			; AL <- [GS:SI++]
		test al,al
		jz .terminate			; End of the source string
		stosb				; [ES:DI++] <- AL
		loop .next_char
.terminate:
		; Always NUL-terminate, whether we stopped on the source's
		; terminator, hit the 496-byte cap, or had no command line
		xor al,al
		stosb
;
; Now jump to 32-bit code
;
; Interrupts on, then run the 32-bit setup code. init32 comes back here
; in real mode through call32_done, which ends in "sti / ret".
sti
call init32
;
; When init32 returns, we have been set up, the new boot sector loaded,
; and we should go and run the newly loaded boot sector.
;
; The setup function will have poked values into the setup area.
;
; The b_* fields are read through CS because DS is cleared below,
; before the last two of them are used.
; Only the low word (DI) of b_esdi is zero-extended into EDI; the high
; word is the ES value.
movzx edi,word [cs:b_esdi]
mov es,word [cs:b_esdi+2]
mov edx,[cs:b_edx]
; No interrupts while the stack and segments are being switched
cli
xor esi,esi ; No partition table involved
mov ds,si ; Make all the segments consistent
mov fs,si
mov gs,si
; Load SS:SP in one instruction, then clear the upper half of ESP
lss sp,[cs:b_sssp]
movzx esp,sp
; Indirect far jump through the stored CS:IP; does not return.
; Interrupts are still disabled at this point.
jmp far [cs:b_csip]
;
; We enter protected mode, set up a flat 32-bit environment, run rep movsd
; and then exit. IMPORTANT: This code assumes cs == MY_CS.
;
; This code is probably excessively anal-retentive in its handling of
; segments, but this stuff is painful enough as it is without having to rely
; on everything happening "as it ought to."
;
; Linear address used as the base of the dummy TSS descriptor below
DummyTSS equ 0x580 ; Hopefully safe place in low memory
section .data
; desc base, limit, flags
; Emits one 8-byte segment descriptor.
;   %1 base  - 32-bit linear base address
;   %2 limit - 20-bit limit
;   %3 flags - bits 0-7 become the access byte (descriptor bits 40-47),
;              bits 12-15 become the flag nibble (descriptor bits 52-55)
%macro desc 3
dd (%2 & 0xffff) | ((%1 & 0xffff) << 16)
dd (%1 & 0xff000000) | (%2 & 0xf0000) | ((%3 & 0xf0ff) << 8) | ((%1 & 0x00ff0000) >> 16)
%endmacro
align 8, db 0
; The GDT. Slot 0 (the null descriptor) doubles as the 6-byte operand of
; the lgdt in call32_enter_pm: 16-bit limit followed by the 32-bit linear
; base of this table.
; .adj1 labels that base dword; it is not referenced in this file
; - presumably a fix-up point for code that relocates the table.
call32_gdt: dw call32_gdt_size-1 ; Null descriptor - contains GDT
.adj1: dd call32_gdt+CS_BASE ; pointer for LGDT instruction
dw 0
; 0008: Dummy TSS to make Intel VT happy
; Should never be actually accessed...
; (call32_enter_pm rewrites its access byte to 89h before every ltr)
desc DummyTSS, 103, 0x8089
; 0010: Code segment, use16, readable, dpl 0, base CS_BASE, 64K
desc CS_BASE, 0xffff, 0x009b
; 0018: Data segment, use16, read/write, dpl 0, base CS_BASE, 64K
desc CS_BASE, 0xffff, 0x0093
; 0020: Code segment, use32, readable, dpl 0, base 0, 4G
desc 0, 0xfffff, 0xc09b
; 0028: Data segment, use32, read/write, dpl 0, base 0, 4G
desc 0, 0xfffff, 0xc093
call32_gdt_size: equ $-call32_gdt
; Message printed by enable_a20 when all attempts fail (CR, LF, NUL)
err_a20: db 'ERROR: A20 gate not responding!',13,10,0
section .bss
alignb 4
; NOTE(review): Return is not referenced anywhere in this file.
Return resd 1 ; Return value
; Real-mode SP. Written by call32_enter_pm, reloaded by call32_enter_rm,
; and adjusted from 32-bit code by call32_syscall/call32_sys_resume.
SavedSP resw 1 ; Place to save SP
; Remaining retry count for enable_a20 (starts at 255)
A20Tries resb 1
section .data
align 4, db 0
; NOTE(review): Target and Target_Seg are not referenced in this file;
; 20h is the selector of the flat 32-bit code segment in call32_gdt.
Target dd 0 ; Target address
Target_Seg dw 20h ; Target CS
; One of the A20_* values; used as an index into A20List and A20DList.
; Declared as a word because it is loaded into BP, but the enable code
; only ever writes the low byte.
A20Type dw 0 ; Default = unknown
section .text
bits 16
;
; Routines to enable and disable (yuck) A20. These routines are gathered
; from tips from a couple of sources, including the Linux kernel and
; http://www.x86.org/. The need for the delay to be as large as given here
; is indicated by Donnie Barnes of RedHat, the problematic system being an
; IBM ThinkPad 760EL.
;
; We typically toggle A20 twice for every 64K transferred.
;
; io_delay expands to a near call, so it uses the stack
%define io_delay call _io_delay
%define IO_DELAY_PORT 80h ; Invalid port (we hope!)
; Number of a20_test polls made by the disable path
%define disable_wait 32 ; How long to wait for a disable
; A20 method codes stored in A20Type. Their values are the indices into
; the two tables below, so the table order must match.
%define A20_DUNNO 0 ; A20 type unknown
%define A20_NONE 1 ; A20 always on?
%define A20_BIOS 2 ; A20 BIOS enable
%define A20_KBC 3 ; A20 through KBC
%define A20_FAST 4 ; A20 through port 92h
align 2, db 0
; Dispatch tables of near offsets, indexed by A20Type * 2:
; A20List for enable_a20, A20DList for disable_a20. They live in .text
; and are read with a BP-based address (so through SS).
A20List dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast
A20DList dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast
; Word count of both tables together; not referenced in this file
a20_adjust_cnt equ ($-A20List)/2
; slow_out:  write AL to port DX, then fall into _io_delay.
; _io_delay: short delay made of two dummy writes of AL to
;            IO_DELAY_PORT. No registers or flags are changed.
slow_out: out dx, al ; Fall through
_io_delay: out IO_DELAY_PORT,al
out IO_DELAY_PORT,al
ret
;
; enable_a20: make sure the A20 line is enabled.
;
; Tries each method in turn - none needed, BIOS, keyboard controller,
; port 92h - starting at the one recorded in A20Type, and records the
; method being attempted in A20Type as it goes. Whichever method makes
; a20_test succeed is therefore the one left in A20Type for later calls
; and for disable_a20. The whole sequence is retried up to 255 times.
;
; Returns with all general registers preserved (pushad/popad).
; On total failure it prints err_a20 and halts; it does not return.
;
enable_a20:
pushad
mov byte [A20Tries],255 ; Times to try to make this work
try_enable_a20:
;
; Flush the caches
;
%if DO_WBINVD
call try_wbinvd
%endif
;
; If the A20 type is known, jump straight to type
;
mov bp,[A20Type]
add bp,bp ; Convert to word offset
; A BP-based address defaults to the SS segment, so this relies on SS
; addressing the same segment as the table (callers run with SS == CS).
.adj4: jmp word [bp+A20List]
;
; First, see if we are on a system with no A20 gate
;
a20_dunno:
a20_none:
mov byte [A20Type], A20_NONE
call a20_test
jnz a20_done
;
; Next, try the BIOS (INT 15h AX=2401h)
;
a20_bios:
mov byte [A20Type], A20_BIOS
mov ax,2401h
pushf ; Some BIOSes muck with IF
int 15h
popf
; The BIOS return status is ignored; only a20_test decides
call a20_test
jnz a20_done
;
; Enable the keyboard controller A20 gate
;
a20_kbc:
mov dl, 1 ; Allow early exit
call empty_8042
; empty_8042 returns ZF=0 only if it saw A20 enabled while waiting
jnz a20_done ; A20 live, no need to use KBC
mov byte [A20Type], A20_KBC ; Starting KBC command sequence
mov al,0D1h ; Write output port
out 064h, al
call empty_8042_uncond
mov al,0DFh ; A20 on
out 060h, al
call empty_8042_uncond
; Apparently the UHCI spec assumes that A20 toggle
; ends with a null command (assumed to be for synchronization?)
; Put it here to see if it helps anything...
mov al,0FFh ; Null command
out 064h, al
call empty_8042_uncond
; Verify that A20 actually is enabled. Do that by
; observing a word in low memory and the same word in
; the HMA until they are no longer coherent. Note that
; we don't do the same check in the disable case, because
; we don't want to *require* A20 masking (SYSLINUX should
; work fine without it, if the BIOS does.)
; CX = 0 makes the loop instruction run 65536 times
.kbc_wait: push cx
xor cx,cx
.kbc_wait_loop:
call a20_test
jnz a20_done_pop
loop .kbc_wait_loop
pop cx
;
; Running out of options here. Final attempt: enable the "fast A20 gate"
;
a20_fast:
mov byte [A20Type], A20_FAST ; Haven't used the KBC yet
in al, 092h
or al,02h
and al,~01h ; Don't accidentally reset the machine!
out 092h, al
; Same 65536-iteration verification wait as for the KBC
.fast_wait: push cx
xor cx,cx
.fast_wait_loop:
call a20_test
jnz a20_done_pop
loop .fast_wait_loop
pop cx
;
; Oh bugger. A20 is not responding. Try frobbing it again; eventually give up
; and report failure to the user.
;
; A20Type is A20_FAST at this point, so each retry re-enters at a20_fast
dec byte [A20Tries]
jnz try_enable_a20
; Error message time
mov si,err_a20
; Print the NUL-terminated string at DS:SI using INT 10h AH=0Eh
; (BX = 7), then stop.
print_err:
lodsb
and al,al
jz die
mov bx,7
mov ah,0xe
int 10h
jmp print_err
; Halt forever. Interrupts are enabled first, so every wake-up from hlt
; just loops back into hlt.
die:
sti
.hlt: hlt
jmp short .hlt
;
; A20 unmasked, proceed...
;
; a20_done_pop is the exit for the wait loops, which have CX pushed
a20_done_pop: pop cx
a20_done: popad
ret
;
; This routine tests if A20 is enabled (ZF = 0). This routine
; must not destroy any register contents.
;
; This is the INT 1Fh vector, which on standard PCs is used by the
; BIOS when the screen is in graphics mode. Even if it is, it points to
; data, not code, so it should be safe enough to fiddle with.
; Offset of the probed dword in segment 0 (the INT 1Fh vector slot)
A20Test equ (1Fh*4)
; a20_test: returns ZF = 0 if A20 is enabled, ZF = 1 if it looks masked.
;
; Compares the dword at 0000:A20Test with the dword at
; FFFF:A20Test+10h, which is the same offset 1 MB higher and aliases
; the low copy when A20 is masked. If the two already differ, A20 is on.
; If they are equal, the low copy is changed and re-compared up to 32
; times, to tell aliasing from coincidence and to allow the gate time
; to respond; the original value is then written back.
;
; All registers are preserved. Nothing after the deciding compare
; touches the flags, so ZF survives to the caller.
a20_test:
push ds
push es
push cx
push eax
xor ax,ax
mov ds,ax ; DS == 0
dec ax
mov es,ax ; ES == 0FFFFh
mov cx,32 ; Loop count
mov eax,[A20Test]
cmp eax,[es:A20Test+10h]
jne .a20_done
; Values match: save the original so it can be restored afterwards
push eax
.a20_wait:
inc eax
mov [A20Test],eax
io_delay
cmp eax,[es:A20Test+10h]
; Repeat while the high copy still mirrors the write (ZF=1) and
; tries remain; loopz leaves ZF as the compare set it
loopz .a20_wait
pop dword [A20Test] ; Restore original value
.a20_done:
pop eax
pop cx
pop es
pop ds
ret
;
; disable_a20: undo enable_a20 using the method recorded in A20Type.
;
; Dispatches through A20DList. For types "unknown" and "none" nothing
; is done. After issuing the disable, a20_test is polled up to
; disable_wait times; the routine returns either way, so A20 actually
; being masked is not required. All general registers are preserved.
;
disable_a20:
pushad
;
; Flush the caches
;
%if DO_WBINVD
call try_wbinvd
%endif
mov bp,[A20Type]
add bp,bp ; Convert to word offset
; BP-based address, so the table is read through SS (SS == CS here)
.adj5: jmp word [bp+A20DList]
; Disable through the BIOS (INT 15h AX=2400h); the status is ignored
a20d_bios:
mov ax,2400h
pushf ; Some BIOSes muck with IF
int 15h
popf
jmp short a20d_snooze
;
; Disable the "fast A20 gate"
;
a20d_fast:
in al, 092h
; Clears bit 1 (the bit enable_a20 set) and also bit 0, which the
; enable path avoids setting because it would reset the machine
and al,~03h
out 092h, al
jmp short a20d_snooze
;
; Disable the keyboard controller A20 gate
;
a20d_kbc:
call empty_8042_uncond
mov al,0D1h
out 064h, al ; Write output port
call empty_8042_uncond
mov al,0DDh ; A20 off
out 060h, al
call empty_8042_uncond
mov al,0FFh ; Null command/synchronization
out 064h, al
call empty_8042_uncond
; Wait a bit for it to take effect
a20d_snooze:
push cx
mov cx, disable_wait
; a20_test returns ZF=1 once A20 appears masked
.delayloop: call a20_test
jz .disabled
loop .delayloop
.disabled: pop cx
a20d_dunno:
a20d_none:
popad
ret
;
; Routine to empty the 8042 KBC controller. If dl != 0
; then we will test A20 in the loop and exit if A20 is
; suddenly enabled.
;
; empty_8042_uncond: same as empty_8042 with DL forced to 0, so the wait
;                    always runs until the controller is idle.
; empty_8042:        DL != 0 permits an early return once A20 is enabled.
;
; Returns ZF = 0 only on that early return (A20 seen enabled);
; ZF = 1 when the controller was drained normally. Clobbers AL.
empty_8042_uncond:
		xor dl,dl
empty_8042:
		call a20_test			; ZF = 0 if A20 is enabled
		jz .poll			; Still masked: keep waiting
		test dl,dl			; Enabled - is early exit allowed?
		jnz .done			; Yes; return with ZF = 0
.poll:		io_delay
		in al,064h			; Status port
		test al,1			; Output buffer full?
		jnz .discard
		test al,2			; Input buffer still full?
		jnz empty_8042			; Controller busy, go round again
		io_delay			; Leaves ZF = 1 from the test above
.done:		ret

.discard:	io_delay
		in al,060h			; Read and drop the pending byte
		jmp short empty_8042
;
; Execute a WBINVD instruction if possible on this CPU
;
%if DO_WBINVD
; Only assembled when DO_WBINVD is nonzero. Note that no CPU capability
; check is made here: wbinvd is issued unconditionally.
try_wbinvd:
wbinvd
ret
%endif
section .bss
alignb 4
; Saved by call32_enter_rm on every switch to real mode and reloaded by
; call32_enter_pm. On the very first entry nothing has been saved yet;
; call32_call_start sets ESP itself straight afterwards.
PMESP resd 1 ; Protected mode %esp
section .idt nobits align=4096
alignb 4096
; Space only; nothing in this file writes to it
pm_idt resb 4096 ; Protected-mode IDT, followed by interrupt stubs
; Linear address where the 32-bit program is entered (the same value as
; code32_start in the header)
pm_entry: equ 0x100000
section .rodata
align 2, db 0
; Operand for the lidt in call32_enter_rm: the real-mode interrupt
; vector table, base 0 with a 64K limit.
call32_rmidt:
dw 0ffffh ; Limit
dd 0 ; Address
section .data
		; Initialised section: pad with explicit zero bytes, as the
		; other .data/.rodata alignments in this file do. alignb
		; reserves space with resb, which belongs in nobits sections.
		align 2, db 0
; Operand for lidt/sidt of the protected-mode IDT.
; call32_enter_pm loads IDTR from here and call32_enter_rm stores the
; current IDTR back with sidt, so these initial values only matter for
; the first switch into protected mode.
call32_pmidt:
		; A descriptor-table limit is the offset of the last valid
		; byte, i.e. size - 1 (compare call32_gdt_size-1 for the GDT):
		; 256 gates of 8 bytes give 8*256-1.
		dw 8*256-1			; Limit
		dd 0				; Address (entered later)
section .text
;
; This is the main entrypoint in this function
;
; init32 is called (near) from the real-mode startup code. The matching
; ret is executed by call32_done, after the 32-bit program has returned
; and the CPU is back in real mode.
init32:
mov bx,call32_call_start ; Where to go in PM
;
; Enter protected mode. BX contains the entry point relative to the
; real-mode CS.
;
; In:  BX = offset of the 32-bit continuation; runs in real mode
; Out: at the continuation, CS = 20h (flat code), DS = ES = SS = 28h
;      (flat data), FS = GS = null, TR = 08h, EBP = linear base of the
;      real-mode segment, ESP = saved PMESP, interrupts off, DF clear.
;      The real-mode SP is saved in SavedSP.
;
call32_enter_pm:
mov ax,cs
mov ds,ax
movzx ebp,ax
; Base is computed from the live CS rather than the CS_BASE constant
shl ebp,4 ; EBP <- CS_BASE
movzx ebx,bx
add ebx,ebp ; entry point += CS_BASE
cli
mov [SavedSP],sp
cld
call enable_a20
; ltr below marks the TSS busy and faults on a busy TSS, so the access
; byte (byte 5 of descriptor 0008) is reset before every entry
mov byte [call32_gdt+8+5],89h ; Mark TSS unbusy
; o32 makes lgdt/lidt load a full 32-bit base from 16-bit code
o32 lgdt [call32_gdt] ; Set up GDT
o32 lidt [call32_pmidt] ; Set up IDT
mov eax,cr0
or al,1
mov cr0,eax ; Enter protected mode
; Far jump with a 32-bit offset into the flat code segment. The target
; is a linear address, hence +CS_BASE.
jmp 20h:strict dword .in_pm+CS_BASE
; Address of that jump's 6-byte offset:selector operand. It is passed
; to the 32-bit program by call32_call_start - presumably so it can be
; patched if the real-mode code gets relocated.
.pm_jmp equ $-6
bits 32
.in_pm:
xor eax,eax ; Available for future use...
; Null selectors in FS and GS, and a null LDT
mov fs,eax
mov gs,eax
lldt ax
mov al,28h ; Set up data segments
mov es,eax
mov ds,eax
mov ss,eax
; 08h = the dummy TSS descriptor
mov al,08h
ltr ax
; DS is flat now, so the variable is addressed via the EBP base
mov esp,[ebp+PMESP] ; Load protmode %esp if available
jmp ebx ; Go to where we need to go
;
; This is invoked before first dispatch of the 32-bit code, in 32-bit mode
;
; First 32-bit continuation, reached through call32_enter_pm from
; init32. Builds the argument block for the 32-bit program on a
; temporary stack and calls it at pm_entry. When the program returns,
; execution falls through to call32_exit.
call32_call_start:
;
; Set up a temporary stack in the bounce buffer;
; start32.S will override this to point us to the real
; high-memory stack.
;
; (top of the 64K that starts at BOUNCE_SEG)
mov esp, (BOUNCE_SEG << 4) + 0x10000
; Eight dword arguments. The last one pushed (the syscall entry) ends
; up at the lowest address, directly above the return address.
; Code and data addresses get +CS_BASE to make them linear.
push dword call32_enter_rm.rm_jmp+CS_BASE
push dword call32_enter_pm.pm_jmp+CS_BASE
push dword stack_end ; RM size
push dword call32_gdt+CS_BASE
push dword call32_handle_interrupt+CS_BASE
push dword CS_BASE ; Segment base
push dword (BOUNCE_SEG << 4) ; Bounce buffer address
push dword call32_syscall+CS_BASE ; Syscall entry point
; The call is relative and this code executes CS_BASE above its
; assembled offsets, so the target is given as pm_entry-CS_BASE in
; order to land on linear pm_entry.
call pm_entry-CS_BASE ; Run the program...
; ... fall through to call32_exit ...
; call32_exit: the 32-bit program has returned; go back to real mode
; and continue at call32_done.
call32_exit:
mov bx,call32_done ; Return to command loop
;
; call32_enter_rm: switch from 32-bit protected mode to real mode.
; In:  BX = real-mode offset (within the real-mode code segment) to
;      continue at
; Out: real mode, all segment registers = CS, SP = SavedSP, interrupts
;      off, DF clear; the protected-mode ESP and IDTR have been saved.
;
call32_enter_rm:
; Careful here... the PM code may have relocated the
; entire RM code, so we need to figure out exactly
; where we are executing from. If the PM code has
; relocated us, it *will* have adjusted the GDT to
; match, though.
; call/pop gives the linear address of .here; subtracting its assembled
; offset leaves the linear base of the real-mode segment in EBP
call .here
.here: pop ebp
sub ebp,.here
; Save the current IDTR so call32_enter_pm reloads the same IDT
o32 sidt [ebp+call32_pmidt]
cli
cld
mov [ebp+PMESP],esp ; Save exit %esp
xor esp,esp ; Make sure the high bits are zero
; Selector 10h: the 16-bit code segment based at the real-mode segment
jmp 10h:.in_pm16 ; Return to 16-bit mode first
bits 16
.in_pm16:
; Load every data segment register from the 64K, 16-bit data
; descriptor before leaving protected mode
mov ax,18h ; Real-mode-like segment
mov es,ax
mov ds,ax
mov ss,ax
mov fs,ax
mov gs,ax
lidt [call32_rmidt] ; Real-mode IDT (rm needs no GDT)
mov eax,cr0
and al,~1
mov cr0,eax
; Far jump to reload CS with a real-mode segment value
jmp MY_CS:.in_rm
; Address of that jump's 16-bit segment operand. It is passed to the
; 32-bit program by call32_call_start - presumably so it can be patched
; if the real-mode code gets relocated.
.rm_jmp equ $-2
.in_rm: ; Back in real mode
mov ax,cs
mov ds,ax
mov es,ax
mov fs,ax
mov gs,ax
mov ss,ax
mov sp,[SavedSP] ; Restore stack
jmp bx ; Go to wherever we need to go...
; Real-mode continuation chosen by call32_exit. The stack is the one
; init32 was called on (SP restored from SavedSP), so the ret below
; returns to init32's caller with A20 disabled again and interrupts on.
call32_done:
call disable_a20
sti
ret
;
; 16-bit support code
;
bits 16
;
; 16-bit interrupt-handling code
;
; Reached in real mode through call32_enter_rm from
; call32_handle_interrupt, with EDX = segment:offset of the handler
; taken from the interrupt vector table.
;
call32_int_rm:
; Build the frame an INT instruction would push (flags, CS, IP), so
; that the handler's iret comes back to .cont
pushf ; Flags on stack
push cs ; Return segment
push word .cont ; Return address
; ...then far-return into the handler itself
push dword edx ; Segment:offset of IVT entry
retf ; Invoke IVT routine
.cont: ; ... on resume ...
mov bx,call32_int_resume
jmp call32_enter_pm ; Go back to PM
;
; 16-bit system call handling code
;
; Reached in real mode through call32_enter_rm from call32_syscall,
; with SP pointing at the 54-byte frame that call32_syscall built.
call32_sys_rm:
; Load the caller-supplied register image: 4 segment words (8 bytes),
; 8 dwords for popad (32 bytes) and EFLAGS (4 bytes) = 44 bytes
pop gs
pop fs
pop es
pop ds
popad
popfd
; Far-return to the target offset:segment that is next in the frame.
; The return offset, segment and flags words stay on the stack for the
; routine's iret, which lands on .return.
retf ; Invoke routine
.return:
; Push the resulting registers in the same 44-byte layout, then go
; back to protected mode; call32_sys_resume copies them out
pushfd
pushad
push ds
push es
push fs
push gs
mov bx,call32_sys_resume
jmp call32_enter_pm
;
; 32-bit support code
;
bits 32
;
; This is invoked on getting an interrupt in protected mode. At
; this point, we need to context-switch to real mode and invoke
; the interrupt routine.
;
; When this gets invoked, the registers are saved on the stack and
; AL contains the interrupt number.
;
call32_handle_interrupt:
movzx eax,al
xor ebx,ebx ; Actually makes the code smaller
; Real-mode vector table entry at linear address vector * 4
mov edx,[ebx+eax*4] ; Get the segment:offset of the routine
; Only BX is set: it is the real-mode offset to continue at
mov bx,call32_int_rm
jmp call32_enter_rm ; Go to real mode
; Continuation in protected mode after the real-mode handler has run
; (via call32_int_rm and call32_enter_pm). Restores the registers saved
; before call32_handle_interrupt was entered - presumably by the
; interrupt stubs, which are outside this file - and ends the interrupt.
call32_int_resume:
popad
iret
;
; Syscall invocation. We manifest a structure on the real-mode stack,
; containing the call32sys_t structure from <call32.h> as well as
; the following entries (from low to high address):
; - Target offset
; - Target segment
; - Return offset
; - Return segment (== real mode cs)
; - Return flags
;
; Entered by a near call from the 32-bit program. After the pushfd and
; pushad below, the stack arguments are at:
;   [esp+10*4]  interrupt number (low byte used)
;   [esp+11*4]  pointer to the 44-byte source register block
;   [esp+12*4]  pointer to the destination register block, or NULL
;               (read later by call32_sys_resume)
;
; Frame built on the real-mode stack, from the new SavedSP upwards:
;   +0   44-byte register block copied from the source pointer
;   +44  target offset:segment (interrupt vector table entry)
;   +48  return offset (call32_sys_rm.return)
;   +50  return segment
;   +52  return flags
call32_syscall:
pushfd ; Save IF among other things...
pushad ; We only need to save some, but...
cld
; EBP <- linear base of the real-mode segment (same call/pop trick as
; in call32_enter_rm)
call .here
.here: pop ebp
sub ebp,.here
movzx edi,word [ebp+SavedSP]
sub edi,54 ; Allocate 54 bytes
mov [ebp+SavedSP],di
add edi,ebp ; Create linear address
mov esi,[esp+11*4] ; Source regs
xor ecx,ecx
mov cl,11 ; 44 bytes to copy
rep movsd
movzx eax,byte [esp+10*4] ; Interrupt number
; ecx == 0 here; adding it to the EA makes the
; encoding smaller
mov eax,[ecx+eax*4] ; Get IVT entry
stosd ; Save in stack frame
mov ax,call32_sys_rm.return ; Return offset
stosw ; Save in stack frame
mov eax,ebp
shr eax,4 ; Return segment
stosw ; Save in stack frame
; EDI is now at frame offset 52, so [edi-12] is offset 40: the EFLAGS
; dword at the end of the copied register block
mov eax,[edi-12] ; Return flags
; The mask keeps CF, PF, AF, ZF, SF, DF, OF and ID (bit 21); TF, IF,
; IOPL, NT and the remaining bits are cleared
and eax,0x200cd7 ; Mask (potentially) unsafe flags
mov [edi-12],eax ; Primary flags entry
; Low 16 bits of the masked value become the flags word for the iret
stosw ; Return flags
mov bx,call32_sys_rm
jmp call32_enter_rm ; Go to real mode
; On return, the 44-byte return structure is on the
; real-mode stack. call32_enter_pm will leave ebp
; pointing to the real-mode base.
call32_sys_resume:
		; ESI <- linear address of the 44-byte result block that
		; call32_sys_rm left at the top of the real-mode stack
		movzx esi,word [ebp+SavedSP]
		add esi,ebp
		; EDI <- caller's destination pointer (third stack argument of
		; call32_syscall, seen past the pushad/pushfd frame)
		mov edi,[esp+12*4]
		test edi,edi			; Did the caller pass NULL?
		jnz .copy_regs
		mov edi,esi			; Yes: copy the block onto itself
.copy_regs:
		xor ecx,ecx
		mov cl,11			; 11 dwords = 44 bytes
		rep movsd			; Hand the registers back
		add word [ebp+SavedSP],44	; Drop the block from the RM stack
		; Undo the pushfd/pushad made on entry to call32_syscall
		popad
		popfd
		ret				; Back to the 32-bit caller