;; Source blob: 3658f09749da010f4691f62db7f8b5bc74aa87ef
;; -----------------------------------------------------------------------
;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;; Boston MA 02111-1307, USA; either version 2 of the License, or
;; (at your option) any later version; incorporated herein by reference.
;; -----------------------------------------------------------------------
; 32-bit bcopy routine
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  All of this code needs to be position-independent, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
bits 32
section .bcopyxx.text
align 16
; pm_bcopy:
; This is the protected-mode core of the "bcopy" routine.
; Try to do aligned transfers; if the src and dst are relatively
; misaligned, align the dst.
; ECX is guaranteed to not be zero on entry.
; Clobbers ESI, EDI, ECX.
; Register usage:
;	ESI	source address, or -1 to zero-fill the destination instead
;	EDI	destination address
;	ECX	byte count (nonzero on entry)
;	AL/BL	scratch copy of the low count bits, used for the tail
;	EDX	scratch copy of EDI, used for the alignment tests
; EAX, EBX and EDX are saved on entry and restored at .done.
;
; Three paths share the same shape: optionally move one byte and one
; word so that the destination is better aligned, move the bulk as
; dwords, then move the 0-3 leftover bytes indicated by the saved
; low count bits.
;
pm_bcopy:
		push ebx
		push edx
		push eax

		cmp esi,-1
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment
		mov edx,edi
		shr edx,1		; CF <- bit 0 of the destination
		jnc .faa1
		movsb			; Odd destination: move one byte
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we take .f_tiny
		cmp ecx,2
		jb .f_tiny		; 0 or 1 bytes left

		shr edx,1		; CF <- bit 1 of the destination
		jnc .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2
		jz .fab2
		movsw			; Trailing word
.fab2:
.f_tiny:
		test al,1
		jz .fab1
		movsb			; Trailing byte
.fab1:
.done:
		pop eax
		pop edx
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]	; EDI -> final destination byte
		mov esi,eax		; ESI -> final source byte

		; Initial alignment
		mov edx,edi
		shr edx,1
		jc .raa1
		movsb			; Final byte sits on an even address
		dec ecx
.raa1:

		dec esi			; Point at the start of the last word
		dec edi
		mov al,cl		; Low bits, in case we take .r_tiny
		cmp ecx,2
		jb .r_tiny
		shr edx,1
		jc .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Point at the start of the last dword
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
		add esi,2		; Back to word granularity
		add edi,2
		test al,2
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte granularity
		inc edi
		test al,1
		jz .rab1
		movsb
.rab1:
		cld			; Undo the std above before returning
		jmp short .done

.bzero:
		xor eax,eax		; Fill value

		; Initial alignment
		mov edx,edi
		shr edx,1
		jnc .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we take .z_tiny
		cmp ecx,2
		jb .z_tiny
		shr edx,1
		jnc .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1
		jz .zab1
		stosb
.zab1:
		jmp short .done
; shuffle_and_boot:
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory. This routine
; can clobber any memory outside the bcopy special area.
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
; Inputs:
; ESI -> Pointer to list of (dst, src, len) pairs(*)
; EDI -> Pointer to safe area for list + shuffler
; (must not overlap this code nor the RM stack)
; ECX -> Byte count of list area (for initial copy)
; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
; this is handled inside the bcopy routine.
; If len == 0: this marks the end of the list; dst indicates
; the entry point and src the mode (0 = pm, 1 = rm)
; (*) dst, src, and len are four bytes each
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
; Register roles once the descriptor list has been copied:
;	EBX	cursor into the (copied) descriptor list
;	EDX	first the relocation target, then the address delta
;		(new - old), and finally the address of the relocated GDT
;
		global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
		; C-ABI entry: arguments arrive in EAX, EDX, ECX; move the
		; first two into the registers the code below expects.
		mov edi,eax		; EDI <- safe area
		mov esi,edx		; ESI <- descriptor list

		; Register-ABI entry point (ESI/EDI/ECX as documented above).
		; NOTE(review): label name assumed; confirm against callers.
pm_shuffle:
		cli			; End interrupt service (for good)
		mov ebx,edi		; EBX <- descriptor list
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		call pm_bcopy		; Copy the list into the safe area
		mov esi,__bcopyxx_start	; Absolute source address
		mov edi,edx		; Absolute target address
		sub edx,esi		; EDX <- address delta
		mov ecx,__bcopyxx_dwords
		lea eax,[edx+.safe]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; Everything from here on runs from the relocated copy, so
		; every address is formed relative to the delta in EDX.

		; Give ourselves a safe stack
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- new GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
.loop:
		mov edi,[ebx]		; dst
		mov esi,[ebx+4]		; src (-1 means bzero)
		mov ecx,[ebx+8]		; len
		add ebx,12
		jecxz .done		; len == 0 terminates the list
		call pm_bcopy
		jmp .loop
.done:
		; Here EDI = entry point, ESI = mode, ECX = 0
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; == 0, for cleaning the flags register
		and esi,esi
		jz pm_shuffle_16
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a 16-bit entry point, so we need to return
		; to 16-bit mode.  Note: EDX already points to the GDT.
pm_shuffle_16:
		; Patch the base address of the 16-bit code and data
		; descriptors so that both segments start at the entry point.
		mov eax,edi
		mov [edx+PM_CS16+2],ax	; Base bits 15:0
		mov [edx+PM_DS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al	; Base bits 23:16
		mov [edx+PM_CS16+7],ah	; Base bits 31:24
		mov [edx+PM_DS16+4],al
		mov [edx+PM_DS16+7],ah
		; EAX <- CR0 with bit 0 cleared.  CR0 itself is not written
		; here; presumably the code at the entry point does that.
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; No flag-changing instructions below...
		mov dx,PM_DS16
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0		; Offset 0 of the patched 16-bit segment
; Local GDT used by the shuffler, followed by the IDT pointer and the
; stack size constant.  The first (null) slot doubles as the operand
; for LGDT; the shuffler overwrites its pointer field with the
; relocated address before loading it.
		alignz 16

; GDT descriptor entry: "desc NAME" defines the label bcopy_gdt.NAME at
; the current position and PM_NAME as its offset from the start of the
; GDT.
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
		dd bcopy_gdt		; pointer for LGDT instruction
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1, DummyTSS	; 08h 32-bit task state segment
		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS

	desc CS16
		dd 0000ffffh		; 10h Code segment, use16, readable,
		dd 00009b00h		; present, dpl 0, cover 64K
	desc DS16
		dd 0000ffffh		; 18h Data segment, use16, read/write,
		dd 00009300h		; present, dpl 0, cover 64K
	desc CS32
		dd 0000ffffh		; 20h Code segment, use32, readable,
		dd 00cf9b00h		; present, dpl 0, cover all 4G
	desc DS32
		dd 0000ffffh		; 28h Data segment, use32, read/write,
		dd 00cf9300h		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt

; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
DummyTSS	equ 0x580

		align 4
RM_IDT_ptr:	dw 0FFFFh		; Length (nonsense, but matches CPU)
		dd 0			; Offset

bcopyxx_stack	equ 128			; We want this much stack
; Exported dword holding the value of the external symbol __bcopyxx_len.
		section .rodata
		global __syslinux_shuffler_size
		extern __bcopyxx_len
		align 4
__syslinux_shuffler_size:
		dd __bcopyxx_len
bits 16
section .text16