Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

20
arch/x86/Kbuild Normal file
View file

@ -0,0 +1,20 @@
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
obj-$(CONFIG_XEN) += xen/
# lguest paravirtualization support
obj-$(CONFIG_LGUEST_GUEST) += lguest/
obj-y += realmode/
obj-y += kernel/
obj-y += mm/
obj-y += crypto/
obj-y += vdso/
obj-$(CONFIG_IA32_EMULATION) += ia32/
obj-y += platform/
obj-y += net/
obj-$(CONFIG_KEXEC_FILE) += purgatory/

2515
arch/x86/Kconfig Normal file

File diff suppressed because it is too large Load diff

470
arch/x86/Kconfig.cpu Normal file
View file

@ -0,0 +1,470 @@
# Put here option for CPU selection and depending optimization
choice
prompt "Processor family"
default M686 if X86_32
default GENERIC_CPU if X86_64
config M486
bool "486"
depends on X86_32
---help---
This is the processor type of your CPU. This information is
used for optimizing purposes. In order to compile a kernel
that can run on all supported x86 CPU types (albeit not
optimally fast), you can specify "486" here.
Note that the 386 is no longer supported, this includes
AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 486DLC/DLC2,
UMC 486SX-S and the NexGen Nx586.
The kernel will not necessarily run on earlier architectures than
the one you have chosen, e.g. a Pentium optimized kernel will run on
a PPro, but not necessarily on a i486.
Here are the settings recommended for greatest speed:
- "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
- "586" for generic Pentium CPUs lacking the TSC
(time stamp counter) register.
- "Pentium-Classic" for the Intel Pentium.
- "Pentium-MMX" for the Intel Pentium MMX.
- "Pentium-Pro" for the Intel Pentium Pro.
- "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
- "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
- "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
- "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
- "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
- "Crusoe" for the Transmeta Crusoe series.
- "Efficeon" for the Transmeta Efficeon series.
- "Winchip-C6" for original IDT Winchip.
- "Winchip-2" for IDT Winchips with 3dNow! capabilities.
- "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
- "Geode GX/LX" For AMD Geode GX and LX processors.
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
- "VIA C7" for VIA C7.
If you don't know what to do, choose "486".
config M586
bool "586/K5/5x86/6x86/6x86MX"
depends on X86_32
---help---
Select this for an 586 or 686 series processor such as the AMD K5,
the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
assume the RDTSC (Read Time Stamp Counter) instruction.
config M586TSC
bool "Pentium-Classic"
depends on X86_32
---help---
Select this for a Pentium Classic processor with the RDTSC (Read
Time Stamp Counter) instruction for benchmarking.
config M586MMX
bool "Pentium-MMX"
depends on X86_32
---help---
Select this for a Pentium with the MMX graphics/multimedia
extended instructions.
config M686
bool "Pentium-Pro"
depends on X86_32
---help---
Select this for Intel Pentium Pro chips. This enables the use of
Pentium Pro extended instructions, and disables the init-time guard
against the f00f bug found in earlier Pentiums.
config MPENTIUMII
bool "Pentium-II/Celeron(pre-Coppermine)"
depends on X86_32
---help---
Select this for Intel chips based on the Pentium-II and
pre-Coppermine Celeron core. This option enables an unaligned
copy optimization, compiles the kernel with optimization flags
tailored for the chip, and applies any applicable Pentium Pro
optimizations.
config MPENTIUMIII
bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
depends on X86_32
---help---
Select this for Intel chips based on the Pentium-III and
Celeron-Coppermine core. This option enables use of some
extended prefetch instructions in addition to the Pentium II
extensions.
config MPENTIUMM
bool "Pentium M"
depends on X86_32
---help---
Select this for Intel Pentium M (not Pentium-4 M)
notebook chips.
config MPENTIUM4
bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
depends on X86_32
---help---
Select this for Intel Pentium 4 chips. This includes the
Pentium 4, Pentium D, P4-based Celeron and Xeon, and
Pentium-4 M (not Pentium M) chips. This option enables compile
flags optimized for the chip, uses the correct cache line size, and
applies any applicable optimizations.
CPUIDs: F[0-6][1-A] (in /proc/cpuinfo show = cpu family : 15 )
Select this for:
Pentiums (Pentium 4, Pentium D, Celeron, Celeron D) corename:
-Willamette
-Northwood
-Mobile Pentium 4
-Mobile Pentium 4 M
-Extreme Edition (Gallatin)
-Prescott
-Prescott 2M
-Cedar Mill
-Presler
-Smithfiled
Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
-Foster
-Prestonia
-Gallatin
-Nocona
-Irwindale
-Cranford
-Potomac
-Paxville
-Dempsey
config MK6
bool "K6/K6-II/K6-III"
depends on X86_32
---help---
Select this for an AMD K6-family processor. Enables use of
some extended instructions, and passes appropriate optimization
flags to GCC.
config MK7
bool "Athlon/Duron/K7"
depends on X86_32
---help---
Select this for an AMD Athlon K7-family processor. Enables use of
some extended instructions, and passes appropriate optimization
flags to GCC.
config MK8
bool "Opteron/Athlon64/Hammer/K8"
---help---
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
Enables use of some extended instructions, and passes appropriate
optimization flags to GCC.
config MCRUSOE
bool "Crusoe"
depends on X86_32
---help---
Select this for a Transmeta Crusoe processor. Treats the processor
like a 586 with TSC, and sets some GCC optimization flags (like a
Pentium Pro with no alignment requirements).
config MEFFICEON
bool "Efficeon"
depends on X86_32
---help---
Select this for a Transmeta Efficeon processor.
config MWINCHIPC6
bool "Winchip-C6"
depends on X86_32
---help---
Select this for an IDT Winchip C6 chip. Linux and GCC
treat this chip as a 586TSC with some extended instructions
and alignment requirements.
config MWINCHIP3D
bool "Winchip-2/Winchip-2A/Winchip-3"
depends on X86_32
---help---
Select this for an IDT Winchip-2, 2A or 3. Linux and GCC
treat this chip as a 586TSC with some extended instructions
and alignment requirements. Also enable out of order memory
stores for this CPU, which can increase performance of some
operations.
config MELAN
bool "AMD Elan"
depends on X86_32
---help---
Select this for an AMD Elan processor.
Do not use this option for K6/Athlon/Opteron processors!
config MGEODEGX1
bool "GeodeGX1"
depends on X86_32
---help---
Select this for a Geode GX1 (Cyrix MediaGX) chip.
config MGEODE_LX
bool "Geode GX/LX"
depends on X86_32
---help---
Select this for AMD Geode GX and LX processors.
config MCYRIXIII
bool "CyrixIII/VIA-C3"
depends on X86_32
---help---
Select this for a Cyrix III or C3 chip. Presently Linux and GCC
treat this chip as a generic 586. Whilst the CPU is 686 class,
it lacks the cmov extension which gcc assumes is present when
generating 686 code.
Note that Nehemiah (Model 9) and above will not boot with this
kernel due to them lacking the 3DNow! instructions used in earlier
incarnations of the CPU.
config MVIAC3_2
bool "VIA C3-2 (Nehemiah)"
depends on X86_32
---help---
Select this for a VIA C3 "Nehemiah". Selecting this enables usage
of SSE and tells gcc to treat the CPU as a 686.
Note, this kernel will not boot on older (pre model 9) C3s.
config MVIAC7
bool "VIA C7"
depends on X86_32
---help---
Select this for a VIA C7. Selecting this uses the correct cache
shift and tells gcc to treat the CPU as a 686.
config MPSC
bool "Intel P4 / older Netburst based Xeon"
depends on X86_64
---help---
Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
Xeon CPUs with Intel 64bit which is compatible with x86-64.
Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
Netburst core and shouldn't use this option. You can distinguish them
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
config MCORE2
bool "Core 2/newer Xeon"
---help---
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
53xx) CPUs. You can distinguish newer from older Xeons by the CPU
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)
config MATOM
bool "Intel Atom"
---help---
Select this for the Intel Atom platform. Intel Atom CPUs have an
in-order pipelining architecture and thus can benefit from
accordingly optimized code. Use a recent GCC with specific Atom
support in order to fully benefit from selecting this option.
config GENERIC_CPU
bool "Generic-x86-64"
depends on X86_64
---help---
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
endchoice
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
---help---
Instead of just including optimizations for the selected
x86 variant (e.g. PII, Crusoe or Athlon), include some more
generic optimizations as well. This will make the kernel
perform better on x86 CPUs other than that selected.
This is really intended for distributors who need more
generic optimizations.
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
int
default "12" if X86_VSMP
default X86_L1_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
config X86_PPRO_FENCE
bool "PentiumPro memory ordering errata workaround"
depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
---help---
Old PentiumPro multiprocessor systems had errata that could cause
memory operations to violate the x86 ordering standard in rare cases.
Enabling this option will attempt to work around some (but not all)
occurrences of this problem, at the cost of much heavier spinlock and
memory barrier operations.
If unsure, say n here. Even distro kernels should think twice before
enabling this: there are few systems, and an unlikely bug.
config X86_F00F_BUG
def_bool y
depends on M586MMX || M586TSC || M586 || M486
config X86_INVD_BUG
def_bool y
depends on M486
config X86_ALIGNMENT_16
def_bool y
depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
config X86_INTEL_USERCOPY
def_bool y
depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
config X86_USE_PPRO_CHECKSUM
def_bool y
depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
config X86_USE_3DNOW
def_bool y
depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
#
# P6_NOPs are a relatively minor optimization that require a family >=
# 6 processor, except that it is broken on certain VIA chips.
# Furthermore, AMD chips prefer a totally different sequence of NOPs
# (which work on all CPUs). In addition, it looks like Virtual PC
# does not understand them.
#
# As a result, disallow these if we're not compiling for X86_64 (these
# NOPs do work on all x86-64 capable chips); the list of processors in
# the right-hand clause are the cores that benefit from this optimization.
#
config X86_P6_NOP
def_bool y
depends on X86_64
depends on (MCORE2 || MPENTIUM4 || MPSC)
config X86_TSC
def_bool y
depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
config X86_CMPXCHG64
def_bool y
depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_CMOV
def_bool y
depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
default "6" if X86_32 && X86_P6_NOP
default "5" if X86_32 && X86_CMPXCHG64
default "4"
config X86_DEBUGCTLMSR
def_bool y
depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EXPERT
---help---
This lets you choose what x86 vendor support code your kernel
will include.
config CPU_SUP_INTEL
default y
bool "Support Intel processors" if PROCESSOR_SELECT
---help---
This enables detection, tunings and quirks for Intel processors
You need this enabled if you want your kernel to run on an
Intel CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on an Intel
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors
You need this enabled if you want your kernel to run on a
Cyrix CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Cyrix
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_AMD
default y
bool "Support AMD processors" if PROCESSOR_SELECT
---help---
This enables detection, tunings and quirks for AMD processors
You need this enabled if you want your kernel to run on an
AMD CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on an AMD
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_CENTAUR
default y
bool "Support Centaur processors" if PROCESSOR_SELECT
---help---
This enables detection, tunings and quirks for Centaur processors
You need this enabled if you want your kernel to run on a
Centaur CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Centaur
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_TRANSMETA_32
default y
bool "Support Transmeta processors" if PROCESSOR_SELECT
depends on !64BIT
---help---
This enables detection, tunings and quirks for Transmeta processors
You need this enabled if you want your kernel to run on a
Transmeta CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Transmeta
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
depends on M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors
You need this enabled if you want your kernel to run on a
UMC CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a UMC
CPU might render the kernel unbootable.
If unsure, say N.

326
arch/x86/Kconfig.debug Normal file
View file

@ -0,0 +1,326 @@
menu "Kernel hacking"
config TRACE_IRQFLAGS_SUPPORT
def_bool y
source "lib/Kconfig.debug"
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
be used by people debugging the kernel. Note that with PAT support
enabled, even in this case there are restrictions on /dev/mem
use due to the cache aliasing requirements.
If this option is switched on, the /dev/mem file only allows
userspace access to PCI space and the BIOS code and data regions.
This is sufficient for dosemu and X and all common users of
/dev/mem.
If in doubt, say Y.
config X86_VERBOSE_BOOTUP
bool "Enable verbose x86 bootup info messages"
default y
---help---
Enables the informational output from the decompression stage
(e.g. bzImage) of the boot. If you disable this you will still
see errors. Disable this if you want silent bootup.
config EARLY_PRINTK
bool "Early printk" if EXPERT
default y
---help---
Write kernel log output directly into the VGA buffer or to a serial
port.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized. For normal operation
it is not recommended because it looks ugly and doesn't cooperate
with klogd/syslogd or the X server. You should normally N here,
unless you want to debug such a crash.
config EARLY_PRINTK_INTEL_MID
bool "Early printk for Intel MID platform support"
depends on EARLY_PRINTK && X86_INTEL_MID
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
depends on EARLY_PRINTK && PCI
---help---
Write kernel log output directly into the EHCI debug port.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized. For normal operation
it is not recommended because it looks ugly and doesn't cooperate
with klogd/syslogd or the X server. You should normally N here,
unless you want to debug such a crash. You need usb debug device.
config EARLY_PRINTK_EFI
bool "Early printk via the EFI framebuffer"
depends on EFI && EARLY_PRINTK
select FONT_SUPPORT
---help---
Write kernel log output directly into the EFI framebuffer.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized.
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
who are working in architecture specific areas of the kernel.
It is probably not a good idea to enable this feature in a production
kernel.
If in doubt, say "N"
config EFI_PGT_DUMP
bool "Dump the EFI pagetable"
depends on EFI && X86_PTDUMP
---help---
Enable this if you want to dump the EFI page table before
enabling virtual mode. This can be used to debug miscellaneous
issues with the mapping of the EFI runtime regions into that
table.
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
default y
depends on DEBUG_KERNEL
---help---
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
data. This is recommended so that we can catch kernel bugs sooner.
If in doubt, say "Y".
config DEBUG_RODATA_TEST
bool "Testcase for the DEBUG_RODATA feature"
depends on DEBUG_RODATA
default y
---help---
This option enables a testcase for the DEBUG_RODATA
feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
---help---
This option helps catch unintended modifications to loadable
kernel module's text and read-only data. It also prevents execution
of module data. Such protection may interfere with run-time code
patching and dynamic kernel tracing - and they might also protect
against certain classes of kernel exploits.
If in doubt, say "N".
config DEBUG_NX_TEST
tristate "Testcase for the NX non-executable stack feature"
depends on DEBUG_KERNEL && m
---help---
This option enables a testcase for the CPU NX capability
and the software setup of this feature.
If in doubt, say "N"
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EXPERT
---help---
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
option saves about 4k and might cause you much additional grey
hair.
config DEBUG_TLBFLUSH
bool "Set upper limit of TLB entries to flush one-by-one"
depends on DEBUG_KERNEL
---help---
X86-only for now.
This option allows the user to tune the amount of TLB entries the
kernel flushes one-by-one instead of doing a full TLB flush. In
certain situations, the former is cheaper. This is controlled by the
tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
to -1, the code flushes the whole TLB unconditionally. Otherwise,
for positive values of it, the kernel will use single TLB entry
invalidating instructions according to the following formula:
flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
If in doubt, say "N".
config IOMMU_DEBUG
bool "Enable IOMMU debugging"
depends on GART_IOMMU && DEBUG_KERNEL
depends on X86_64
---help---
Force the IOMMU to on even when you have less than 4GB of
memory and add debugging code. On overflow always panic. And
allow to enable IOMMU leak tracing. Can be disabled at boot
time with iommu=noforce. This will also enable scatter gather
list merging. Currently not recommended for production
code. When you use it make sure you have a big enough
IOMMU/AGP aperture. Most of the options enabled by this can
be set more finegrained using the iommu= command line
options. See Documentation/x86/x86_64/boot-options.txt for more
details.
config IOMMU_STRESS
bool "Enable IOMMU stress-test mode"
---help---
This option disables various optimizations in IOMMU related
code to do real stress testing of the IOMMU code. This option
will cause a performance drop and should only be enabled for
testing.
config IOMMU_LEAK
bool "IOMMU leak tracing"
depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config HAVE_MMIOTRACE_SUPPORT
def_bool y
config X86_DECODER_SELFTEST
bool "x86 instruction decoder selftest"
depends on DEBUG_KERNEL && KPROBES
depends on !COMPILE_TEST
---help---
Perform x86 instruction decoder selftests at build time.
This option is useful for checking the sanity of x86 instruction
decoder code.
If unsure, say "N".
#
# IO delay types:
#
config IO_DELAY_TYPE_0X80
int
default "0"
config IO_DELAY_TYPE_0XED
int
default "1"
config IO_DELAY_TYPE_UDELAY
int
default "2"
config IO_DELAY_TYPE_NONE
int
default "3"
choice
prompt "IO delay type"
default IO_DELAY_0X80
config IO_DELAY_0X80
bool "port 0x80 based port-IO delay [recommended]"
---help---
This is the traditional Linux IO delay used for in/out_p.
It is the most tested hence safest selection here.
config IO_DELAY_0XED
bool "port 0xed based port-IO delay"
---help---
Use port 0xed as the IO delay. This frees up port 0x80 which is
often used as a hardware-debug port.
config IO_DELAY_UDELAY
bool "udelay based port-IO delay"
---help---
Use udelay(2) as the IO delay method. This provides the delay
while not having any side-effect on the IO port space.
config IO_DELAY_NONE
bool "no port-IO delay"
---help---
No port-IO delay. Will break on old boxes that require port-IO
delay for certain operations. Should work on most new machines.
endchoice
if IO_DELAY_0X80
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_0X80
endif
if IO_DELAY_0XED
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_0XED
endif
if IO_DELAY_UDELAY
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_UDELAY
endif
if IO_DELAY_NONE
config DEFAULT_IO_DELAY_TYPE
int
default IO_DELAY_TYPE_NONE
endif
config DEBUG_BOOT_PARAMS
bool "Debug boot parameters"
depends on DEBUG_KERNEL
depends on DEBUG_FS
---help---
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
bool "CPA self-test code"
depends on DEBUG_KERNEL
---help---
Do change_page_attr() self-tests every 30 seconds.
config OPTIMIZE_INLINING
bool "Allow gcc to uninline functions marked 'inline'"
---help---
This option determines if the kernel forces gcc to inline the functions
developers have marked 'inline'. Doing so takes away freedom from gcc to
do what it thinks is best, which is desirable for the gcc 3.x series of
compilers. The gcc 4.x series have a rewritten inlining algorithm and
enabling this option will generate a smaller kernel there. Hopefully
this algorithm is so good that allowing gcc 4.x and above to make the
decision will become the default in the future. Until then this option
is there to test gcc for this.
If unsure, say N.
config DEBUG_NMI_SELFTEST
bool "NMI Selftest"
depends on DEBUG_KERNEL && X86_LOCAL_APIC
---help---
Enabling this option turns on a quick NMI selftest to verify
that the NMI behaves correctly.
This might help diagnose strange hangs that rely on NMI to
function properly.
If unsure, say N.
config X86_DEBUG_STATIC_CPU_HAS
bool "Debug alternatives"
depends on DEBUG_KERNEL
---help---
This option causes additional code to be generated which
fails if static_cpu_has() is used before alternatives have
run.
If unsure, say N.
endmenu

265
arch/x86/Makefile Normal file
View file

@ -0,0 +1,265 @@
# Unified Makefile for i386 and x86_64
# select defconfig based on actual architecture
ifeq ($(ARCH),x86)
ifeq ($(shell uname -m),x86_64)
KBUILD_DEFCONFIG := x86_64_defconfig
else
KBUILD_DEFCONFIG := i386_defconfig
endif
else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
# How to compile the 16-bit code. Note we always compile for -march=i386;
# that way we can complain to the user if the CPU is insufficient.
#
# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
# older versions of GCC, include an *assembly* header to make sure that
# gcc doesn't play any games behind our back.
CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
-DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-mno-mmx -mno-sse \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-stack-protector) \
$(call cc-option, -mpreferred-stack-boundary=2)
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles.
# e.g.: obj-y += foo_$(BITS).o
export BITS
ifdef CONFIG_X86_NEED_RELOCS
LDFLAGS_vmlinux := --emit-relocs
endif
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
CHECKFLAGS += -D__i386__
biarch := $(call cc-option,-m32)
KBUILD_AFLAGS += $(biarch)
KBUILD_CFLAGS += $(biarch)
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
# Never want PIC in a 32-bit kernel, prevent breakage with GCC built
# with nonstandard options
KBUILD_CFLAGS += -fno-pic
# prevent gcc from keeping the stack 16 byte aligned
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
$(call cc-option,-fno-unit-at-a-time))
# CPU-specific tuning. Anything which can be shared with UML should go here.
include $(srctree)/arch/x86/Makefile_32.cpu
KBUILD_CFLAGS += $(cflags-y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
else
BITS := 64
UTS_MACHINE := x86_64
CHECKFLAGS += -D__x86_64__ -m64
biarch := -m64
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
# Don't autogenerate traditional x87 instructions
KBUILD_CFLAGS += $(call cc-option,-mno-80387)
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
# Use -mpreferred-stack-boundary=3 if supported.
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
cflags-$(CONFIG_MCORE2) += \
$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
endif
# Make sure compiler does not have buggy stack-protector support.
ifdef CONFIG_CC_STACKPROTECTOR
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
$(warning stack-protector enabled but compiler support broken)
endif
endif
ifdef CONFIG_X86_X32
x32_ld_ok := $(call try-run,\
/bin/echo -e '1: .quad 1b' | \
$(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \
$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
ifeq ($(x32_ld_ok),y)
CONFIG_X86_X32_ABI := y
KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI
KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI
else
$(warning CONFIG_X86_X32 enabled but no binutils support)
endif
endif
export CONFIG_X86_X32_ABI
# Don't unroll struct assignments with kmemcheck enabled
ifeq ($(CONFIG_KMEMCHECK),y)
KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
endif
# Stackpointer is addressed different for 32 bit and 64 bit x86
sp-$(CONFIG_X86_32) := esp
sp-$(CONFIG_X86_64) := rsp
# do binutils support CFI?
cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
# is .cfi_signal_frame supported too?
cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
LDFLAGS := -m elf_$(UTS_MACHINE)
# Speed up the build
KBUILD_CFLAGS += -pipe
# Workaround for a gcc prelease that unfortunately was shipped in a suse release
KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
###
# Syscall table generation
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/syscalls all
archprepare:
ifeq ($(CONFIG_KEXEC_FILE),y)
$(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
endif
###
# Kernel objects
head-y := arch/x86/kernel/head_$(BITS).o
head-y += arch/x86/kernel/head$(BITS).o
head-y += arch/x86/kernel/head.o
libs-y += arch/x86/lib/
# See arch/x86/Kbuild for content of core part of the kernel
core-y += arch/x86/
# drivers-y are linked after core-y
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/x86/power/
drivers-$(CONFIG_FB) += arch/x86/video/
####
# boot loader support. Several targets are kept for legacy purposes
boot := arch/x86/boot
BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
PHONY += bzImage $(BOOT_TARGETS)
# Default kernel to build
all: bzImage
# KBUILD_IMAGE specify target image being built
KBUILD_IMAGE := $(boot)/bzImage
bzImage: vmlinux
ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
$(Q)$(MAKE) $(build)=arch/x86/tools posttest
endif
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += install
install:
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/x86/vdso $@
archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
$(Q)$(MAKE) $(clean)=$(boot)
$(Q)$(MAKE) $(clean)=arch/x86/tools
$(Q)$(MAKE) $(clean)=arch/x86/purgatory
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
echo ' bzdisk/fdimage*/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
endef

60
arch/x86/Makefile.um Normal file
View file

@ -0,0 +1,60 @@
core-y += arch/x86/crypto/
ifeq ($(CONFIG_X86_32),y)
START := 0x8048000
LDFLAGS += -m elf_i386
ELF_ARCH := i386
ELF_FORMAT := elf32-i386
CHECKFLAGS += -D__i386__
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS += $(call cc-option,-m32)
LINK-y += $(call cc-option,-m32)
export LDFLAGS
LDS_EXTRA := -Ui386
export LDS_EXTRA
# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
include $(srctree)/arch/x86/Makefile_32.cpu
# prevent gcc from keeping the stack 16 byte aligned. Taken from i386.
cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
# Prevent sprintf in nfsd from being converted to strcpy and resulting in
# an unresolved reference.
cflags-y += -ffreestanding
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots. Also, gcc
# 4.3.0 needs -funit-at-a-time for extern inline functions.
KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
echo $(call cc-option,-fno-unit-at-a-time); \
else echo $(call cc-option,-funit-at-a-time); fi ;)
KBUILD_CFLAGS += $(cflags-y)
else
START := 0x60000000
KBUILD_CFLAGS += -fno-builtin -m64
CHECKFLAGS += -m64 -D__x86_64__
KBUILD_AFLAGS += -m64
LDFLAGS += -m elf_x86_64
KBUILD_CPPFLAGS += -m64
ELF_ARCH := i386:x86-64
ELF_FORMAT := elf64-x86-64
# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
LINK-y += -m64
# Do unit-at-a-time unconditionally on x86_64, following the host
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
endif

70
arch/x86/Makefile_32.cpu Normal file
View file

@ -0,0 +1,70 @@
# CPU tuning section - shared with UML.
# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
#-mtune exists since gcc 3.4
HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
ifeq ($(HAS_MTUNE),y)
tune = $(call cc-option,-mtune=$(1),$(2))
else
tune = $(call cc-option,-mcpu=$(1),$(2))
endif
align := $(cc-option-align)
cflags-$(CONFIG_M486) += -march=i486
cflags-$(CONFIG_M586) += -march=i586
cflags-$(CONFIG_M586TSC) += -march=i586
cflags-$(CONFIG_M586MMX) += -march=pentium-mmx
cflags-$(CONFIG_M686) += -march=i686
cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2)
cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3)
cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3)
cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4)
cflags-$(CONFIG_MK6) += -march=k6
# Please note, that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsosever to performance at this time.
cflags-$(CONFIG_MK7) += -march=athlon
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
cflags-$(CONFIG_MVIAC7) += -march=i686
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
# AMD Elan support
cflags-$(CONFIG_MELAN) += -march=i486
# Geode GX1 support
cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
# add at the end to overwrite eventual tuning options from earlier
# cpu entries
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
# Work around the pentium-mmx code generator madness of gcc4.4.x which
# does stack alignment by generating horrible code _before_ the mcount
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
# tracer assumptions. For i686, generic, core2 this is set by the
# compiler anyway
ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif
# Work around to a bug with asm goto with first implementations of it
# in gcc causing gcc to mess up the push and pop of the stack in some
# uses of asm goto.
ifeq ($(CONFIG_JUMP_LABEL), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif
cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
ifneq ($(CONFIG_X86_P6_NOP),y)
cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,)
endif

187
arch/x86/boot/Makefile Normal file
View file

@ -0,0 +1,187 @@
#
# arch/x86/boot/Makefile
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1994 by Linus Torvalds
# Changed by many, many contributors over the years.
#
# If you want to preset the SVGA mode, uncomment the next line and
# set SVGA_MODE to whatever number you want.
# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
# The number is the same as you would ordinarily press at bootup.
SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed
setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o
setup-$(CONFIG_X86_APM_BOOT) += apm.o
# The link order of the video-*.o modules can matter. In particular,
# video-vga.o *must* be listed first, followed by video-vesa.o.
# Hardware-specific drivers should follow in the order they should be
# probed, and video-bios.o should typically be last.
setup-y += video-vga.o
setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
hostprogs-y := tools/build
hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr
HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
-include include/generated/autoconf.h \
-D__EXPORTED_HEADERS__
ifdef CONFIG_X86_FEATURE_NAMES
$(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
targets += cpustr.h
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
endif
clean-files += cpustr.h
# ---------------------------------------------------------------------------
KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
$(obj)/zoffset.h $@
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
targets += voffset.h
$(obj)/voffset.h: vmlinux FORCE
$(call if_changed,voffset)
sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
targets += zoffset.h
$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
$(call if_changed,zoffset)
AFLAGS_header.o += -I$(obj)
$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
LDFLAGS_setup.elf := -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_setup.bin := -O binary
$(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
# Set this if you want to pass append arguments to the
# bzdisk/fdimage/isoimage kernel
FDARGS =
# Set this if you want an initrd included with the
# bzdisk/fdimage/isoimage kernel
FDINITRD =
image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
$(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
# This requires write access to /dev/fd0
bzdisk: $(obj)/bzImage $(obj)/mtools.conf
MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
syslinux /dev/fd0 ; sync
echo '$(image_cmdline)' | \
MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync
# These require being root or having syslinux 2.02 or higher installed
fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
syslinux $(obj)/fdimage ; sync
echo '$(image_cmdline)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync
fdimage288: $(obj)/bzImage $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
syslinux $(obj)/fdimage ; sync
echo '$(image_cmdline)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync
isoimage: $(obj)/bzImage
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
for i in lib lib64 share end ; do \
if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
break ; \
fi ; \
if [ $$i = end ] ; then exit 1 ; fi ; \
done
cp $(obj)/bzImage $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
fi
mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
-no-emul-boot -boot-load-size 4 -boot-info-table \
$(obj)/isoimage
isohybrid $(obj)/image.iso 2>/dev/null || true
rm -rf $(obj)/isoimage
bzlilo: $(obj)/bzImage
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
cp System.map $(INSTALL_PATH)/
if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
install:
sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"

165
arch/x86/boot/a20.c Normal file
View file

@ -0,0 +1,165 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Enable A20 gate (return -1 on failure)
*/
#include "boot.h"
#define MAX_8042_LOOPS 100000
#define MAX_8042_FF 32
static int empty_8042(void)
{
u8 status;
int loops = MAX_8042_LOOPS;
int ffs = MAX_8042_FF;
while (loops--) {
io_delay();
status = inb(0x64);
if (status == 0xff) {
/* FF is a plausible, but very unlikely status */
if (!--ffs)
return -1; /* Assume no KBC present */
}
if (status & 1) {
/* Read and discard input data */
io_delay();
(void)inb(0x60);
} else if (!(status & 2)) {
/* Buffers empty, finished! */
return 0;
}
}
return -1;
}
/* Returns nonzero if the A20 line is enabled. The memory address
used as a test is the int $0x80 vector, which should be safe. */
#define A20_TEST_ADDR (4*0x80)
#define A20_TEST_SHORT 32
#define A20_TEST_LONG 2097152 /* 2^21 */
static int a20_test(int loops)
{
int ok = 0;
int saved, ctr;
set_fs(0x0000);
set_gs(0xffff);
saved = ctr = rdfs32(A20_TEST_ADDR);
while (loops--) {
wrfs32(++ctr, A20_TEST_ADDR);
io_delay(); /* Serialize and make delay constant */
ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr;
if (ok)
break;
}
wrfs32(saved, A20_TEST_ADDR);
return ok;
}
/* Quick test to see if A20 is already enabled */
static int a20_test_short(void)
{
return a20_test(A20_TEST_SHORT);
}
/* Longer test that actually waits for A20 to come on line; this
is useful when dealing with the KBC or other slow external circuitry. */
static int a20_test_long(void)
{
return a20_test(A20_TEST_LONG);
}
static void enable_a20_bios(void)
{
struct biosregs ireg;
initregs(&ireg);
ireg.ax = 0x2401;
intcall(0x15, &ireg, NULL);
}
static void enable_a20_kbc(void)
{
empty_8042();
outb(0xd1, 0x64); /* Command write */
empty_8042();
outb(0xdf, 0x60); /* A20 on */
empty_8042();
outb(0xff, 0x64); /* Null command, but UHCI wants it */
empty_8042();
}
static void enable_a20_fast(void)
{
u8 port_a;
port_a = inb(0x92); /* Configuration port A */
port_a |= 0x02; /* Enable A20 */
port_a &= ~0x01; /* Do not reset machine */
outb(port_a, 0x92);
}
/*
* Actual routine to enable A20; return 0 on ok, -1 on failure
*/
#define A20_ENABLE_LOOPS 255 /* Number of times to try */
int enable_a20(void)
{
int loops = A20_ENABLE_LOOPS;
int kbc_err;
while (loops--) {
/* First, check to see if A20 is already enabled
(legacy free, etc.) */
if (a20_test_short())
return 0;
/* Next, try the BIOS (INT 0x15, AX=0x2401) */
enable_a20_bios();
if (a20_test_short())
return 0;
/* Try enabling A20 through the keyboard controller */
kbc_err = empty_8042();
if (a20_test_short())
return 0; /* BIOS worked, but with delayed reaction */
if (!kbc_err) {
enable_a20_kbc();
if (a20_test_long())
return 0;
}
/* Finally, try enabling the "fast A20 gate" */
enable_a20_fast();
if (a20_test_long())
return 0;
}
return -1;
}

75
arch/x86/boot/apm.c Normal file
View file

@ -0,0 +1,75 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* Original APM BIOS checking by Stephen Rothwell, May 1994
* (sfr@canb.auug.org.au)
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Get APM BIOS information
*/
#include "boot.h"
int query_apm_bios(void)
{
struct biosregs ireg, oreg;
/* APM BIOS installation check */
initregs(&ireg);
ireg.ah = 0x53;
intcall(0x15, &ireg, &oreg);
if (oreg.flags & X86_EFLAGS_CF)
return -1; /* No APM BIOS */
if (oreg.bx != 0x504d) /* "PM" signature */
return -1;
if (!(oreg.cx & 0x02)) /* 32 bits supported? */
return -1;
/* Disconnect first, just in case */
ireg.al = 0x04;
intcall(0x15, &ireg, NULL);
/* 32-bit connect */
ireg.al = 0x03;
intcall(0x15, &ireg, &oreg);
boot_params.apm_bios_info.cseg = oreg.ax;
boot_params.apm_bios_info.offset = oreg.ebx;
boot_params.apm_bios_info.cseg_16 = oreg.cx;
boot_params.apm_bios_info.dseg = oreg.dx;
boot_params.apm_bios_info.cseg_len = oreg.si;
boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
boot_params.apm_bios_info.dseg_len = oreg.di;
if (oreg.flags & X86_EFLAGS_CF)
return -1;
/* Redo the installation check as the 32-bit connect;
some BIOSes return different flags this way... */
ireg.al = 0x00;
intcall(0x15, &ireg, &oreg);
if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
/* Failure with 32-bit connect, try to disconect and ignore */
ireg.al = 0x04;
intcall(0x15, &ireg, NULL);
return -1;
}
boot_params.apm_bios_info.version = oreg.ax;
boot_params.apm_bios_info.flags = oreg.cx;
return 0;
}

82
arch/x86/boot/bioscall.S Normal file
View file

@ -0,0 +1,82 @@
/* -----------------------------------------------------------------------
*
* Copyright 2009-2014 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
* touching registers they shouldn't be.
*/
.code16
.section ".inittext","ax"
.globl intcall
.type intcall, @function
intcall:
/* Self-modify the INT instruction. Ugly, but works. */
cmpb %al, 3f
je 1f
movb %al, 3f
jmp 1f /* Synchronize pipeline */
1:
/* Save state */
pushfl
pushw %fs
pushw %gs
pushal
/* Copy input state to stack frame */
subw $44, %sp
movw %dx, %si
movw %sp, %di
movw $11, %cx
rep; movsd
/* Pop full state from the stack */
popal
popw %gs
popw %fs
popw %es
popw %ds
popfl
/* Actual INT */
.byte 0xcd /* INT opcode */
3: .byte 0
/* Push full state to the stack */
pushfl
pushw %ds
pushw %es
pushw %fs
pushw %gs
pushal
/* Re-establish C environment invariants */
cld
movzwl %sp, %esp
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
/* Copy output state from stack frame */
movw 68(%esp), %di /* Original %cx == 3rd argument */
andw %di, %di
jz 4f
movw %sp, %si
movw $11, %cx
rep; movsd
4: addw $44, %sp
/* Restore state and return */
popal
popw %gs
popw %fs
popfl
retl
.size intcall, .-intcall

43
arch/x86/boot/bitops.h Normal file
View file

@ -0,0 +1,43 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Very simple bitops for the boot code.
*/
#ifndef BOOT_BITOPS_H
#define BOOT_BITOPS_H
#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
static inline int constant_test_bit(int nr, const void *addr)
{
const u32 *p = (const u32 *)addr;
return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
}
static inline int variable_test_bit(int nr, const void *addr)
{
u8 v;
const u32 *p = (const u32 *)addr;
asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
return v;
}
#define test_bit(nr,addr) \
(__builtin_constant_p(nr) ? \
constant_test_bit((nr),(addr)) : \
variable_test_bit((nr),(addr)))
static inline void set_bit(int nr, void *addr)
{
asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
}
#endif /* BOOT_BITOPS_H */

359
arch/x86/boot/boot.h Normal file
View file

@ -0,0 +1,359 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Header file for the real-mode kernel code
*/
#ifndef BOOT_BOOT_H
#define BOOT_BOOT_H
#define STACK_SIZE 512 /* Minimum number of bytes for stack */
#ifndef __ASSEMBLY__
#include <stdarg.h>
#include <linux/types.h>
#include <linux/edd.h>
#include <asm/boot.h>
#include <asm/setup.h>
#include "bitops.h"
#include "ctype.h"
#include "cpuflags.h"
/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
extern struct setup_header hdr;
extern struct boot_params boot_params;
#define cpu_relax() asm volatile("rep; nop")
/* Basic port I/O */
static inline void outb(u8 v, u16 port)
{
asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
}
static inline u8 inb(u16 port)
{
u8 v;
asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
return v;
}
static inline void outw(u16 v, u16 port)
{
asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
}
static inline u16 inw(u16 port)
{
u16 v;
asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
return v;
}
static inline void outl(u32 v, u16 port)
{
asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
}
static inline u32 inl(u16 port)
{
u32 v;
asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
return v;
}
static inline void io_delay(void)
{
const u16 DELAY_PORT = 0x80;
asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
}
/* These functions are used to reference data in other segments. */
static inline u16 ds(void)
{
u16 seg;
asm("movw %%ds,%0" : "=rm" (seg));
return seg;
}
static inline void set_fs(u16 seg)
{
asm volatile("movw %0,%%fs" : : "rm" (seg));
}
static inline u16 fs(void)
{
u16 seg;
asm volatile("movw %%fs,%0" : "=rm" (seg));
return seg;
}
static inline void set_gs(u16 seg)
{
asm volatile("movw %0,%%gs" : : "rm" (seg));
}
static inline u16 gs(void)
{
u16 seg;
asm volatile("movw %%gs,%0" : "=rm" (seg));
return seg;
}
typedef unsigned int addr_t;
static inline u8 rdfs8(addr_t addr)
{
u8 v;
asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
return v;
}
static inline u16 rdfs16(addr_t addr)
{
u16 v;
asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
return v;
}
static inline u32 rdfs32(addr_t addr)
{
u32 v;
asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
return v;
}
static inline void wrfs8(u8 v, addr_t addr)
{
asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
}
static inline void wrfs16(u16 v, addr_t addr)
{
asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
}
static inline void wrfs32(u32 v, addr_t addr)
{
asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
}
static inline u8 rdgs8(addr_t addr)
{
u8 v;
asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
return v;
}
static inline u16 rdgs16(addr_t addr)
{
u16 v;
asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
return v;
}
static inline u32 rdgs32(addr_t addr)
{
u32 v;
asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
return v;
}
static inline void wrgs8(u8 v, addr_t addr)
{
asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
}
static inline void wrgs16(u16 v, addr_t addr)
{
asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
}
static inline void wrgs32(u32 v, addr_t addr)
{
asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
}
/* Note: these only return true/false, not a signed return value! */
static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
asm volatile("fs; repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
asm volatile("gs; repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
/* Heap -- available for dynamic lists. */
extern char _end[];
extern char *HEAP;
extern char *heap_end;
#define RESET_HEAP() ((void *)( HEAP = _end ))
static inline char *__get_heap(size_t s, size_t a, size_t n)
{
char *tmp;
HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1));
tmp = HEAP;
HEAP += s*n;
return tmp;
}
#define GET_HEAP(type, n) \
((type *)__get_heap(sizeof(type),__alignof__(type),(n)))
static inline bool heap_free(size_t n)
{
return (int)(heap_end-HEAP) >= (int)n;
}
/* copy.S */
void copy_to_fs(addr_t dst, void *src, size_t len);
void *copy_from_fs(void *dst, addr_t src, size_t len);
void copy_to_gs(addr_t dst, void *src, size_t len);
void *copy_from_gs(void *dst, addr_t src, size_t len);
/* a20.c */
int enable_a20(void);
/* apm.c */
int query_apm_bios(void);
/* bioscall.c */
struct biosregs {
union {
struct {
u32 edi;
u32 esi;
u32 ebp;
u32 _esp;
u32 ebx;
u32 edx;
u32 ecx;
u32 eax;
u32 _fsgs;
u32 _dses;
u32 eflags;
};
struct {
u16 di, hdi;
u16 si, hsi;
u16 bp, hbp;
u16 _sp, _hsp;
u16 bx, hbx;
u16 dx, hdx;
u16 cx, hcx;
u16 ax, hax;
u16 gs, fs;
u16 es, ds;
u16 flags, hflags;
};
struct {
u8 dil, dih, edi2, edi3;
u8 sil, sih, esi2, esi3;
u8 bpl, bph, ebp2, ebp3;
u8 _spl, _sph, _esp2, _esp3;
u8 bl, bh, ebx2, ebx3;
u8 dl, dh, edx2, edx3;
u8 cl, ch, ecx2, ecx3;
u8 al, ah, eax2, eax3;
};
};
};
void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
/* cmdline.c */
int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
if (cmd_line_ptr >= 0x100000)
return -1; /* inaccessible */
return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
}
static inline int cmdline_find_option_bool(const char *option)
{
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
if (cmd_line_ptr >= 0x100000)
return -1; /* inaccessible */
return __cmdline_find_option_bool(cmd_line_ptr, option);
}
/* cpu.c, cpucheck.c */
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void);
/* early_serial_console.c */
extern int early_serial_base;
void console_init(void);
/* edd.c */
void query_edd(void);
/* header.S */
void __attribute__((noreturn)) die(void);
/* mca.c */
int query_mca(void);
/* memory.c */
int detect_memory(void);
/* pm.c */
void __attribute__((noreturn)) go_to_protected_mode(void);
/* pmjump.S */
void __attribute__((noreturn))
protected_mode_jump(u32 entrypoint, u32 bootparams);
/* printf.c */
int sprintf(char *buf, const char *fmt, ...);
int vsprintf(char *buf, const char *fmt, va_list args);
int printf(const char *fmt, ...);
/* regs.c */
void initregs(struct biosregs *regs);
/* string.c */
int strcmp(const char *str1, const char *str2);
int strncmp(const char *cs, const char *ct, size_t count);
size_t strnlen(const char *s, size_t maxlen);
unsigned int atou(const char *s);
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
size_t strlen(const char *s);
/* tty.c */
void puts(const char *);
void putchar(int);
int getchar(void);
void kbd_flush(void);
int getchar_timeout(void);
/* video.c */
void set_video(void);
/* video-mode.c */
int set_mode(u16 mode);
int mode_defined(u16 mode);
void probe_cards(int unsafe);
/* video-vesa.c */
void vesa_store_edid(void);
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */

158
arch/x86/boot/cmdline.c Normal file
View file

@ -0,0 +1,158 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Simple command-line parser for early boot.
*/
#include "boot.h"
static inline int myisspace(u8 c)
{
return c <= ' '; /* Close enough approximation */
}
/*
* Find a non-boolean option, that is, "option=argument". In accordance
* with standard Linux practice, if this option is repeated, this returns
* the last instance on the command line.
*
* Returns the length of the argument (regardless of if it was
* truncated to fit in the buffer), or -1 on not found.
*/
int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
{
addr_t cptr;
char c;
int len = -1;
const char *opptr = NULL;
char *bufptr = buffer;
enum {
st_wordstart, /* Start of word/after whitespace */
st_wordcmp, /* Comparing this word */
st_wordskip, /* Miscompare, skip */
st_bufcpy /* Copying this to buffer */
} state = st_wordstart;
if (!cmdline_ptr)
return -1; /* No command line */
cptr = cmdline_ptr & 0xf;
set_fs(cmdline_ptr >> 4);
while (cptr < 0x10000 && (c = rdfs8(cptr++))) {
switch (state) {
case st_wordstart:
if (myisspace(c))
break;
/* else */
state = st_wordcmp;
opptr = option;
/* fall through */
case st_wordcmp:
if (c == '=' && !*opptr) {
len = 0;
bufptr = buffer;
state = st_bufcpy;
} else if (myisspace(c)) {
state = st_wordstart;
} else if (c != *opptr++) {
state = st_wordskip;
}
break;
case st_wordskip:
if (myisspace(c))
state = st_wordstart;
break;
case st_bufcpy:
if (myisspace(c)) {
state = st_wordstart;
} else {
if (len < bufsize-1)
*bufptr++ = c;
len++;
}
break;
}
}
if (bufsize)
*bufptr = '\0';
return len;
}
/*
* Find a boolean option (like quiet,noapic,nosmp....)
*
* Returns the position of that option (starts counting with 1)
* or 0 on not found
*/
int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
{
addr_t cptr;
char c;
int pos = 0, wstart = 0;
const char *opptr = NULL;
enum {
st_wordstart, /* Start of word/after whitespace */
st_wordcmp, /* Comparing this word */
st_wordskip, /* Miscompare, skip */
} state = st_wordstart;
if (!cmdline_ptr)
return -1; /* No command line */
cptr = cmdline_ptr & 0xf;
set_fs(cmdline_ptr >> 4);
while (cptr < 0x10000) {
c = rdfs8(cptr++);
pos++;
switch (state) {
case st_wordstart:
if (!c)
return 0;
else if (myisspace(c))
break;
state = st_wordcmp;
opptr = option;
wstart = pos;
/* fall through */
case st_wordcmp:
if (!*opptr)
if (!c || myisspace(c))
return wstart;
else
state = st_wordskip;
else if (!c)
return 0;
else if (c != *opptr++)
state = st_wordskip;
break;
case st_wordskip:
if (!c)
return 0;
else if (myisspace(c))
state = st_wordstart;
break;
}
}
return 0; /* Buffer overrun */
}

11
arch/x86/boot/code16gcc.h Normal file
View file

@ -0,0 +1,11 @@
#
# code16gcc.h
#
# This file is added to the assembler via -Wa when compiling 16-bit C code.
# This is done this way instead via asm() to make sure gcc does not reorder
# things around us.
#
# gcc 4.9+ has a real -m16 option so we can drop this hack long term.
#
.code16gcc

View file

@ -0,0 +1,87 @@
#
# linux/arch/x86/boot/compressed/Makefile
#
# create a compressed vmlinux image from the original vmlinux
#
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-mmx -mno-sse
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
LDFLAGS := -m elf_$(UTS_MACHINE)
LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o \
$(obj)/piggy.o $(obj)/cpuflags.o
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)
@:
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
$(obj)/vmlinux.relocs: vmlinux FORCE
$(call if_changed,relocs)
vmlinux.bin.all-y := $(obj)/vmlinux.bin
vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
$(call if_changed,bzip2)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzma)
$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,xzkern)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lz4)
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_LZ4) := lz4
RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
$(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh)
quiet_cmd_mkpiggy = MKPIGGY $@
cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
targets += piggy.S
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
$(call if_changed,mkpiggy)

View file

@ -0,0 +1,336 @@
#include "misc.h"
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>
#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <generated/utsrelease.h>
/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
#define I8254_PORT_CONTROL 0x43
#define I8254_PORT_COUNTER0 0x40
#define I8254_CMD_READBACK 0xC0
#define I8254_SELECT_COUNTER0 0x02
#define I8254_STATUS_NOTREADY 0x40
static inline u16 i8254(void)
{
u16 status, timer;
do {
outb(I8254_PORT_CONTROL,
I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
status = inb(I8254_PORT_COUNTER0);
timer = inb(I8254_PORT_COUNTER0);
timer |= inb(I8254_PORT_COUNTER0) << 8;
} while (status & I8254_STATUS_NOTREADY);
return timer;
}
static unsigned long rotate_xor(unsigned long hash, const void *area,
size_t size)
{
size_t i;
unsigned long *ptr = (unsigned long *)area;
for (i = 0; i < size / sizeof(hash); i++) {
/* Rotate by odd number of bits and XOR. */
hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
hash ^= ptr[i];
}
return hash;
}
/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_random_boot(void)
{
unsigned long hash = 0;
hash = rotate_xor(hash, build_str, sizeof(build_str));
hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
return hash;
}
static unsigned long get_random_long(void)
{
#ifdef CONFIG_X86_64
const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
#else
const unsigned long mix_const = 0x3f39e593UL;
#endif
unsigned long raw, random = get_random_boot();
bool use_i8254 = true;
debug_putstr("KASLR using");
if (has_cpuflag(X86_FEATURE_RDRAND)) {
debug_putstr(" RDRAND");
if (rdrand_long(&raw)) {
random ^= raw;
use_i8254 = false;
}
}
if (has_cpuflag(X86_FEATURE_TSC)) {
debug_putstr(" RDTSC");
rdtscll(raw);
random ^= raw;
use_i8254 = false;
}
if (use_i8254) {
debug_putstr(" i8254");
random ^= i8254();
}
/* Circular multiply for better bit diffusion */
asm("mul %3"
: "=a" (random), "=d" (raw)
: "a" (random), "rm" (mix_const));
random += raw;
debug_putstr("...\n");
return random;
}
struct mem_vector {
unsigned long start;
unsigned long size;
};
#define MEM_AVOID_MAX 5
static struct mem_vector mem_avoid[MEM_AVOID_MAX];
static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
{
/* Item at least partially before region. */
if (item->start < region->start)
return false;
/* Item at least partially after region. */
if (item->start + item->size > region->start + region->size)
return false;
return true;
}
static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
/* Item one is entirely before item two. */
if (one->start + one->size <= two->start)
return false;
/* Item one is entirely after item two. */
if (one->start >= two->start + two->size)
return false;
return true;
}
static void mem_avoid_init(unsigned long input, unsigned long input_size,
unsigned long output, unsigned long output_size)
{
u64 initrd_start, initrd_size;
u64 cmd_line, cmd_line_size;
unsigned long unsafe, unsafe_len;
char *ptr;
/*
* Avoid the region that is unsafe to overlap during
* decompression (see calculations at top of misc.c).
*/
unsafe_len = (output_size >> 12) + 32768 + 18;
unsafe = (unsigned long)input + input_size - unsafe_len;
mem_avoid[0].start = unsafe;
mem_avoid[0].size = unsafe_len;
/* Avoid initrd. */
initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
initrd_start |= real_mode->hdr.ramdisk_image;
initrd_size = (u64)real_mode->ext_ramdisk_size << 32;
initrd_size |= real_mode->hdr.ramdisk_size;
mem_avoid[1].start = initrd_start;
mem_avoid[1].size = initrd_size;
/* Avoid kernel command line. */
cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
cmd_line |= real_mode->hdr.cmd_line_ptr;
/* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line;
for (cmd_line_size = 0; ptr[cmd_line_size++]; )
;
mem_avoid[2].start = cmd_line;
mem_avoid[2].size = cmd_line_size;
/* Avoid heap memory. */
mem_avoid[3].start = (unsigned long)free_mem_ptr;
mem_avoid[3].size = BOOT_HEAP_SIZE;
/* Avoid stack memory. */
mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
mem_avoid[4].size = BOOT_STACK_SIZE;
}
/* Does this memory vector overlap a known avoided area? */
static bool mem_avoid_overlap(struct mem_vector *img)
{
int i;
struct setup_data *ptr;
for (i = 0; i < MEM_AVOID_MAX; i++) {
if (mem_overlaps(img, &mem_avoid[i]))
return true;
}
/* Avoid all entries in the setup_data linked list. */
ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
while (ptr) {
struct mem_vector avoid;
avoid.start = (unsigned long)ptr;
avoid.size = sizeof(*ptr) + ptr->len;
if (mem_overlaps(img, &avoid))
return true;
ptr = (struct setup_data *)(unsigned long)ptr->next;
}
return false;
}
static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
CONFIG_PHYSICAL_ALIGN];
static unsigned long slot_max;
static void slots_append(unsigned long addr)
{
/* Overflowing the slots list should be impossible. */
if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
CONFIG_PHYSICAL_ALIGN)
return;
slots[slot_max++] = addr;
}
static unsigned long slots_fetch_random(void)
{
/* Handle case of no slots stored. */
if (slot_max == 0)
return 0;
return slots[get_random_long() % slot_max];
}
static void process_e820_entry(struct e820entry *entry,
unsigned long minimum,
unsigned long image_size)
{
struct mem_vector region, img;
/* Skip non-RAM entries. */
if (entry->type != E820_RAM)
return;
/* Ignore entries entirely above our maximum. */
if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
return;
/* Ignore entries entirely below our minimum. */
if (entry->addr + entry->size < minimum)
return;
region.start = entry->addr;
region.size = entry->size;
/* Potentially raise address to minimum location. */
if (region.start < minimum)
region.start = minimum;
/* Potentially raise address to meet alignment requirements. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above the bounds of this e820 region? */
if (region.start > entry->addr + entry->size)
return;
/* Reduce size by any delta from the original address. */
region.size -= region.start - entry->addr;
/* Reduce maximum size to fit end of image within maximum limit. */
if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
/* Walk each aligned slot and check for avoided areas. */
for (img.start = region.start, img.size = image_size ;
mem_contains(&region, &img) ;
img.start += CONFIG_PHYSICAL_ALIGN) {
if (mem_avoid_overlap(&img))
continue;
slots_append(img.start);
}
}
static unsigned long find_random_addr(unsigned long minimum,
unsigned long size)
{
int i;
unsigned long addr;
/* Make sure minimum is aligned. */
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
/* Verify potential e820 positions, appending to slots list. */
for (i = 0; i < real_mode->e820_entries; i++) {
process_e820_entry(&real_mode->e820_map[i], minimum, size);
}
return slots_fetch_random();
}
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
{
unsigned long choice = (unsigned long)output;
unsigned long random;
#ifdef CONFIG_HIBERNATION
if (!cmdline_find_option_bool("kaslr")) {
debug_putstr("KASLR disabled by default...\n");
goto out;
}
#else
if (cmdline_find_option_bool("nokaslr")) {
debug_putstr("KASLR disabled by cmdline...\n");
goto out;
}
#endif
/* Record the various known unsafe memory ranges. */
mem_avoid_init((unsigned long)input, input_size,
(unsigned long)output, output_size);
/* Walk e820 and find a random address. */
random = find_random_addr(choice, output_size);
if (!random) {
debug_putstr("KASLR could not find suitable E820 region...\n");
goto out;
}
/* Always enforce the minimum. */
if (random < choice)
goto out;
choice = random;
out:
return (unsigned char *)choice;
}

View file

@ -0,0 +1,33 @@
#include "misc.h"
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
static unsigned long fs;
static inline void set_fs(unsigned long seg)
{
fs = seg << 4; /* shift it back */
}
typedef unsigned long addr_t;
static inline char rdfs8(addr_t addr)
{
return *((char *)(fs + addr));
}
#include "../cmdline.c"
static unsigned long get_cmd_line_ptr(void)
{
unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
return cmd_line_ptr;
}
int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
}
int cmdline_find_option_bool(const char *option)
{
return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
}
#endif

View file

@ -0,0 +1,12 @@
#ifdef CONFIG_RANDOMIZE_BASE
#include "../cpuflags.c"
bool has_cpuflag(int flag)
{
get_cpuflags();
return test_bit(flag, cpu.flags);
}
#endif

View file

@ -0,0 +1,5 @@
#include "misc.h"
int early_serial_base;
#include "../early_serial_console.c"

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,122 @@
#ifndef BOOT_COMPRESSED_EBOOT_H
#define BOOT_COMPRESSED_EBOOT_H
#define SEG_TYPE_DATA (0 << 3)
#define SEG_TYPE_READ_WRITE (1 << 1)
#define SEG_TYPE_CODE (1 << 3)
#define SEG_TYPE_EXEC_READ (1 << 1)
#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
#define SEG_OP_SIZE_32BIT (1 << 0)
#define SEG_GRANULARITY_4KB (1 << 0)
#define DESC_TYPE_CODE_DATA (1 << 0)
#define EFI_CONSOLE_OUT_DEVICE_GUID \
EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
0x3f, 0xc1, 0x4d)
#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
#define PIXEL_BIT_MASK 2
#define PIXEL_BLT_ONLY 3
#define PIXEL_FORMAT_MAX 4
struct efi_pixel_bitmask {
u32 red_mask;
u32 green_mask;
u32 blue_mask;
u32 reserved_mask;
};
struct efi_graphics_output_mode_info {
u32 version;
u32 horizontal_resolution;
u32 vertical_resolution;
int pixel_format;
struct efi_pixel_bitmask pixel_information;
u32 pixels_per_scan_line;
} __packed;
struct efi_graphics_output_protocol_mode_32 {
u32 max_mode;
u32 mode;
u32 info;
u32 size_of_info;
u64 frame_buffer_base;
u32 frame_buffer_size;
} __packed;
struct efi_graphics_output_protocol_mode_64 {
u32 max_mode;
u32 mode;
u64 info;
u64 size_of_info;
u64 frame_buffer_base;
u64 frame_buffer_size;
} __packed;
struct efi_graphics_output_protocol_mode {
u32 max_mode;
u32 mode;
unsigned long info;
unsigned long size_of_info;
u64 frame_buffer_base;
unsigned long frame_buffer_size;
} __packed;
struct efi_graphics_output_protocol_32 {
u32 query_mode;
u32 set_mode;
u32 blt;
u32 mode;
};
struct efi_graphics_output_protocol_64 {
u64 query_mode;
u64 set_mode;
u64 blt;
u64 mode;
};
struct efi_graphics_output_protocol {
void *query_mode;
unsigned long set_mode;
unsigned long blt;
struct efi_graphics_output_protocol_mode *mode;
};
struct efi_uga_draw_protocol_32 {
u32 get_mode;
u32 set_mode;
u32 blt;
};
struct efi_uga_draw_protocol_64 {
u64 get_mode;
u64 set_mode;
u64 blt;
};
struct efi_uga_draw_protocol {
void *get_mode;
void *set_mode;
void *blt;
};
struct efi_config {
u64 image_handle;
u64 table;
u64 allocate_pool;
u64 allocate_pages;
u64 get_memory_map;
u64 free_pool;
u64 free_pages;
u64 locate_handle;
u64 handle_protocol;
u64 exit_boot_services;
u64 text_output;
efi_status_t (*call)(unsigned long, ...);
bool is64;
} __packed;
#endif /* BOOT_COMPRESSED_EBOOT_H */

View file

@ -0,0 +1,86 @@
/*
* EFI call stub for IA32.
*
* This stub allows us to make EFI calls in physical mode with interrupts
* turned off. Note that this implementation is different from the one in
* arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
* mode at this point.
*/
#include <linux/linkage.h>
#include <asm/page_types.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
* All the callers of this function assure that all the parameters are 4-bytes.
*/
/*
* In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
* So we'd better save all of them at the beginning of this function and restore
* at the end no matter how many we use, because we can not assure EFI runtime
* service functions will comply with gcc calling convention, too.
*/
.text
ENTRY(efi_call_phys)
/*
* 0. The function can only be called in Linux kernel. So CS has been
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
* the values of these registers are the same. And, the corresponding
* GDT entries are identical. So I will do nothing about segment reg
* and GDT, but change GDT base register in prelog and epilog.
*/
/*
* 1. Because we haven't been relocated by this point we need to
* use relative addressing.
*/
call 1f
1: popl %edx
subl $1b, %edx
/*
* 2. Now on the top of stack is the return
* address in the caller of efi_call_phys(), then parameter 1,
* parameter 2, ..., param n. To make things easy, we save the return
* address of efi_call_phys in a global variable.
*/
popl %ecx
movl %ecx, saved_return_addr(%edx)
/* get the function pointer into ECX*/
popl %ecx
movl %ecx, efi_rt_function_ptr(%edx)
/*
* 3. Call the physical function.
*/
call *%ecx
/*
* 4. Balance the stack. And because EAX contain the return value,
* we'd better not clobber it. We need to calculate our address
* again because %ecx and %edx are not preserved across EFI function
* calls.
*/
call 1f
1: popl %edx
subl $1b, %edx
movl efi_rt_function_ptr(%edx), %ecx
pushl %ecx
/*
* 10. Push the saved return address onto the stack and return.
*/
movl saved_return_addr(%edx), %ecx
pushl %ecx
ret
ENDPROC(efi_call_phys)
.previous
.data
saved_return_addr:
.long 0
efi_rt_function_ptr:
.long 0

View file

@ -0,0 +1,5 @@
#include <asm/segment.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include "../../platform/efi/efi_stub_64.S"

View file

@ -0,0 +1,196 @@
/*
* Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
*
* Early support for invoking 32-bit EFI services from a 64-bit kernel.
*
* Because this thunking occurs before ExitBootServices() we have to
* restore the firmware's 32-bit GDT before we make EFI serivce calls,
* since the firmware's 32-bit IDT is still currently installed and it
* needs to be able to service interrupts.
*
* On the plus side, we don't have to worry about mangling 64-bit
* addresses into 32-bits because we're executing with an identify
* mapped pagetable and haven't transitioned to 64-bit virtual addresses
* yet.
*/
#include <linux/linkage.h>
#include <asm/msr.h>
#include <asm/page_types.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
.code64
.text
ENTRY(efi64_thunk)
push %rbp
push %rbx
subq $8, %rsp
leaq efi_exit32(%rip), %rax
movl %eax, 4(%rsp)
leaq efi_gdt64(%rip), %rax
movl %eax, (%rsp)
movl %eax, 2(%rax) /* Fixup the gdt base address */
movl %ds, %eax
push %rax
movl %es, %eax
push %rax
movl %ss, %eax
push %rax
/*
* Convert x86-64 ABI params to i386 ABI
*/
subq $32, %rsp
movl %esi, 0x0(%rsp)
movl %edx, 0x4(%rsp)
movl %ecx, 0x8(%rsp)
movq %r8, %rsi
movl %esi, 0xc(%rsp)
movq %r9, %rsi
movl %esi, 0x10(%rsp)
sgdt save_gdt(%rip)
leaq 1f(%rip), %rbx
movq %rbx, func_rt_ptr(%rip)
/*
* Switch to gdt with 32-bit segments. This is the firmware GDT
* that was installed when the kernel started executing. This
* pointer was saved at the EFI stub entry point in head_64.S.
*/
leaq efi32_boot_gdt(%rip), %rax
lgdt (%rax)
pushq $__KERNEL_CS
leaq efi_enter32(%rip), %rax
pushq %rax
lretq
1: addq $32, %rsp
lgdt save_gdt(%rip)
pop %rbx
movl %ebx, %ss
pop %rbx
movl %ebx, %es
pop %rbx
movl %ebx, %ds
/*
* Convert 32-bit status code into 64-bit.
*/
test %rax, %rax
jz 1f
movl %eax, %ecx
andl $0x0fffffff, %ecx
andl $0xf0000000, %eax
shl $32, %rax
or %rcx, %rax
1:
addq $8, %rsp
pop %rbx
pop %rbp
ret
ENDPROC(efi64_thunk)
ENTRY(efi_exit32)
movq func_rt_ptr(%rip), %rax
push %rax
mov %rdi, %rax
ret
ENDPROC(efi_exit32)
.code32
/*
* EFI service pointer must be in %edi.
*
* The stack should represent the 32-bit calling convention.
*/
ENTRY(efi_enter32)
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
/* Reload pgtables */
movl %cr3, %eax
movl %eax, %cr3
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
/* Disable long mode via EFER */
movl $MSR_EFER, %ecx
rdmsr
btrl $_EFER_LME, %eax
wrmsr
call *%edi
/* We must preserve return value */
movl %eax, %edi
/*
* Some firmware will return with interrupts enabled. Be sure to
* disable them before we switch GDTs.
*/
cli
movl 56(%esp), %eax
movl %eax, 2(%eax)
lgdtl (%eax)
movl %cr4, %eax
btsl $(X86_CR4_PAE_BIT), %eax
movl %eax, %cr4
movl %cr3, %eax
movl %eax, %cr3
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
wrmsr
xorl %eax, %eax
lldt %ax
movl 60(%esp), %eax
pushl $__KERNEL_CS
pushl %eax
/* Enable paging */
movl %cr0, %eax
btsl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
lret
ENDPROC(efi_enter32)
.data
.balign 8
.global efi32_boot_gdt
efi32_boot_gdt: .word 0
.quad 0
save_gdt: .word 0
.quad 0
func_rt_ptr: .quad 0
.global efi_gdt64
efi_gdt64:
.word efi_gdt64_end - efi_gdt64
.long 0 /* Filled out by user */
.word 0
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
efi_gdt64_end:

View file

@ -0,0 +1,247 @@
/*
* linux/boot/head.S
*
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
*/
/*
* head.S contains the 32-bit startup code.
*
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
* the page directory will exist. The startup code will be overwritten by
* the page directory. [According to comments etc elsewhere on a compressed
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
*
* Page 0 is deliberately kept safe, since System Management Mode code in
* laptops may need to access the BIOS data stored there. This is also
* useful for future device drivers that either access the BIOS via VM86
* mode.
*/
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
.text
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
__HEAD
ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
jmp preferred_addr
/*
* We don't need the return address, so set up the stack so
* efi_main() can find its arguments.
*/
ENTRY(efi_pe_entry)
add $0x4, %esp
call 1f
1: popl %esi
subl $1b, %esi
popl %ecx
movl %ecx, efi32_config(%esi) /* Handle */
popl %ecx
movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
/* Relocate efi_config->call() */
leal efi32_config(%esi), %eax
add %esi, 88(%eax)
pushl %eax
call make_boot_params
cmpl $0, %eax
je fail
movl %esi, BP_code32_start(%eax)
popl %ecx
pushl %eax
pushl %ecx
jmp 2f /* Skip efi_config initialization */
ENTRY(efi32_stub_entry)
add $0x4, %esp
popl %ecx
popl %edx
call 1f
1: popl %esi
subl $1b, %esi
movl %ecx, efi32_config(%esi) /* Handle */
movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
/* Relocate efi_config->call() */
leal efi32_config(%esi), %eax
add %esi, 88(%eax)
pushl %eax
2:
call efi_main
cmpl $0, %eax
movl %eax, %esi
jne 2f
fail:
/* EFI init failed, so hang. */
hlt
jmp fail
2:
movl BP_code32_start(%esi), %eax
leal preferred_addr(%eax), %eax
jmp *%eax
preferred_addr:
#endif
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
jnz 1f
cli
movl $__BOOT_DS, %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %fs
movl %eax, %gs
movl %eax, %ss
1:
/*
* Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
subl $1b, %ebp
/*
* %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
notl %eax
andl %eax, %ebx
cmpl $LOAD_PHYSICAL_ADDR, %ebx
jge 1f
#endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
/* Set up the stack */
leal boot_stack_end(%ebx), %esp
/* Zero EFLAGS */
pushl $0
popfl
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
pushl %esi
leal (_bss-4)(%ebp), %esi
leal (_bss-4)(%ebx), %edi
movl $(_bss - startup_32), %ecx
shrl $2, %ecx
std
rep movsl
cld
popl %esi
/*
* Jump to the relocated address.
*/
leal relocated(%ebx), %eax
jmp *%eax
ENDPROC(startup_32)
.text
relocated:
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
leal _bss(%ebx), %edi
leal _ebss(%ebx), %ecx
subl %edi, %ecx
shrl $2, %ecx
rep stosl
/*
* Adjust our own GOT
*/
leal _got(%ebx), %edx
leal _egot(%ebx), %ecx
1:
cmpl %ecx, %edx
jae 2f
addl %ebx, (%edx)
addl $4, %edx
jmp 1b
2:
/*
* Do the decompression, and jump to the new kernel..
*/
/* push arguments for decompress_kernel: */
pushl $z_run_size /* size of kernel with .bss and .brk */
pushl $z_output_len /* decompressed length, end of relocs */
leal z_extract_offset_negative(%ebx), %ebp
pushl %ebp /* output address */
pushl $z_input_len /* input_len */
leal input_data(%ebx), %eax
pushl %eax /* input_data */
leal boot_heap(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
call decompress_kernel /* returns kernel location in %eax */
addl $28, %esp
/*
* Jump to the decompressed kernel.
*/
xorl %ebx, %ebx
jmp *%eax
#ifdef CONFIG_EFI_STUB
.data
efi32_config:
.fill 11,8,0
.long efi_call_phys
.long 0
.byte 0
#endif
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:

View file

@ -0,0 +1,479 @@
/*
* linux/boot/head.S
*
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
*/
/*
* head.S contains the 32-bit startup code.
*
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
* the page directory will exist. The startup code will be overwritten by
* the page directory. [According to comments etc elsewhere on a compressed
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
*
* Page 0 is deliberately kept safe, since System Management Mode code in
* laptops may need to access the BIOS data stored there. This is also
* useful for future device drivers that either access the BIOS via VM86
* mode.
*/
/*
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
.code32
.text
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
__HEAD
.code32
ENTRY(startup_32)
/*
* 32bit entry is 0 and it is ABI so immutable!
* If we come here directly from a bootloader,
* kernel(text+data+bss+brk) ramdisk, zero_page, command line
* all need to be under the 4G limit.
*/
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
testb $(1<<6), BP_loadflags(%esi)
jnz 1f
cli
movl $(__BOOT_DS), %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
1:
/*
* Calculate the delta between where we were compiled to run
* at and where we were actually loaded at. This can only be done
* with a short local call on x86. Nothing else will tell us what
* address we are running at. The reserved chunk of the real-mode
* data at 0x1e4 (defined as a scratch field) are used as the stack
* for this calculation. Only 4 bytes are needed.
*/
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
subl $1b, %ebp
/* setup a stack and make sure cpu supports long mode. */
movl $boot_stack_end, %eax
addl %ebp, %eax
movl %eax, %esp
call verify_cpu
testl %eax, %eax
jnz no_longmode
/*
* Compute the delta between where we were compiled to run at
* and where the code will actually run at.
*
* %ebp contains the address we are loaded at by the boot loader and %ebx
* contains the address where we should move the kernel image temporarily
* for safe in-place decompression.
*/
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
notl %eax
andl %eax, %ebx
cmpl $LOAD_PHYSICAL_ADDR, %ebx
jge 1f
#endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
/*
* Prepare for entering 64 bit mode
*/
/* Load new GDT with the 64bit segments using 32bit descriptor */
leal gdt(%ebp), %eax
movl %eax, gdt+2(%ebp)
lgdt gdt(%ebp)
/* Enable PAE mode */
movl %cr4, %eax
orl $X86_CR4_PAE, %eax
movl %eax, %cr4
/*
* Build early 4G boot pagetable
*/
/* Initialize Page tables to 0 */
leal pgtable(%ebx), %edi
xorl %eax, %eax
movl $((4096*6)/4), %ecx
rep stosl
/* Build Level 4 */
leal pgtable + 0(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
/* Build Level 3 */
leal pgtable + 0x1000(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
addl $0x00001000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Build Level 2 */
leal pgtable + 0x2000(%ebx), %edi
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
addl $0x00200000, %eax
addl $8, %edi
decl %ecx
jnz 1b
/* Enable the boot page tables */
leal pgtable(%ebx), %eax
movl %eax, %cr3
/* Enable Long mode in EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
wrmsr
/* After gdt is loaded */
xorl %eax, %eax
lldt %ax
movl $0x20, %eax
ltr %ax
/*
* Setup for the jump to 64bit mode
*
* When the jump is performend we will be in long mode but
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
* (and in turn EFER.LMA = 1). To jump into 64bit mode we use
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
* We place all of the values on our mini stack so lret can
* used to perform that far jump.
*/
pushl $__KERNEL_CS
leal startup_64(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
movl efi32_config(%ebp), %ebx
cmp $0, %ebx
jz 1f
leal handover_entry(%ebp), %eax
1:
#endif
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
movl %eax, %cr0
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
ENDPROC(startup_32)
#ifdef CONFIG_EFI_MIXED
.org 0x190
ENTRY(efi32_stub_entry)
add $0x4, %esp /* Discard return address */
popl %ecx
popl %edx
popl %esi
leal (BP_scratch+4)(%esi), %esp
call 1f
1: pop %ebp
subl $1b, %ebp
movl %ecx, efi32_config(%ebp)
movl %edx, efi32_config+8(%ebp)
sgdtl efi32_boot_gdt(%ebp)
leal efi32_config(%ebp), %eax
movl %eax, efi_config(%ebp)
jmp startup_32
ENDPROC(efi32_stub_entry)
#endif
.code64
.org 0x200
ENTRY(startup_64)
/*
* 64bit entry is 0x200 and it is ABI so immutable!
* We come here either from startup_32 or directly from a
* 64bit bootloader.
* If we come here from a bootloader, kernel(text+data+bss+brk),
* ramdisk, zero_page, command line could be above 4G.
* We depend on an identity mapped page table being provided
* that maps our entire kernel(text+data+bss+brk), zero page
* and command line.
*/
#ifdef CONFIG_EFI_STUB
/*
* The entry point for the PE/COFF executable is efi_pe_entry, so
* only legacy boot loaders will execute this jmp.
*/
jmp preferred_addr
ENTRY(efi_pe_entry)
movq %rcx, efi64_config(%rip) /* Handle */
movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
leaq efi64_config(%rip), %rax
movq %rax, efi_config(%rip)
call 1f
1: popq %rbp
subq $1b, %rbp
/*
* Relocate efi_config->call().
*/
addq %rbp, efi64_config+88(%rip)
movq %rax, %rdi
call make_boot_params
cmpq $0,%rax
je fail
mov %rax, %rsi
leaq startup_32(%rip), %rax
movl %eax, BP_code32_start(%rsi)
jmp 2f /* Skip the relocation */
handover_entry:
call 1f
1: popq %rbp
subq $1b, %rbp
/*
* Relocate efi_config->call().
*/
movq efi_config(%rip), %rax
addq %rbp, 88(%rax)
2:
movq efi_config(%rip), %rdi
call efi_main
movq %rax,%rsi
cmpq $0,%rax
jne 2f
fail:
/* EFI init failed, so hang. */
hlt
jmp fail
2:
movl BP_code32_start(%esi), %eax
leaq preferred_addr(%rax), %rax
jmp *%rax
preferred_addr:
#endif
/* Setup data segments. */
xorl %eax, %eax
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
movl %eax, %fs
movl %eax, %gs
/*
* Compute the decompressed kernel start address. It is where
* we were loaded at aligned to a 2M boundary. %rbp contains the
* decompressed kernel start address.
*
* If it is a relocatable kernel then decompress and run the kernel
* from load address aligned to 2MB addr, otherwise decompress and
* run the kernel from LOAD_PHYSICAL_ADDR
*
* We cannot rely on the calculation done in 32-bit mode, since we
* may have been invoked via the 64-bit entry point.
*/
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
movl BP_kernel_alignment(%rsi), %eax
decl %eax
addq %rax, %rbp
notq %rax
andq %rax, %rbp
cmpq $LOAD_PHYSICAL_ADDR, %rbp
jge 1f
#endif
movq $LOAD_PHYSICAL_ADDR, %rbp
1:
/* Target address to relocate to for decompression */
leaq z_extract_offset(%rbp), %rbx
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
/* Zero EFLAGS */
pushq $0
popfq
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
pushq %rsi
leaq (_bss-8)(%rip), %rsi
leaq (_bss-8)(%rbx), %rdi
movq $_bss /* - $startup_32 */, %rcx
shrq $3, %rcx
std
rep movsq
cld
popq %rsi
/*
* Jump to the relocated address.
*/
leaq relocated(%rbx), %rax
jmp *%rax
#ifdef CONFIG_EFI_STUB
.org 0x390
ENTRY(efi64_stub_entry)
movq %rdi, efi64_config(%rip) /* Handle */
movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
leaq efi64_config(%rip), %rax
movq %rax, efi_config(%rip)
movq %rdx, %rsi
jmp handover_entry
ENDPROC(efi64_stub_entry)
#endif
.text
relocated:
/*
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
leaq _bss(%rip), %rdi
leaq _ebss(%rip), %rcx
subq %rdi, %rcx
shrq $3, %rcx
rep stosq
/*
* Adjust our own GOT
*/
leaq _got(%rip), %rdx
leaq _egot(%rip), %rcx
1:
cmpq %rcx, %rdx
jae 2f
addq %rbx, (%rdx)
addq $8, %rdx
jmp 1b
2:
/*
* Do the decompression, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
movq $z_run_size, %r9 /* size of kernel with .bss and .brk */
pushq %r9
movq %rsi, %rdi /* real mode address */
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
leaq input_data(%rip), %rdx /* input_data */
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
movq $z_output_len, %r9 /* decompressed length, end of relocs */
call decompress_kernel /* returns kernel location in %rax */
popq %r9
popq %rsi
/*
* Jump to the decompressed kernel.
*/
jmp *%rax
.code32
no_longmode:
/* This isn't an x86-64 CPU so hang */
1:
hlt
jmp 1b
#include "../../kernel/verify_cpu.S"
.data
gdt:
.word gdt_end - gdt
.long gdt
.word 0
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
gdt_end:
#ifdef CONFIG_EFI_STUB
efi_config:
.quad 0
#ifdef CONFIG_EFI_MIXED
.global efi32_config
efi32_config:
.fill 11,8,0
.quad efi64_thunk
.byte 0
#endif
.global efi64_config
efi64_config:
.fill 11,8,0
.quad efi_call
.byte 1
#endif /* CONFIG_EFI_STUB */
/*
* Stack and heap for uncompression
*/
.bss
.balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
/*
* Space for page tables (not in .bss so not zeroed)
*/
.section ".pgtable","a",@nobits
.balign 4096
pgtable:
.fill 6*4096, 1, 0

View file

@ -0,0 +1,422 @@
/*
* misc.c
*
* This is a collection of several routines from gzip-1.0.3
* adapted for Linux.
*
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
* puts by Nick Holloway 1993, better puts by Martin Mares 1995
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
#include "misc.h"
#include "../string.h"
/* WARNING!!
* This code is compiled with -fPIC and it is relocated dynamically
* at run time, but no relocation processing is performed.
* This means that it is not safe to place pointers in static structures.
*/
/*
* Getting to provable safe in place decompression is hard.
* Worst case behaviours need to be analyzed.
* Background information:
*
* The file layout is:
* magic[2]
* method[1]
* flags[1]
* timestamp[4]
* extraflags[1]
* os[1]
* compressed data blocks[N]
* crc[4] orig_len[4]
*
* resulting in 18 bytes of non compressed data overhead.
*
* Files divided into blocks
* 1 bit (last block flag)
* 2 bits (block type)
*
* 1 block occurs every 32K -1 bytes or when there 50% compression
* has been achieved. The smallest block type encoding is always used.
*
* stored:
* 32 bits length in bytes.
*
* fixed:
* magic fixed tree.
* symbols.
*
* dynamic:
* dynamic tree encoding.
* symbols.
*
*
* The buffer for decompression in place is the length of the
* uncompressed data, plus a small amount extra to keep the algorithm safe.
* The compressed data is placed at the end of the buffer. The output
* pointer is placed at the start of the buffer and the input pointer
* is placed where the compressed data starts. Problems will occur
* when the output pointer overruns the input pointer.
*
* The output pointer can only overrun the input pointer if the input
* pointer is moving faster than the output pointer. A condition only
* triggered by data whose compressed form is larger than the uncompressed
* form.
*
* The worst case at the block level is a growth of the compressed data
* of 5 bytes per 32767 bytes.
*
* The worst case internal to a compressed block is very hard to figure.
* The worst case can at least be boundined by having one bit that represents
* 32764 bytes and then all of the rest of the bytes representing the very
* very last byte.
*
* All of which is enough to compute an amount of extra data that is required
* to be safe. To avoid problems at the block level allocating 5 extra bytes
* per 32767 bytes of data is sufficient. To avoind problems internal to a
* block adding an extra 32767 bytes (the worst case uncompressed block size)
* is sufficient, to ensure that in the worst case the decompressed data for
* block will stop the byte before the compressed data for a block begins.
* To avoid problems with the compressed data's meta information an extra 18
* bytes are needed. Leading to the formula:
*
* extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
*
* Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
* Adding 32768 instead of 32767 just makes for round numbers.
* Adding the decompressor_size is necessary as it musht live after all
* of the data as well. Last I measured the decompressor is about 14K.
* 10K of actual data and 4K of bss.
*
*/
/*
* gzip declarations
*/
#define STATIC static
#undef memcpy
/*
* Use a normal definition of memset() from string.c. There are already
* included header files which expect a definition of memset() and by
* the time we define memset macro, it is too late.
*/
#undef memset
#define memzero(s, n) memset((s), 0, (n))
static void error(char *m);
/*
* This is set up by the setup-routine at boot-time
*/
struct boot_params *real_mode; /* Pointer to real-mode data */
memptr free_mem_ptr;
memptr free_mem_end_ptr;
static char *vidmem;
static int vidport;
static int lines, cols;
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
#endif
#ifdef CONFIG_KERNEL_BZIP2
#include "../../../../lib/decompress_bunzip2.c"
#endif
#ifdef CONFIG_KERNEL_LZMA
#include "../../../../lib/decompress_unlzma.c"
#endif
#ifdef CONFIG_KERNEL_XZ
#include "../../../../lib/decompress_unxz.c"
#endif
#ifdef CONFIG_KERNEL_LZO
#include "../../../../lib/decompress_unlzo.c"
#endif
#ifdef CONFIG_KERNEL_LZ4
#include "../../../../lib/decompress_unlz4.c"
#endif
static void scroll(void)
{
int i;
memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
vidmem[i] = ' ';
}
#define XMTRDY 0x20
#define TXR 0 /* Transmit register (WRITE) */
#define LSR 5 /* Line Status */
static void serial_putchar(int ch)
{
unsigned timeout = 0xffff;
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
outb(ch, early_serial_base + TXR);
}
void __putstr(const char *s)
{
int x, y, pos;
char c;
if (early_serial_base) {
const char *str = s;
while (*str) {
if (*str == '\n')
serial_putchar('\r');
serial_putchar(*str++);
}
}
if (real_mode->screen_info.orig_video_mode == 0 &&
lines == 0 && cols == 0)
return;
x = real_mode->screen_info.orig_x;
y = real_mode->screen_info.orig_y;
while ((c = *s++) != '\0') {
if (c == '\n') {
x = 0;
if (++y >= lines) {
scroll();
y--;
}
} else {
vidmem[(x + cols * y) * 2] = c;
if (++x >= cols) {
x = 0;
if (++y >= lines) {
scroll();
y--;
}
}
}
}
real_mode->screen_info.orig_x = x;
real_mode->screen_info.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
outb(14, vidport);
outb(0xff & (pos >> 9), vidport+1);
outb(15, vidport);
outb(0xff & (pos >> 1), vidport+1);
}
static void error(char *x)
{
error_putstr("\n\n");
error_putstr(x);
error_putstr("\n\n -- System halted");
while (1)
asm("hlt");
}
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len)
{
int *reloc;
unsigned long delta, map, ptr;
unsigned long min_addr = (unsigned long)output;
unsigned long max_addr = min_addr + output_len;
/*
* Calculate the delta between where vmlinux was linked to load
* and where it was actually loaded.
*/
delta = min_addr - LOAD_PHYSICAL_ADDR;
if (!delta) {
debug_putstr("No relocation needed... ");
return;
}
debug_putstr("Performing relocations... ");
/*
* The kernel contains a table of relocation addresses. Those
* addresses have the final load address of the kernel in virtual
* memory. We are currently working in the self map. So we need to
* create an adjustment for kernel memory addresses to the self map.
* This will involve subtracting out the base address of the kernel.
*/
map = delta - __START_KERNEL_map;
/*
* Process relocations: 32 bit relocations first then 64 bit after.
* Two sets of binary relocations are added to the end of the kernel
* before compression. Each relocation table entry is the kernel
* address of the location which needs to be updated stored as a
* 32-bit value which is sign extended to 64 bits.
*
* Format is:
*
* kernel bits...
* 0 - zero terminator for 64 bit relocations
* 64 bit relocation repeated
* 0 - zero terminator for 32 bit relocations
* 32 bit relocation repeated
*
* So we work backwards from the end of the decompressed image.
*/
for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
int extended = *reloc;
extended += map;
ptr = (unsigned long)extended;
if (ptr < min_addr || ptr > max_addr)
error("32-bit relocation outside of kernel!\n");
*(uint32_t *)ptr += delta;
}
#ifdef CONFIG_X86_64
for (reloc--; *reloc; reloc--) {
long extended = *reloc;
extended += map;
ptr = (unsigned long)extended;
if (ptr < min_addr || ptr > max_addr)
error("64-bit relocation outside of kernel!\n");
*(uint64_t *)ptr += delta;
}
#endif
}
#else
static inline void handle_relocations(void *output, unsigned long output_len)
{ }
#endif
static void parse_elf(void *output)
{
#ifdef CONFIG_X86_64
Elf64_Ehdr ehdr;
Elf64_Phdr *phdrs, *phdr;
#else
Elf32_Ehdr ehdr;
Elf32_Phdr *phdrs, *phdr;
#endif
void *dest;
int i;
memcpy(&ehdr, output, sizeof(ehdr));
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
ehdr.e_ident[EI_MAG3] != ELFMAG3) {
error("Kernel is not a valid ELF file");
return;
}
debug_putstr("Parsing ELF... ");
phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
if (!phdrs)
error("Failed to allocate space for phdrs");
memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
for (i = 0; i < ehdr.e_phnum; i++) {
phdr = &phdrs[i];
switch (phdr->p_type) {
case PT_LOAD:
#ifdef CONFIG_RELOCATABLE
dest = output;
dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
#else
dest = (void *)(phdr->p_paddr);
#endif
memcpy(dest,
output + phdr->p_offset,
phdr->p_filesz);
break;
default: /* Ignore other PT_* */ break;
}
}
free(phdrs);
}
asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
unsigned long output_len,
unsigned long run_size)
{
unsigned char *output_orig = output;
real_mode = rmode;
sanitize_boot_params(real_mode);
if (real_mode->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
vidmem = (char *) 0xb8000;
vidport = 0x3d4;
}
lines = real_mode->screen_info.orig_video_lines;
cols = real_mode->screen_info.orig_video_cols;
console_init();
debug_putstr("early console in decompress_kernel\n");
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
output = choose_kernel_location(input_data, input_len, output,
output_len > run_size ? output_len
: run_size);
/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
error("Destination address too large");
#endif
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Wrong destination address");
#endif
debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
/*
* 32-bit always performs relocations. 64-bit relocations are only
* needed if kASLR has chosen a different load address.
*/
if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n");
return output;
}

View file

@ -0,0 +1,86 @@
#ifndef BOOT_COMPRESSED_MISC_H
#define BOOT_COMPRESSED_MISC_H
/*
* we have to be careful, because no indirections are allowed here, and
* paravirt_ops is a kind of one. As it will only run in baremetal anyway,
* we just keep it from happening
*/
#undef CONFIG_PARAVIRT
#ifdef CONFIG_X86_32
#define _ASM_X86_DESC_H 1
#endif
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
#include <linux/io.h>
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
#include <asm/bootparam_utils.h>
#define BOOT_BOOT_H
#include "../ctype.h"
#ifdef CONFIG_X86_64
#define memptr long
#else
#define memptr unsigned
#endif
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
extern struct boot_params *real_mode; /* Pointer to real-mode data */
void __putstr(const char *s);
#define error_putstr(__x) __putstr(__x)
#ifdef CONFIG_X86_VERBOSE_BOOTUP
#define debug_putstr(__x) __putstr(__x)
#else
static inline void debug_putstr(const char *s)
{ }
#endif
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
#endif
#if CONFIG_RANDOMIZE_BASE
/* aslr.c */
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size);
/* cpuflags.c */
bool has_cpuflag(int flag);
#else
static inline
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
{
return output;
}
#endif
#ifdef CONFIG_EARLY_PRINTK
/* early_serial_console.c */
extern int early_serial_base;
void console_init(void);
#else
static const int early_serial_base;
static inline void console_init(void)
{ }
#endif
#endif

View file

@ -0,0 +1,104 @@
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* H. Peter Anvin <hpa@linux.intel.com>
*
* ----------------------------------------------------------------------- */
/*
* Compute the desired load offset from a compressed program; outputs
* a small assembly wrapper with the appropriate symbols defined.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <tools/le_byteshift.h>
int main(int argc, char *argv[])
{
uint32_t olen;
long ilen;
unsigned long offs;
unsigned long run_size;
FILE *f = NULL;
int retval = 1;
if (argc < 3) {
fprintf(stderr, "Usage: %s compressed_file run_size\n",
argv[0]);
goto bail;
}
/* Get the information for the compressed kernel image first */
f = fopen(argv[1], "r");
if (!f) {
perror(argv[1]);
goto bail;
}
if (fseek(f, -4L, SEEK_END)) {
perror(argv[1]);
}
if (fread(&olen, sizeof(olen), 1, f) != 1) {
perror(argv[1]);
goto bail;
}
ilen = ftell(f);
olen = get_unaligned_le32(&olen);
/*
* Now we have the input (compressed) and output (uncompressed)
* sizes, compute the necessary decompression offset...
*/
offs = (olen > ilen) ? olen - ilen : 0;
offs += olen >> 12; /* Add 8 bytes for each 32K block */
offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */
offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
run_size = atoi(argv[2]);
printf(".section \".rodata..compressed\",\"a\",@progbits\n");
printf(".globl z_input_len\n");
printf("z_input_len = %lu\n", ilen);
printf(".globl z_output_len\n");
printf("z_output_len = %lu\n", (unsigned long)olen);
printf(".globl z_extract_offset\n");
printf("z_extract_offset = 0x%lx\n", offs);
/* z_extract_offset_negative allows simplification of head_32.S */
printf(".globl z_extract_offset_negative\n");
printf("z_extract_offset_negative = -0x%lx\n", offs);
printf(".globl z_run_size\n");
printf("z_run_size = %lu\n", run_size);
printf(".globl input_data, input_data_end\n");
printf("input_data:\n");
printf(".incbin \"%s\"\n", argv[1]);
printf("input_data_end:\n");
retval = 0;
bail:
if (f)
fclose(f);
return retval;
}

View file

@ -0,0 +1,41 @@
#include "../string.c"
#ifdef CONFIG_X86_32
void *memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
"rep ; movsl\n\t"
"movl %4,%%ecx\n\t"
"rep ; movsb\n\t"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
: "memory");
return dest;
}
#else
void *memcpy(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
asm volatile(
"rep ; movsq\n\t"
"movq %4,%%rcx\n\t"
"rep ; movsb\n\t"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
: "memory");
return dest;
}
#endif
void *memset(void *s, int c, size_t n)
{
int i;
char *ss = s;
for (i = 0; i < n; i++)
ss[i] = c;
return s;
}

View file

@ -0,0 +1,74 @@
#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386
#include <asm/cache.h>
#include <asm/page_types.h>
#ifdef CONFIG_X86_64
OUTPUT_ARCH(i386:x86-64)
ENTRY(startup_64)
#else
OUTPUT_ARCH(i386)
ENTRY(startup_32)
#endif
SECTIONS
{
/* Be careful parts of head_64.S assume startup_32 is at
* address 0.
*/
. = 0;
.head.text : {
_head = . ;
HEAD_TEXT
_ehead = . ;
}
.rodata..compressed : {
*(.rodata..compressed)
}
.text : {
_text = .; /* Text */
*(.text)
*(.text.*)
_etext = . ;
}
.rodata : {
_rodata = . ;
*(.rodata) /* read-only data */
*(.rodata.*)
_erodata = . ;
}
.got : {
_got = .;
KEEP(*(.got.plt))
KEEP(*(.got))
_egot = .;
}
.data : {
_data = . ;
*(.data)
*(.data.*)
_edata = . ;
}
. = ALIGN(L1_CACHE_BYTES);
.bss : {
_bss = . ;
*(.bss)
*(.bss.*)
*(COMMON)
. = ALIGN(8); /* For convenience during zeroing */
_ebss = .;
}
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
.pgtable : {
_pgtable = . ;
*(.pgtable)
_epgtable = . ;
}
#endif
_end = .;
}

87
arch/x86/boot/copy.S Normal file
View file

@ -0,0 +1,87 @@
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
#include <linux/linkage.h>
/*
* Memory copy routines
*/
.code16
.text
GLOBAL(memcpy)
pushw %si
pushw %di
movw %ax, %di
movw %dx, %si
pushw %cx
shrw $2, %cx
rep; movsl
popw %cx
andw $3, %cx
rep; movsb
popw %di
popw %si
retl
ENDPROC(memcpy)
GLOBAL(memset)
pushw %di
movw %ax, %di
movzbl %dl, %eax
imull $0x01010101,%eax
pushw %cx
shrw $2, %cx
rep; stosl
popw %cx
andw $3, %cx
rep; stosb
popw %di
retl
ENDPROC(memset)
GLOBAL(copy_from_fs)
pushw %ds
pushw %fs
popw %ds
calll memcpy
popw %ds
retl
ENDPROC(copy_from_fs)
GLOBAL(copy_to_fs)
pushw %es
pushw %fs
popw %es
calll memcpy
popw %es
retl
ENDPROC(copy_to_fs)
#if 0 /* Not currently used, but can be enabled as needed */
GLOBAL(copy_from_gs)
pushw %ds
pushw %gs
popw %ds
calll memcpy
popw %ds
retl
ENDPROC(copy_from_gs)
GLOBAL(copy_to_gs)
pushw %es
pushw %gs
popw %es
calll memcpy
popw %es
retl
ENDPROC(copy_to_gs)
#endif

99
arch/x86/boot/cpu.c Normal file
View file

@ -0,0 +1,99 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* arch/x86/boot/cpu.c
*
* Check for obligatory CPU features and abort if the features are not
* present.
*/
#include "boot.h"
#ifdef CONFIG_X86_FEATURE_NAMES
#include "cpustr.h"
#endif
static char *cpu_name(int level)
{
static char buf[6];
if (level == 64) {
return "x86-64";
} else {
if (level == 15)
level = 6;
sprintf(buf, "i%d86", level);
return buf;
}
}
static void show_cap_strs(u32 *err_flags)
{
int i, j;
#ifdef CONFIG_X86_FEATURE_NAMES
const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs;
for (i = 0; i < NCAPINTS; i++) {
u32 e = err_flags[i];
for (j = 0; j < 32; j++) {
if (msg_strs[0] < i ||
(msg_strs[0] == i && msg_strs[1] < j)) {
/* Skip to the next string */
msg_strs += 2;
while (*msg_strs++)
;
}
if (e & 1) {
if (msg_strs[0] == i &&
msg_strs[1] == j &&
msg_strs[2])
printf("%s ", msg_strs+2);
else
printf("%d:%d ", i, j);
}
e >>= 1;
}
}
#else
for (i = 0; i < NCAPINTS; i++) {
u32 e = err_flags[i];
for (j = 0; j < 32; j++) {
if (e & 1)
printf("%d:%d ", i, j);
e >>= 1;
}
}
#endif
}
int validate_cpu(void)
{
u32 *err_flags;
int cpu_level, req_level;
check_cpu(&cpu_level, &req_level, &err_flags);
if (cpu_level < req_level) {
printf("This kernel requires an %s CPU, ",
cpu_name(req_level));
printf("but only detected an %s CPU.\n",
cpu_name(cpu_level));
return -1;
}
if (err_flags) {
puts("This kernel requires the following features "
"not present on the CPU:\n");
show_cap_strs(err_flags);
putchar('\n');
return -1;
} else {
return 0;
}
}

187
arch/x86/boot/cpucheck.c Normal file
View file

@ -0,0 +1,187 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Check for obligatory CPU features and abort if the features are not
* present. This code should be compilable as 16-, 32- or 64-bit
* code, so be very careful with types and inline assembly.
*
* This code should not contain any messages; that requires an
* additional wrapper.
*
* As written, this code is not safe for inclusion into the kernel
* proper (after FPU initialization, in particular).
*/
#ifdef _SETUP
# include "boot.h"
#endif
#include <linux/types.h>
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
#include "string.h"
static u32 err_flags[NCAPINTS];
static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
static const u32 req_flags[NCAPINTS] =
{
REQUIRED_MASK0,
REQUIRED_MASK1,
0, /* REQUIRED_MASK2 not implemented in this file */
0, /* REQUIRED_MASK3 not implemented in this file */
REQUIRED_MASK4,
0, /* REQUIRED_MASK5 not implemented in this file */
REQUIRED_MASK6,
0, /* REQUIRED_MASK7 not implemented in this file */
};
#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
static int is_amd(void)
{
return cpu_vendor[0] == A32('A', 'u', 't', 'h') &&
cpu_vendor[1] == A32('e', 'n', 't', 'i') &&
cpu_vendor[2] == A32('c', 'A', 'M', 'D');
}
static int is_centaur(void)
{
return cpu_vendor[0] == A32('C', 'e', 'n', 't') &&
cpu_vendor[1] == A32('a', 'u', 'r', 'H') &&
cpu_vendor[2] == A32('a', 'u', 'l', 's');
}
static int is_transmeta(void)
{
return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
cpu_vendor[1] == A32('i', 'n', 'e', 'T') &&
cpu_vendor[2] == A32('M', 'x', '8', '6');
}
static int is_intel(void)
{
return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
cpu_vendor[1] == A32('i', 'n', 'e', 'I') &&
cpu_vendor[2] == A32('n', 't', 'e', 'l');
}
/* Returns a bitmask of which words we have error bits in */
static int check_cpuflags(void)
{
u32 err;
int i;
err = 0;
for (i = 0; i < NCAPINTS; i++) {
err_flags[i] = req_flags[i] & ~cpu.flags[i];
if (err_flags[i])
err |= 1 << i;
}
return err;
}
/*
* Returns -1 on error.
*
* *cpu_level is set to the current CPU level; *req_level to the required
* level. x86-64 is considered level 64 for this purpose.
*
* *err_flags_ptr is set to the flags error array if there are flags missing.
*/
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
{
int err;
memset(&cpu.flags, 0, sizeof cpu.flags);
cpu.level = 3;
if (has_eflag(X86_EFLAGS_AC))
cpu.level = 4;
get_cpuflags();
err = check_cpuflags();
if (test_bit(X86_FEATURE_LM, cpu.flags))
cpu.level = 64;
if (err == 0x01 &&
!(err_flags[0] &
~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
is_amd()) {
/* If this is an AMD and we're only missing SSE+SSE2, try to
turn them on */
u32 ecx = MSR_K7_HWCR;
u32 eax, edx;
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
eax &= ~(1 << 15);
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
get_cpuflags(); /* Make sure it really did something */
err = check_cpuflags();
} else if (err == 0x01 &&
!(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
is_centaur() && cpu.model >= 6) {
/* If this is a VIA C3, we might have to enable CX8
explicitly */
u32 ecx = MSR_VIA_FCR;
u32 eax, edx;
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
eax |= (1<<1)|(1<<7);
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
set_bit(X86_FEATURE_CX8, cpu.flags);
err = check_cpuflags();
} else if (err == 0x01 && is_transmeta()) {
/* Transmeta might have masked feature bits in word 0 */
u32 ecx = 0x80860004;
u32 eax, edx;
u32 level = 1;
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
asm("cpuid"
: "+a" (level), "=d" (cpu.flags[0])
: : "ecx", "ebx");
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
err = check_cpuflags();
} else if (err == 0x01 &&
!(err_flags[0] & ~(1 << X86_FEATURE_PAE)) &&
is_intel() && cpu.level == 6 &&
(cpu.model == 9 || cpu.model == 13)) {
/* PAE is disabled on this Pentium M but can be forced */
if (cmdline_find_option_bool("forcepae")) {
puts("WARNING: Forcing PAE in CPU flags\n");
set_bit(X86_FEATURE_PAE, cpu.flags);
err = check_cpuflags();
}
else {
puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
}
}
if (err_flags_ptr)
*err_flags_ptr = err ? err_flags : NULL;
if (cpu_level_ptr)
*cpu_level_ptr = cpu.level;
if (req_level_ptr)
*req_level_ptr = req_level;
return (cpu.level < req_level || err) ? -1 : 0;
}

119
arch/x86/boot/cpuflags.c Normal file
View file

@ -0,0 +1,119 @@
#include <linux/types.h>
#include "bitops.h"
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
#include "cpuflags.h"
struct cpu_features cpu;
u32 cpu_vendor[3];
static bool loaded_flags;
static int has_fpu(void)
{
u16 fcw = -1, fsw = -1;
unsigned long cr0;
asm volatile("mov %%cr0,%0" : "=r" (cr0));
if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
asm volatile("mov %0,%%cr0" : : "r" (cr0));
}
asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
: "+m" (fsw), "+m" (fcw));
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
/*
* For building the 16-bit code we want to explicitly specify 32-bit
* push/pop operations, rather than just saying 'pushf' or 'popf' and
* letting the compiler choose. But this is also included from the
* compressed/ directory where it may be 64-bit code, and thus needs
* to be 'pushfq' or 'popfq' in that case.
*/
#ifdef __x86_64__
#define PUSHF "pushfq"
#define POPF "popfq"
#else
#define PUSHF "pushfl"
#define POPF "popfl"
#endif
int has_eflag(unsigned long mask)
{
unsigned long f0, f1;
asm volatile(PUSHF " \n\t"
PUSHF " \n\t"
"pop %0 \n\t"
"mov %0,%1 \n\t"
"xor %2,%1 \n\t"
"push %1 \n\t"
POPF " \n\t"
PUSHF " \n\t"
"pop %1 \n\t"
POPF
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));
return !!((f0^f1) & mask);
}
/* Handle x86_32 PIC using ebx. */
#if defined(__i386__) && defined(__PIC__)
# define EBX_REG "=r"
#else
# define EBX_REG "=b"
#endif
static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
{
asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
"cpuid \n\t"
".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
: "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
: "a" (id)
);
}
void get_cpuflags(void)
{
u32 max_intel_level, max_amd_level;
u32 tfms;
u32 ignored;
if (loaded_flags)
return;
loaded_flags = true;
if (has_fpu())
set_bit(X86_FEATURE_FPU, cpu.flags);
if (has_eflag(X86_EFLAGS_ID)) {
cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
&cpu_vendor[1]);
if (max_intel_level >= 0x00000001 &&
max_intel_level <= 0x0000ffff) {
cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
&cpu.flags[0]);
cpu.level = (tfms >> 8) & 15;
cpu.model = (tfms >> 4) & 15;
if (cpu.level >= 6)
cpu.model += ((tfms >> 16) & 0xf) << 4;
}
cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
&ignored);
if (max_amd_level >= 0x80000001 &&
max_amd_level <= 0x8000ffff) {
cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
&cpu.flags[1]);
}
}
}

19
arch/x86/boot/cpuflags.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
struct cpu_features {
int level; /* Family, or 64 for x86-64 */
int model;
u32 flags[NCAPINTS];
};
extern struct cpu_features cpu;
extern u32 cpu_vendor[3];
int has_eflag(unsigned long mask);
void get_cpuflags(void);
#endif

21
arch/x86/boot/ctype.h Normal file
View file

@ -0,0 +1,21 @@
#ifndef BOOT_ISDIGIT_H
#define BOOT_ISDIGIT_H
static inline int isdigit(int ch)
{
return (ch >= '0') && (ch <= '9');
}
static inline int isxdigit(int ch)
{
if (isdigit(ch))
return true;
if ((ch >= 'a') && (ch <= 'f'))
return true;
return (ch >= 'A') && (ch <= 'F');
}
#endif

View file

@ -0,0 +1,151 @@
#include "boot.h"
#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
#define XMTRDY 0x20
#define DLAB 0x80
#define TXR 0 /* Transmit register (WRITE) */
#define RXR 0 /* Receive register (READ) */
#define IER 1 /* Interrupt Enable */
#define IIR 2 /* Interrupt ID */
#define FCR 2 /* FIFO control */
#define LCR 3 /* Line control */
#define MCR 4 /* Modem control */
#define LSR 5 /* Line Status */
#define MSR 6 /* Modem Status */
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor latch High */
#define DEFAULT_BAUD 9600
static void early_serial_init(int port, int baud)
{
unsigned char c;
unsigned divisor;
outb(0x3, port + LCR); /* 8n1 */
outb(0, port + IER); /* no interrupt */
outb(0, port + FCR); /* no fifo */
outb(0x3, port + MCR); /* DTR + RTS */
divisor = 115200 / baud;
c = inb(port + LCR);
outb(c | DLAB, port + LCR);
outb(divisor & 0xff, port + DLL);
outb((divisor >> 8) & 0xff, port + DLH);
outb(c & ~DLAB, port + LCR);
early_serial_base = port;
}
static void parse_earlyprintk(void)
{
int baud = DEFAULT_BAUD;
char arg[32];
int pos = 0;
int port = 0;
if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) {
char *e;
if (!strncmp(arg, "serial", 6)) {
port = DEFAULT_SERIAL_PORT;
pos += 6;
}
if (arg[pos] == ',')
pos++;
/*
* make sure we have
* "serial,0x3f8,115200"
* "serial,ttyS0,115200"
* "ttyS0,115200"
*/
if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
port = simple_strtoull(arg + pos, &e, 16);
if (port == 0 || arg + pos == e)
port = DEFAULT_SERIAL_PORT;
else
pos = e - arg;
} else if (!strncmp(arg + pos, "ttyS", 4)) {
static const int bases[] = { 0x3f8, 0x2f8 };
int idx = 0;
if (!strncmp(arg + pos, "ttyS", 4))
pos += 4;
if (arg[pos++] == '1')
idx = 1;
port = bases[idx];
}
if (arg[pos] == ',')
pos++;
baud = simple_strtoull(arg + pos, &e, 0);
if (baud == 0 || arg + pos == e)
baud = DEFAULT_BAUD;
}
if (port)
early_serial_init(port, baud);
}
#define BASE_BAUD (1843200/16)
static unsigned int probe_baud(int port)
{
unsigned char lcr, dll, dlh;
unsigned int quot;
lcr = inb(port + LCR);
outb(lcr | DLAB, port + LCR);
dll = inb(port + DLL);
dlh = inb(port + DLH);
outb(lcr, port + LCR);
quot = (dlh << 8) | dll;
return BASE_BAUD / quot;
}
static void parse_console_uart8250(void)
{
char optstr[64], *options;
int baud = DEFAULT_BAUD;
int port = 0;
/*
* console=uart8250,io,0x3f8,115200n8
* need to make sure it is last one console !
*/
if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
return;
options = optstr;
if (!strncmp(options, "uart8250,io,", 12))
port = simple_strtoull(options + 12, &options, 0);
else if (!strncmp(options, "uart,io,", 8))
port = simple_strtoull(options + 8, &options, 0);
else
return;
if (options && (options[0] == ','))
baud = simple_strtoull(options + 1, &options, 0);
else
baud = probe_baud(port);
if (port)
early_serial_init(port, baud);
}
void console_init(void)
{
parse_earlyprintk();
if (!early_serial_base)
parse_console_uart8250();
}

182
arch/x86/boot/edd.c Normal file
View file

@ -0,0 +1,182 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Get EDD BIOS disk information
*/
#include "boot.h"
#include <linux/edd.h>
#include "string.h"
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
/*
* Read the MBR (first sector) from a specific device.
*/
static int read_mbr(u8 devno, void *buf)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ax = 0x0201; /* Legacy Read, one sector */
ireg.cx = 0x0001; /* Sector 0-0-1 */
ireg.dl = devno;
ireg.bx = (size_t)buf;
intcall(0x13, &ireg, &oreg);
return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
}
static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
{
int sector_size;
char *mbrbuf_ptr, *mbrbuf_end;
u32 buf_base, mbr_base;
extern char _end[];
u16 mbr_magic;
sector_size = ei->params.bytes_per_sector;
if (!sector_size)
sector_size = 512; /* Best available guess */
/* Produce a naturally aligned buffer on the heap */
buf_base = (ds() << 4) + (u32)&_end;
mbr_base = (buf_base+sector_size-1) & ~(sector_size-1);
mbrbuf_ptr = _end + (mbr_base-buf_base);
mbrbuf_end = mbrbuf_ptr + sector_size;
/* Make sure we actually have space on the heap... */
if (!(boot_params.hdr.loadflags & CAN_USE_HEAP))
return -1;
if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
return -1;
memset(mbrbuf_ptr, 0, sector_size);
if (read_mbr(devno, mbrbuf_ptr))
return -1;
*mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
mbr_magic = *(u16 *)&mbrbuf_ptr[510];
/* check for valid MBR magic */
return mbr_magic == 0xAA55 ? 0 : -1;
}
static int get_edd_info(u8 devno, struct edd_info *ei)
{
struct biosregs ireg, oreg;
memset(ei, 0, sizeof *ei);
/* Check Extensions Present */
initregs(&ireg);
ireg.ah = 0x41;
ireg.bx = EDDMAGIC1;
ireg.dl = devno;
intcall(0x13, &ireg, &oreg);
if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No extended information */
if (oreg.bx != EDDMAGIC2)
return -1;
ei->device = devno;
ei->version = oreg.ah; /* EDD version number */
ei->interface_support = oreg.cx; /* EDD functionality subsets */
/* Extended Get Device Parameters */
ei->params.length = sizeof(ei->params);
ireg.ah = 0x48;
ireg.si = (size_t)&ei->params;
intcall(0x13, &ireg, &oreg);
/* Get legacy CHS parameters */
/* Ralf Brown recommends setting ES:DI to 0:0 */
ireg.ah = 0x08;
ireg.es = 0;
intcall(0x13, &ireg, &oreg);
if (!(oreg.eflags & X86_EFLAGS_CF)) {
ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
ei->legacy_max_head = oreg.dh;
ei->legacy_sectors_per_track = oreg.cl & 0x3f;
}
return 0;
}
void query_edd(void)
{
char eddarg[8];
int do_mbr = 1;
#ifdef CONFIG_EDD_OFF
int do_edd = 0;
#else
int do_edd = 1;
#endif
int be_quiet;
int devno;
struct edd_info ei, *edp;
u32 *mbrptr;
if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
do_edd = 1;
do_mbr = 0;
}
else if (!strcmp(eddarg, "off"))
do_edd = 0;
else if (!strcmp(eddarg, "on"))
do_edd = 1;
}
be_quiet = cmdline_find_option_bool("quiet");
edp = boot_params.eddbuf;
mbrptr = boot_params.edd_mbr_sig_buffer;
if (!do_edd)
return;
/* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
* so give a hint if this happens.
*/
if (!be_quiet)
printf("Probing EDD (edd=off to disable)... ");
for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
/*
* Scan the BIOS-supported hard disks and query EDD
* information...
*/
if (!get_edd_info(devno, &ei)
&& boot_params.eddbuf_entries < EDDMAXNR) {
memcpy(edp, &ei, sizeof ei);
edp++;
boot_params.eddbuf_entries++;
}
if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
}
if (!be_quiet)
printf("ok\n");
}
#endif

528
arch/x86/boot/header.S Normal file
View file

@ -0,0 +1,528 @@
/*
* header.S
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Based on bootsect.S and setup.S
* modified by more people than can be counted
*
* Rewritten as a common file by H. Peter Anvin (Apr 2007)
*
* BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
* addresses must be multiplied by 16 to obtain their respective linear
* addresses. To avoid confusion, linear addresses are written using leading
* hex while segment addresses are written as segment:offset.
*
*/
#include <asm/segment.h>
#include <generated/utsrelease.h>
#include <asm/boot.h>
#include <asm/e820.h>
#include <asm/page_types.h>
#include <asm/setup.h>
#include <asm/bootparam.h>
#include "boot.h"
#include "voffset.h"
#include "zoffset.h"
BOOTSEG = 0x07C0 /* original address of boot-sector */
SYSSEG = 0x1000 /* historical load address >> 4 */
#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA
#endif
#ifndef ROOT_RDONLY
#define ROOT_RDONLY 1
#endif
.code16
.section ".bstext", "ax"
.global bootsect_start
bootsect_start:
#ifdef CONFIG_EFI_STUB
# "MZ", MS-DOS header
.byte 0x4d
.byte 0x5a
#endif
# Normalize the start address
ljmp $BOOTSEG, $start2
start2:
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
xorw %sp, %sp
sti
cld
movw $bugger_off_msg, %si
msg_loop:
lodsb
andb %al, %al
jz bs_die
movb $0xe, %ah
movw $7, %bx
int $0x10
jmp msg_loop
bs_die:
# Allow the user to press a key, then reboot
xorw %ax, %ax
int $0x16
int $0x19
# int 0x19 should never return. In case it does anyway,
# invoke the BIOS reset code...
ljmp $0xf000,$0xfff0
#ifdef CONFIG_EFI_STUB
.org 0x3c
#
# Offset to the PE header.
#
.long pe_header
#endif /* CONFIG_EFI_STUB */
.section ".bsdata", "a"
bugger_off_msg:
.ascii "Use a boot loader.\r\n"
.ascii "\n"
.ascii "Remove disk and press any key to reboot...\r\n"
.byte 0
#ifdef CONFIG_EFI_STUB
pe_header:
.ascii "PE"
.word 0
coff_header:
#ifdef CONFIG_X86_32
.word 0x14c # i386
#else
.word 0x8664 # x86-64
#endif
.word 4 # nr_sections
.long 0 # TimeDateStamp
.long 0 # PointerToSymbolTable
.long 1 # NumberOfSymbols
.word section_table - optional_header # SizeOfOptionalHeader
#ifdef CONFIG_X86_32
.word 0x306 # Characteristics.
# IMAGE_FILE_32BIT_MACHINE |
# IMAGE_FILE_DEBUG_STRIPPED |
# IMAGE_FILE_EXECUTABLE_IMAGE |
# IMAGE_FILE_LINE_NUMS_STRIPPED
#else
.word 0x206 # Characteristics
# IMAGE_FILE_DEBUG_STRIPPED |
# IMAGE_FILE_EXECUTABLE_IMAGE |
# IMAGE_FILE_LINE_NUMS_STRIPPED
#endif
optional_header:
#ifdef CONFIG_X86_32
.word 0x10b # PE32 format
#else
.word 0x20b # PE32+ format
#endif
.byte 0x02 # MajorLinkerVersion
.byte 0x14 # MinorLinkerVersion
# Filled in by build.c
.long 0 # SizeOfCode
.long 0 # SizeOfInitializedData
.long 0 # SizeOfUninitializedData
# Filled in by build.c
.long 0x0000 # AddressOfEntryPoint
.long 0x0200 # BaseOfCode
#ifdef CONFIG_X86_32
.long 0 # data
#endif
extra_header_fields:
#ifdef CONFIG_X86_32
.long 0 # ImageBase
#else
.quad 0 # ImageBase
#endif
.long CONFIG_PHYSICAL_ALIGN # SectionAlignment
.long 0x20 # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
.word 0 # MajorImageVersion
.word 0 # MinorImageVersion
.word 0 # MajorSubsystemVersion
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue
#
# The size of the bzImage is written in tools/build.c
#
.long 0 # SizeOfImage
.long 0x200 # SizeOfHeaders
.long 0 # CheckSum
.word 0xa # Subsystem (EFI application)
.word 0 # DllCharacteristics
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
.long 0 # SizeOfStackCommit
.long 0 # SizeOfHeapReserve
.long 0 # SizeOfHeapCommit
#else
.quad 0 # SizeOfStackReserve
.quad 0 # SizeOfStackCommit
.quad 0 # SizeOfHeapReserve
.quad 0 # SizeOfHeapCommit
#endif
.long 0 # LoaderFlags
.long 0x6 # NumberOfRvaAndSizes
.quad 0 # ExportTable
.quad 0 # ImportTable
.quad 0 # ResourceTable
.quad 0 # ExceptionTable
.quad 0 # CertificationTable
.quad 0 # BaseRelocationTable
# Section table
section_table:
#
# The offset & size fields are filled in by build.c.
#
.ascii ".setup"
.byte 0
.byte 0
.long 0
.long 0x0 # startup_{32,64}
.long 0 # Size of initialized data
# on disk
.long 0x0 # startup_{32,64}
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x60500020 # Characteristics (section flags)
#
# The EFI application loader requires a relocation section
# because EFI applications must be relocatable. The .reloc
# offset & size fields are filled in by build.c.
#
.ascii ".reloc"
.byte 0
.byte 0
.long 0
.long 0
.long 0 # SizeOfRawData
.long 0 # PointerToRawData
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x42100040 # Characteristics (section flags)
#
# The offset & size fields are filled in by build.c.
#
.ascii ".text"
.byte 0
.byte 0
.byte 0
.long 0
.long 0x0 # startup_{32,64}
.long 0 # Size of initialized data
# on disk
.long 0x0 # startup_{32,64}
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x60500020 # Characteristics (section flags)
#
# The offset & size fields are filled in by build.c.
#
.ascii ".bss"
.byte 0
.byte 0
.byte 0
.byte 0
.long 0
.long 0x0
.long 0 # Size of initialized data
# on disk
.long 0x0
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0xc8000080 # Characteristics (section flags)
#endif /* CONFIG_EFI_STUB */
# Kernel attributes; used by setup. This is part 1 of the
# header, from the old boot sector.
.section ".header", "a"
.globl sentinel
sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
.globl hdr
hdr:
setup_sects: .byte 0 /* Filled in by build.c */
root_flags: .word ROOT_RDONLY
syssize: .long 0 /* Filled in by build.c */
ram_size: .word 0 /* Obsolete */
vid_mode: .word SVGA_MODE
root_dev: .word 0 /* Filled in by build.c */
boot_flag: .word 0xAA55
# offset 512, entry point
.globl _start
_start:
# Explicitly enter this as bytes, or the assembler
# tries to generate a 3-byte jump here, which causes
# everything else to push off to the wrong offset.
.byte 0xeb # short (2-byte) jump
.byte start_of_setup-1f
1:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
.word 0x020d # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
# in case something decided to "use" it
.word kernel_version-512 # pointing to kernel version string
# above section of header is compatible
# with loadlin-1.5 (header v1.5). Don't
# change it.
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
# See Documentation/x86/boot.txt for
# assigned ids
# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
.byte LOADED_HIGH # The kernel is to be loaded high
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
# to 0x90000 then just before jumping
# into the kernel. However, only the
# loader knows how much data behind
# us also needs to be loaded.
code32_start: # here loaders can put a different
# start address for 32-bit code.
.long 0x100000 # 0x100000 = default for big kernel
ramdisk_image: .long 0 # address of loaded ramdisk image
# Here the loader puts the 32-bit
# address where it loaded the image.
# This only will be read by the kernel.
ramdisk_size: .long 0 # its size in bytes
bootsect_kludge:
.long 0 # obsolete
heap_end_ptr: .word _end+STACK_SIZE-512
# (Header version 0x0201 or later)
# space from here (exclusive) down to
# end of setup code can be used by setup
# for local heap purposes.
ext_loader_ver:
.byte 0 # Extended boot loader version
ext_loader_type:
.byte 0 # Extended boot loader type
cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# If nonzero, a 32-bit pointer
# to the kernel command line.
# The command line should be
# located between the start of
# setup and the end of low
# memory (0xa0000), or it may
# get overwritten before it
# gets read. If this field is
# used, there is no longer
# anything magical about the
# 0x90000 segment; the setup
# can be located anywhere in
# low memory 0x10000 or higher.
initrd_addr_max: .long 0x7fffffff
# (Header version 0x0203 or later)
# The highest safe address for
# the contents of an initrd
# The current kernel allows up to 4 GB,
# but leave it at 2 GB to avoid
# possible bootloader bugs.
kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
#required for protected mode
#kernel
#ifdef CONFIG_RELOCATABLE
relocatable_kernel: .byte 1
#else
relocatable_kernel: .byte 0
#endif
min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
xloadflags:
#ifdef CONFIG_X86_64
# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
#else
# define XLF0 0
#endif
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
/* kernel/boot_param/ramdisk could be loaded above 4g */
# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
#else
# define XLF1 0
#endif
#ifdef CONFIG_EFI_STUB
# ifdef CONFIG_EFI_MIXED
# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
# else
# ifdef CONFIG_X86_64
# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
# else
# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
# endif
# endif
#else
# define XLF23 0
#endif
#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
# define XLF4 XLF_EFI_KEXEC
#else
# define XLF4 0
#endif
.word XLF0 | XLF1 | XLF23 | XLF4
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
#version 2.06
hardware_subarch: .long 0 # subarchitecture, added with 2.07
# default to 0 for normal x86 PC
hardware_subarch_data: .quad 0
payload_offset: .long ZO_input_data
payload_length: .long ZO_z_input_len
setup_data: .quad 0 # 64-bit physical pointer to
# single linked list of
# struct setup_data
pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
#define VO_INIT_SIZE (VO__end - VO__text)
#if ZO_INIT_SIZE > VO_INIT_SIZE
#define INIT_SIZE ZO_INIT_SIZE
#else
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
# End of setup header #####################################################
.section ".entrytext", "ax"
start_of_setup:
# Force %es = %ds
movw %ds, %ax
movw %ax, %es
cld
# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
# which happened to work by accident for the old code. Recalculate the stack
# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
# stack behind its own code, so we can't blindly put it directly past the heap.
movw %ss, %dx
cmpw %ax, %dx # %ds == %ss?
movw %sp, %dx
je 2f # -> assume %sp is reasonably set
# Invalid %ss, make up a new stack
movw $_end, %dx
testb $CAN_USE_HEAP, loadflags
jz 1f
movw heap_end_ptr, %dx
1: addw $STACK_SIZE, %dx
jnc 2f
xorw %dx, %dx # Prevent wraparound
2: # Now %dx should point to the end of our stack space
andw $~3, %dx # dword align (might as well...)
jnz 3f
movw $0xfffc, %dx # Make sure we're not zero
3: movw %ax, %ss
movzwl %dx, %esp # Clear upper half of %esp
sti # Now we should have a working stack
# We will have entered with %cs = %ds+0x20, normalize %cs so
# it is on par with the other segments.
pushw %ds
pushw $6f
lretw
6:
# Check signature at end of setup
cmpl $0x5a5aaa55, setup_sig
jne setup_bad
# Zero the bss
movw $__bss_start, %di
movw $_end+3, %cx
xorl %eax, %eax
subw %di, %cx
shrw $2, %cx
rep; stosl
# Jump to C code (should not return)
calll main
# Setup corrupt somehow...
setup_bad:
movl $setup_corrupt, %eax
calll puts
# Fall through...
.globl die
.type die, @function
die:
hlt
jmp die
.size die, .-die
.section ".initdata", "a"
setup_corrupt:
.byte 7
.string "No setup signature found...\n"

59
arch/x86/boot/install.sh Normal file
View file

@ -0,0 +1,59 @@
#!/bin/sh
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
# Copyright (C) 1995 by Linus Torvalds
#
# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
#
# "make install" script for i386 architecture
#
# Arguments:
# $1 - kernel version
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
#
verify () {
if [ ! -f "$1" ]; then
echo "" 1>&2
echo " *** Missing file: $1" 1>&2
echo ' *** You need to run "make" before "make install".' 1>&2
echo "" 1>&2
exit 1
fi
}
# Make sure the files actually exist
verify "$2"
verify "$3"
# User may have a custom install script
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
if [ -f $4/vmlinuz ]; then
mv $4/vmlinuz $4/vmlinuz.old
fi
if [ -f $4/System.map ]; then
mv $4/System.map $4/System.old
fi
cat $2 > $4/vmlinuz
cp $3 $4/System.map
if [ -x /sbin/lilo ]; then
/sbin/lilo
elif [ -x /etc/lilo/install ]; then
/etc/lilo/install
else
sync
echo "Cannot find LILO."
fi

185
arch/x86/boot/main.c Normal file
View file

@ -0,0 +1,185 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Main module for the real-mode kernel code
*/
#include "boot.h"
#include "string.h"
struct boot_params boot_params __attribute__((aligned(16)));
char *HEAP = _end;
char *heap_end = _end; /* Default end of heap = no heap */
/*
* Copy the header into the boot parameter block. Since this
* screws up the old-style command line protocol, adjust by
* filling in the new-style command line pointer instead.
*/
static void copy_boot_params(void)
{
struct old_cmdline {
u16 cl_magic;
u16 cl_offset;
};
const struct old_cmdline * const oldcmd =
(const struct old_cmdline *)OLD_CL_ADDRESS;
BUILD_BUG_ON(sizeof boot_params != 4096);
memcpy(&boot_params.hdr, &hdr, sizeof hdr);
if (!boot_params.hdr.cmd_line_ptr &&
oldcmd->cl_magic == OLD_CL_MAGIC) {
/* Old-style command line protocol. */
u16 cmdline_seg;
/* Figure out if the command line falls in the region
of memory that an old kernel would have copied up
to 0x90000... */
if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
cmdline_seg = ds();
else
cmdline_seg = 0x9000;
boot_params.hdr.cmd_line_ptr =
(cmdline_seg << 4) + oldcmd->cl_offset;
}
}
/*
* Query the keyboard lock status as given by the BIOS, and
* set the keyboard repeat rate to maximum. Unclear why the latter
* is done here; this might be possible to kill off as stale code.
*/
static void keyboard_init(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x02; /* Get keyboard status */
intcall(0x16, &ireg, &oreg);
boot_params.kbd_status = oreg.al;
ireg.ax = 0x0305; /* Set keyboard repeat rate */
intcall(0x16, &ireg, NULL);
}
/*
* Get Intel SpeedStep (IST) information.
*/
static void query_ist(void)
{
struct biosregs ireg, oreg;
/* Some older BIOSes apparently crash on this call, so filter
it from machines too old to have SpeedStep at all. */
if (cpu.level < 6)
return;
initregs(&ireg);
ireg.ax = 0xe980; /* IST Support */
ireg.edx = 0x47534943; /* Request value */
intcall(0x15, &ireg, &oreg);
boot_params.ist_info.signature = oreg.eax;
boot_params.ist_info.command = oreg.ebx;
boot_params.ist_info.event = oreg.ecx;
boot_params.ist_info.perf_level = oreg.edx;
}
/*
* Tell the BIOS what CPU mode we intend to run in.
*/
static void set_bios_mode(void)
{
#ifdef CONFIG_X86_64
struct biosregs ireg;
initregs(&ireg);
ireg.ax = 0xec00;
ireg.bx = 2;
intcall(0x15, &ireg, NULL);
#endif
}
static void init_heap(void)
{
char *stack_end;
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
asm("leal %P1(%%esp),%0"
: "=r" (stack_end) : "i" (-STACK_SIZE));
heap_end = (char *)
((size_t)boot_params.hdr.heap_end_ptr + 0x200);
if (heap_end > stack_end)
heap_end = stack_end;
} else {
/* Boot protocol 2.00 only, no heap available */
puts("WARNING: Ancient bootloader, some functionality "
"may be limited!\n");
}
}
void main(void)
{
/* First, copy the boot header into the "zeropage" */
copy_boot_params();
/* Initialize the early-boot console */
console_init();
if (cmdline_find_option_bool("debug"))
puts("early console in setup code\n");
/* End of heap check */
init_heap();
/* Make sure we have all the proper CPU support */
if (validate_cpu()) {
puts("Unable to boot - please use a kernel appropriate "
"for your CPU.\n");
die();
}
/* Tell the BIOS what CPU mode we intend to run in. */
set_bios_mode();
/* Detect memory layout */
detect_memory();
/* Set keyboard repeat rate (why?) and query the lock flags */
keyboard_init();
/* Query MCA information */
query_mca();
/* Query Intel SpeedStep (IST) information */
query_ist();
/* Query APM information */
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
query_apm_bios();
#endif
/* Query EDD information */
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
query_edd();
#endif
/* Set the video mode */
set_video();
/* Do the last things and invoke protected mode */
go_to_protected_mode();
}

38
arch/x86/boot/mca.c Normal file
View file

@ -0,0 +1,38 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Get the MCA system description table
*/
#include "boot.h"
int query_mca(void)
{
struct biosregs ireg, oreg;
u16 len;
initregs(&ireg);
ireg.ah = 0xc0;
intcall(0x15, &ireg, &oreg);
if (oreg.eflags & X86_EFLAGS_CF)
return -1; /* No MCA present */
set_fs(oreg.es);
len = rdfs16(oreg.bx);
if (len > sizeof(boot_params.sys_desc_table))
len = sizeof(boot_params.sys_desc_table);
copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
return 0;
}

136
arch/x86/boot/memory.c Normal file
View file

@ -0,0 +1,136 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Memory detection code
*/
#include "boot.h"
#define SMAP 0x534d4150 /* ASCII "SMAP" */
static int detect_memory_e820(void)
{
int count = 0;
struct biosregs ireg, oreg;
struct e820entry *desc = boot_params.e820_map;
static struct e820entry buf; /* static so it is zeroed */
initregs(&ireg);
ireg.ax = 0xe820;
ireg.cx = sizeof buf;
ireg.edx = SMAP;
ireg.di = (size_t)&buf;
/*
* Note: at least one BIOS is known which assumes that the
* buffer pointed to by one e820 call is the same one as
* the previous call, and only changes modified fields. Therefore,
* we use a temporary buffer and copy the results entry by entry.
*
* This routine deliberately does not try to account for
* ACPI 3+ extended attributes. This is because there are
* BIOSes in the field which report zero for the valid bit for
* all ranges, and we don't currently make any use of the
* other attribute bits. Revisit this if we see the extended
* attribute bits deployed in a meaningful way in the future.
*/
do {
intcall(0x15, &ireg, &oreg);
ireg.ebx = oreg.ebx; /* for next iteration... */
/* BIOSes which terminate the chain with CF = 1 as opposed
to %ebx = 0 don't always report the SMAP signature on
the final, failing, probe. */
if (oreg.eflags & X86_EFLAGS_CF)
break;
/* Some BIOSes stop returning SMAP in the middle of
the search loop. We don't know exactly how the BIOS
screwed up the map at that point, we might have a
partial map, the full map, or complete garbage, so
just return failure. */
if (oreg.eax != SMAP) {
count = 0;
break;
}
*desc++ = buf;
count++;
} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
return boot_params.e820_entries = count;
}
static int detect_memory_e801(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ax = 0xe801;
intcall(0x15, &ireg, &oreg);
if (oreg.eflags & X86_EFLAGS_CF)
return -1;
/* Do we really need to do this? */
if (oreg.cx || oreg.dx) {
oreg.ax = oreg.cx;
oreg.bx = oreg.dx;
}
if (oreg.ax > 15*1024) {
return -1; /* Bogus! */
} else if (oreg.ax == 15*1024) {
boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
} else {
/*
* This ignores memory above 16MB if we have a memory
* hole there. If someone actually finds a machine
* with a memory hole at 16MB and no support for
* 0E820h they should probably generate a fake e820
* map.
*/
boot_params.alt_mem_k = oreg.ax;
}
return 0;
}
static int detect_memory_88(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x88;
intcall(0x15, &ireg, &oreg);
boot_params.screen_info.ext_mem_k = oreg.ax;
return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
}
int detect_memory(void)
{
int err = -1;
if (detect_memory_e820() > 0)
err = 0;
if (!detect_memory_e801())
err = 0;
if (!detect_memory_88())
err = 0;
return err;
}

52
arch/x86/boot/mkcpustr.c Normal file
View file

@ -0,0 +1,52 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* This is a host program to preprocess the CPU strings into a
* compact format suitable for the setup code.
*/
#include <stdio.h>
#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
#include "../include/asm/cpufeature.h"
#include "../kernel/cpu/capflags.c"
int main(void)
{
int i, j;
const char *str;
printf("static const char x86_cap_strs[] =\n");
for (i = 0; i < NCAPINTS; i++) {
for (j = 0; j < 32; j++) {
str = x86_cap_flags[i*32+j];
if (i == NCAPINTS-1 && j == 31) {
/* The last entry must be unconditional; this
also consumes the compiler-added null
character */
if (!str)
str = "";
printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
i, j, str);
} else if (str) {
printf("#if REQUIRED_MASK%d & (1 << %d)\n"
"\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
"#endif\n",
i, j, i, j, str);
}
}
}
printf("\t;\n");
return 0;
}

View file

@ -0,0 +1,17 @@
#
# mtools configuration file for "make (b)zdisk"
#
# Actual floppy drive
drive a:
file="/dev/fd0"
# 1.44 MB floppy disk image
drive v:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
# 2.88 MB floppy disk image (mostly for virtual uses)
drive w:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter

126
arch/x86/boot/pm.c Normal file
View file

@ -0,0 +1,126 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Prepare the machine for transition to protected mode.
*/
#include "boot.h"
#include <asm/segment.h>
/*
* Invoke the realmode switch hook if present; otherwise
* disable all interrupts.
*/
static void realmode_switch_hook(void)
{
if (boot_params.hdr.realmode_swtch) {
asm volatile("lcallw *%0"
: : "m" (boot_params.hdr.realmode_swtch)
: "eax", "ebx", "ecx", "edx");
} else {
asm volatile("cli");
outb(0x80, 0x70); /* Disable NMI */
io_delay();
}
}
/*
* Disable all interrupts at the legacy PIC.
*/
static void mask_all_interrupts(void)
{
outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */
io_delay();
outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */
io_delay();
}
/*
* Reset IGNNE# if asserted in the FPU.
*/
static void reset_coprocessor(void)
{
outb(0, 0xf0);
io_delay();
outb(0, 0xf1);
io_delay();
}
/*
* Set up the GDT
*/
struct gdt_ptr {
u16 len;
u32 ptr;
} __attribute__((packed));
static void setup_gdt(void)
{
/* There are machines which are known to not boot with the GDT
being 8-byte unaligned. Intel recommends 16 byte alignment. */
static const u64 boot_gdt[] __attribute__((aligned(16))) = {
/* CS: code, read/execute, 4 GB, base 0 */
[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
/* DS: data, read/write, 4 GB, base 0 */
[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
/* TSS: 32-bit tss, 104 bytes, base 4096 */
/* We only have a TSS here to keep Intel VT happy;
we don't actually use it for anything. */
[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
};
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
of the gdt_ptr contents. Thus, make it static so it will
stay in memory, at least long enough that we switch to the
proper kernel GDT. */
static struct gdt_ptr gdt;
gdt.len = sizeof(boot_gdt)-1;
gdt.ptr = (u32)&boot_gdt + (ds() << 4);
asm volatile("lgdtl %0" : : "m" (gdt));
}
/*
* Set up the IDT
*/
static void setup_idt(void)
{
static const struct gdt_ptr null_idt = {0, 0};
asm volatile("lidtl %0" : : "m" (null_idt));
}
/*
* Actual invocation sequence
*/
void go_to_protected_mode(void)
{
/* Hook before leaving real mode, also disables interrupts */
realmode_switch_hook();
/* Enable the A20 gate */
if (enable_a20()) {
puts("A20 gate not responding, unable to boot...\n");
die();
}
/* Reset coprocessor (IGNNE#) */
reset_coprocessor();
/* Mask all interrupts in the PIC */
mask_all_interrupts();
/* Actual transition to protected mode... */
setup_idt();
setup_gdt();
protected_mode_jump(boot_params.hdr.code32_start,
(u32)&boot_params + (ds() << 4));
}

77
arch/x86/boot/pmjump.S Normal file
View file

@ -0,0 +1,77 @@
/* ----------------------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* The actual transition into protected mode
*/
#include <asm/boot.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <linux/linkage.h>
.text
.code16
/*
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
*/
GLOBAL(protected_mode_jump)
movl %edx, %esi # Pointer to boot_params table
xorl %ebx, %ebx
movw %cs, %bx
shll $4, %ebx
addl %ebx, 2f
jmp 1f # Short jump to serialize on 386/486
1:
movw $__BOOT_DS, %cx
movw $__BOOT_TSS, %di
movl %cr0, %edx
orb $X86_CR0_PE, %dl # Protected mode
movl %edx, %cr0
# Transition to 32-bit mode
.byte 0x66, 0xea # ljmpl opcode
2: .long in_pm32 # offset
.word __BOOT_CS # segment
ENDPROC(protected_mode_jump)
.code32
.section ".text32","ax"
GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds
movl %ecx, %es
movl %ecx, %fs
movl %ecx, %gs
movl %ecx, %ss
# The 32-bit code sets up its own stack, but this way we do have
# a valid stack if some debugging hack wants to use it.
addl %ebx, %esp
# Set up TR to make Intel VT happy
ltr %di
# Clear registers to allow for future extensions to the
# 32-bit boot protocol
xorl %ecx, %ecx
xorl %edx, %edx
xorl %ebx, %ebx
xorl %ebp, %ebp
xorl %edi, %edi
# Set up LDTR to make Intel VT happy
lldt %cx
jmpl *%eax # Jump to the 32-bit entrypoint
ENDPROC(in_pm32)

309
arch/x86/boot/printf.c Normal file
View file

@ -0,0 +1,309 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Oh, it's a waste of space, but oh-so-yummy for debugging. This
* version of printf() does not include 64-bit support. "Live with
* it."
*
*/
#include "boot.h"
static int skip_atoi(const char **s)
{
int i = 0;
while (isdigit(**s))
i = i * 10 + *((*s)++) - '0';
return i;
}
#define ZEROPAD 1 /* pad with zero */
#define SIGN 2 /* unsigned/signed long */
#define PLUS 4 /* show plus */
#define SPACE 8 /* space if plus */
#define LEFT 16 /* left justified */
#define SMALL 32 /* Must be 32 == 0x20 */
#define SPECIAL 64 /* 0x */
#define __do_div(n, base) ({ \
int __res; \
__res = ((unsigned long) n) % (unsigned) base; \
n = ((unsigned long) n) / (unsigned) base; \
__res; })
static char *number(char *str, long num, int base, int size, int precision,
int type)
{
/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
char tmp[66];
char c, sign, locase;
int i;
/* locase = 0 or 0x20. ORing digits or letters with 'locase'
* produces same digits or (maybe lowercased) letters */
locase = (type & SMALL);
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 16)
return NULL;
c = (type & ZEROPAD) ? '0' : ' ';
sign = 0;
if (type & SIGN) {
if (num < 0) {
sign = '-';
num = -num;
size--;
} else if (type & PLUS) {
sign = '+';
size--;
} else if (type & SPACE) {
sign = ' ';
size--;
}
}
if (type & SPECIAL) {
if (base == 16)
size -= 2;
else if (base == 8)
size--;
}
i = 0;
if (num == 0)
tmp[i++] = '0';
else
while (num != 0)
tmp[i++] = (digits[__do_div(num, base)] | locase);
if (i > precision)
precision = i;
size -= precision;
if (!(type & (ZEROPAD + LEFT)))
while (size-- > 0)
*str++ = ' ';
if (sign)
*str++ = sign;
if (type & SPECIAL) {
if (base == 8)
*str++ = '0';
else if (base == 16) {
*str++ = '0';
*str++ = ('X' | locase);
}
}
if (!(type & LEFT))
while (size-- > 0)
*str++ = c;
while (i < precision--)
*str++ = '0';
while (i-- > 0)
*str++ = tmp[i];
while (size-- > 0)
*str++ = ' ';
return str;
}
int vsprintf(char *buf, const char *fmt, va_list args)
{
int len;
unsigned long num;
int i, base;
char *str;
const char *s;
int flags; /* flags to number() */
int field_width; /* width of output field */
int precision; /* min. # of digits for integers; max
number of chars for from string */
int qualifier; /* 'h', 'l', or 'L' for integer fields */
for (str = buf; *fmt; ++fmt) {
if (*fmt != '%') {
*str++ = *fmt;
continue;
}
/* process flags */
flags = 0;
repeat:
++fmt; /* this also skips first '%' */
switch (*fmt) {
case '-':
flags |= LEFT;
goto repeat;
case '+':
flags |= PLUS;
goto repeat;
case ' ':
flags |= SPACE;
goto repeat;
case '#':
flags |= SPECIAL;
goto repeat;
case '0':
flags |= ZEROPAD;
goto repeat;
}
/* get field width */
field_width = -1;
if (isdigit(*fmt))
field_width = skip_atoi(&fmt);
else if (*fmt == '*') {
++fmt;
/* it's the next argument */
field_width = va_arg(args, int);
if (field_width < 0) {
field_width = -field_width;
flags |= LEFT;
}
}
/* get the precision */
precision = -1;
if (*fmt == '.') {
++fmt;
if (isdigit(*fmt))
precision = skip_atoi(&fmt);
else if (*fmt == '*') {
++fmt;
/* it's the next argument */
precision = va_arg(args, int);
}
if (precision < 0)
precision = 0;
}
/* get the conversion qualifier */
qualifier = -1;
if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
qualifier = *fmt;
++fmt;
}
/* default base */
base = 10;
switch (*fmt) {
case 'c':
if (!(flags & LEFT))
while (--field_width > 0)
*str++ = ' ';
*str++ = (unsigned char)va_arg(args, int);
while (--field_width > 0)
*str++ = ' ';
continue;
case 's':
s = va_arg(args, char *);
len = strnlen(s, precision);
if (!(flags & LEFT))
while (len < field_width--)
*str++ = ' ';
for (i = 0; i < len; ++i)
*str++ = *s++;
while (len < field_width--)
*str++ = ' ';
continue;
case 'p':
if (field_width == -1) {
field_width = 2 * sizeof(void *);
flags |= ZEROPAD;
}
str = number(str,
(unsigned long)va_arg(args, void *), 16,
field_width, precision, flags);
continue;
case 'n':
if (qualifier == 'l') {
long *ip = va_arg(args, long *);
*ip = (str - buf);
} else {
int *ip = va_arg(args, int *);
*ip = (str - buf);
}
continue;
case '%':
*str++ = '%';
continue;
/* integer number formats - set up the flags and "break" */
case 'o':
base = 8;
break;
case 'x':
flags |= SMALL;
case 'X':
base = 16;
break;
case 'd':
case 'i':
flags |= SIGN;
case 'u':
break;
default:
*str++ = '%';
if (*fmt)
*str++ = *fmt;
else
--fmt;
continue;
}
if (qualifier == 'l')
num = va_arg(args, unsigned long);
else if (qualifier == 'h') {
num = (unsigned short)va_arg(args, int);
if (flags & SIGN)
num = (short)num;
} else if (flags & SIGN)
num = va_arg(args, int);
else
num = va_arg(args, unsigned int);
str = number(str, num, base, field_width, precision, flags);
}
*str = '\0';
return str - buf;
}
int sprintf(char *buf, const char *fmt, ...)
{
va_list args;
int i;
va_start(args, fmt);
i = vsprintf(buf, fmt, args);
va_end(args);
return i;
}
int printf(const char *fmt, ...)
{
char printf_buf[1024];
va_list args;
int printed;
va_start(args, fmt);
printed = vsprintf(printf_buf, fmt, args);
va_end(args);
puts(printf_buf);
return printed;
}

30
arch/x86/boot/regs.c Normal file
View file

@ -0,0 +1,30 @@
/* -----------------------------------------------------------------------
*
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* Simple helper function for initializing a register set.
*
* Note that this sets EFLAGS_CF in the input register set; this
* makes it easier to catch functions which do nothing but don't
* explicitly set CF.
*/
#include "boot.h"
#include "string.h"
void initregs(struct biosregs *reg)
{
memset(reg, 0, sizeof *reg);
reg->eflags |= X86_EFLAGS_CF;
reg->ds = ds();
reg->es = ds();
reg->fs = fs();
reg->gs = gs();
}

64
arch/x86/boot/setup.ld Normal file
View file

@ -0,0 +1,64 @@
/*
* setup.ld
*
* Linker script for the i386 setup code
*/
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(_start)
SECTIONS
{
. = 0;
.bstext : { *(.bstext) }
.bsdata : { *(.bsdata) }
. = 495;
.header : { *(.header) }
.entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
.initdata : { *(.initdata) }
__end_init = .;
.text : { *(.text) }
.text32 : { *(.text32) }
. = ALIGN(16);
.rodata : { *(.rodata*) }
.videocards : {
video_cards = .;
*(.videocards)
video_cards_end = .;
}
. = ALIGN(16);
.data : { *(.data*) }
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
}
. = ALIGN(16);
.bss :
{
__bss_start = .;
*(.bss)
__bss_end = .;
}
. = ALIGN(16);
_end = .;
/DISCARD/ : { *(.note*) }
/*
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
*/
. = ASSERT(_end <= 0x8000, "Setup too big!");
. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
/* Necessary for the very-old-loader check to work... */
. = ASSERT(__end_init <= 5*512, "init sections too big!");
}

157
arch/x86/boot/string.c Normal file
View file

@ -0,0 +1,157 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Very basic string functions
*/
#include <linux/types.h>
#include "ctype.h"
int memcmp(const void *s1, const void *s2, size_t len)
{
u8 diff;
asm("repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
int strcmp(const char *str1, const char *str2)
{
const unsigned char *s1 = (const unsigned char *)str1;
const unsigned char *s2 = (const unsigned char *)str2;
int delta = 0;
while (*s1 || *s2) {
delta = *s2 - *s1;
if (delta)
return delta;
s1++;
s2++;
}
return 0;
}
int strncmp(const char *cs, const char *ct, size_t count)
{
unsigned char c1, c2;
while (count) {
c1 = *cs++;
c2 = *ct++;
if (c1 != c2)
return c1 < c2 ? -1 : 1;
if (!c1)
break;
count--;
}
return 0;
}
size_t strnlen(const char *s, size_t maxlen)
{
const char *es = s;
while (*es && maxlen) {
es++;
maxlen--;
}
return (es - s);
}
unsigned int atou(const char *s)
{
unsigned int i = 0;
while (isdigit(*s))
i = i * 10 + (*s++ - '0');
return i;
}
/* Works only for digits and letters, but small and fast */
#define TOLOWER(x) ((x) | 0x20)
static unsigned int simple_guess_base(const char *cp)
{
if (cp[0] == '0') {
if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
return 16;
else
return 8;
} else {
return 10;
}
}
/**
* simple_strtoull - convert a string to an unsigned long long
* @cp: The start of the string
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*/
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
unsigned long long result = 0;
if (!base)
base = simple_guess_base(cp);
if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
cp += 2;
while (isxdigit(*cp)) {
unsigned int value;
value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
if (value >= base)
break;
result = result * base + value;
cp++;
}
if (endp)
*endp = (char *)cp;
return result;
}
/**
* strlen - Find the length of a string
* @s: The string to be sized
*/
size_t strlen(const char *s)
{
const char *sc;
for (sc = s; *sc != '\0'; ++sc)
/* nothing */;
return sc - s;
}
/**
* strstr - Find the first substring in a %NUL terminated string
* @s1: The string to be searched
* @s2: The string to search for
*/
char *strstr(const char *s1, const char *s2)
{
size_t l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}

21
arch/x86/boot/string.h Normal file
View file

@ -0,0 +1,21 @@
#ifndef BOOT_STRING_H
#define BOOT_STRING_H
/* Undef any of these macros coming from string_32.h. */
#undef memcpy
#undef memset
#undef memcmp
void *memcpy(void *dst, const void *src, size_t len);
void *memset(void *dst, int c, size_t len);
int memcmp(const void *s1, const void *s2, size_t len);
/*
* Access builtin version by default. If one needs to use optimized version,
* do "undef memcpy" in .c file and link against right string.c
*/
#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)
#define memcmp __builtin_memcmp
#endif /* BOOT_STRING_H */

435
arch/x86/boot/tools/build.c Normal file
View file

@ -0,0 +1,435 @@
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1997 Martin Mares
* Copyright (C) 2007 H. Peter Anvin
*/
/*
* This file builds a disk-image from three different files:
*
* - setup: 8086 machine code, sets up system parm
* - system: 80386 code for actual system
* - zoffset.h: header with ZO_* defines
*
* It does some checking that all files are of the correct type, and writes
* the result to the specified destination, removing headers and padding to
* the right amount. It also writes some system data to stdout.
*/
/*
* Changes by tytso to allow root device specification
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
* Cross compiling fixes by Gertjan van Wingerde, July 1996
* Rewritten by Martin Mares, April 1997
* Substantially overhauled by H. Peter Anvin, April 2007
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <tools/le_byteshift.h>
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
#define DEFAULT_MAJOR_ROOT 0
#define DEFAULT_MINOR_ROOT 0
#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
/* Minimal number of setup sectors */
#define SETUP_SECT_MIN 5
#define SETUP_SECT_MAX 64
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20
unsigned long efi32_stub_entry;
unsigned long efi64_stub_entry;
unsigned long efi_pe_entry;
unsigned long startup_64;
/*----------------------------------------------------------------------*/
static const u32 crctab32[] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
0x2d02ef8d
};
static u32 partial_crc32_one(u8 c, u32 crc)
{
return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
}
static u32 partial_crc32(const u8 *s, int len, u32 crc)
{
while (len--)
crc = partial_crc32_one(*s++, crc);
return crc;
}
static void die(const char * str, ...)
{
va_list args;
va_start(args, str);
vfprintf(stderr, str, args);
fputc('\n', stderr);
exit(1);
}
static void usage(void)
{
die("Usage: build setup system zoffset.h image");
}
#ifdef CONFIG_EFI_STUB
static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
{
unsigned int pe_header;
unsigned short num_sections;
u8 *section;
pe_header = get_unaligned_le32(&buf[0x3c]);
num_sections = get_unaligned_le16(&buf[pe_header + 6]);
#ifdef CONFIG_X86_32
section = &buf[pe_header + 0xa8];
#else
section = &buf[pe_header + 0xb8];
#endif
while (num_sections > 0) {
if (strncmp((char*)section, section_name, 8) == 0) {
/* section header size field */
put_unaligned_le32(size, section + 0x8);
/* section header vma field */
put_unaligned_le32(vma, section + 0xc);
/* section header 'size of initialised data' field */
put_unaligned_le32(datasz, section + 0x10);
/* section header 'file offset' field */
put_unaligned_le32(offset, section + 0x14);
break;
}
section += 0x28;
num_sections--;
}
}
static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
{
update_pecoff_section_header_fields(section_name, offset, size, size, offset);
}
static void update_pecoff_setup_and_reloc(unsigned int size)
{
u32 setup_offset = 0x200;
u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
u32 setup_size = reloc_offset - setup_offset;
update_pecoff_section_header(".setup", setup_offset, setup_size);
update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
/*
* Modify .reloc section contents with a single entry. The
* relocation is applied to offset 10 of the relocation section.
*/
put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
put_unaligned_le32(10, &buf[reloc_offset + 4]);
}
static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
{
unsigned int pe_header;
unsigned int text_sz = file_sz - text_start;
pe_header = get_unaligned_le32(&buf[0x3c]);
/*
* Size of code: Subtract the size of the first sector (512 bytes)
* which includes the header.
*/
put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
/*
* Address of entry point for PE/COFF executable
*/
put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
update_pecoff_section_header(".text", text_start, text_sz);
}
static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
{
unsigned int pe_header;
unsigned int bss_sz = init_sz - file_sz;
pe_header = get_unaligned_le32(&buf[0x3c]);
/* Size of uninitialized data */
put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
/* Size of image */
put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
}
static int reserve_pecoff_reloc_section(int c)
{
/* Reserve 0x20 bytes for .reloc section */
memset(buf+c, 0, PECOFF_RELOC_RESERVE);
return PECOFF_RELOC_RESERVE;
}
static void efi_stub_defaults(void)
{
/* Defaults for old kernel */
#ifdef CONFIG_X86_32
efi_pe_entry = 0x10;
#else
efi_pe_entry = 0x210;
startup_64 = 0x200;
#endif
}
static void efi_stub_entry_update(void)
{
unsigned long addr = efi32_stub_entry;
#ifdef CONFIG_X86_64
/* Yes, this is really how we defined it :( */
addr = efi64_stub_entry - 0x200;
#endif
#ifdef CONFIG_EFI_MIXED
if (efi32_stub_entry != addr)
die("32-bit and 64-bit EFI entry points do not match\n");
#endif
put_unaligned_le32(addr, &buf[0x264]);
}
#else
static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
static inline void update_pecoff_text(unsigned int text_start,
unsigned int file_sz) {}
static inline void update_pecoff_bss(unsigned int file_sz,
unsigned int init_sz) {}
static inline void efi_stub_defaults(void) {}
static inline void efi_stub_entry_update(void) {}
static inline int reserve_pecoff_reloc_section(int c)
{
return 0;
}
#endif /* CONFIG_EFI_STUB */
/*
* Parse zoffset.h and find the entry points. We could just #include zoffset.h
* but that would mean tools/build would have to be rebuilt every time. It's
* not as if parsing it is hard...
*/
#define PARSE_ZOFS(p, sym) do { \
if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \
sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \
} while (0)
static void parse_zoffset(char *fname)
{
FILE *file;
char *p;
int c;
file = fopen(fname, "r");
if (!file)
die("Unable to open `%s': %m", fname);
c = fread(buf, 1, sizeof(buf) - 1, file);
if (ferror(file))
die("read-error on `zoffset.h'");
fclose(file);
buf[c] = 0;
p = (char *)buf;
while (p && *p) {
PARSE_ZOFS(p, efi32_stub_entry);
PARSE_ZOFS(p, efi64_stub_entry);
PARSE_ZOFS(p, efi_pe_entry);
PARSE_ZOFS(p, startup_64);
p = strchr(p, '\n');
while (p && (*p == '\r' || *p == '\n'))
p++;
}
}
int main(int argc, char ** argv)
{
unsigned int i, sz, setup_sectors, init_sz;
int c;
u32 sys_size;
struct stat sb;
FILE *file, *dest;
int fd;
void *kernel;
u32 crc = 0xffffffffUL;
efi_stub_defaults();
if (argc != 5)
usage();
parse_zoffset(argv[3]);
dest = fopen(argv[4], "w");
if (!dest)
die("Unable to write `%s': %m", argv[4]);
/* Copy the setup code */
file = fopen(argv[1], "r");
if (!file)
die("Unable to open `%s': %m", argv[1]);
c = fread(buf, 1, sizeof(buf), file);
if (ferror(file))
die("read-error on `setup'");
if (c < 1024)
die("The setup must be at least 1024 bytes");
if (get_unaligned_le16(&buf[510]) != 0xAA55)
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);
c += reserve_pecoff_reloc_section(c);
/* Pad unused space with zeros */
setup_sectors = (c + 511) / 512;
if (setup_sectors < SETUP_SECT_MIN)
setup_sectors = SETUP_SECT_MIN;
i = setup_sectors*512;
memset(buf+c, 0, i-c);
update_pecoff_setup_and_reloc(i);
/* Set the default root device */
put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
/* Open and stat the kernel file */
fd = open(argv[2], O_RDONLY);
if (fd < 0)
die("Unable to open `%s': %m", argv[2]);
if (fstat(fd, &sb))
die("Unable to stat `%s': %m", argv[2]);
sz = sb.st_size;
printf("System is %d kB\n", (sz+1023)/1024);
kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
if (kernel == MAP_FAILED)
die("Unable to mmap '%s': %m", argv[2]);
/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
sys_size = (sz + 15 + 4) / 16;
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
put_unaligned_le32(sys_size, &buf[0x1f4]);
update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
init_sz = get_unaligned_le32(&buf[0x260]);
update_pecoff_bss(i + (sys_size * 16), init_sz);
efi_stub_entry_update();
crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, dest) != i)
die("Writing setup failed");
/* Copy the kernel code */
crc = partial_crc32(kernel, sz, crc);
if (fwrite(kernel, 1, sz, dest) != sz)
die("Writing kernel failed");
/* Add padding leaving 4 bytes for the checksum */
while (sz++ < (sys_size*16) - 4) {
crc = partial_crc32_one('\0', crc);
if (fwrite("\0", 1, 1, dest) != 1)
die("Writing padding failed");
}
/* Write the CRC */
printf("CRC %x\n", crc);
put_unaligned_le32(crc, buf);
if (fwrite(buf, 1, 4, dest) != 4)
die("Writing CRC failed");
/* Catch any delayed write failures */
if (fclose(dest))
die("Writing image failed");
close(fd);
/* Everything is OK */
return 0;
}

139
arch/x86/boot/tty.c Normal file
View file

@ -0,0 +1,139 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Very simple screen and serial I/O
*/
#include "boot.h"
int early_serial_base;
#define XMTRDY 0x20
#define TXR 0 /* Transmit register (WRITE) */
#define LSR 5 /* Line Status */
/*
* These functions are in .inittext so they can be used to signal
* error during initialization.
*/
static void __attribute__((section(".inittext"))) serial_putchar(int ch)
{
unsigned timeout = 0xffff;
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
outb(ch, early_serial_base + TXR);
}
static void __attribute__((section(".inittext"))) bios_putchar(int ch)
{
struct biosregs ireg;
initregs(&ireg);
ireg.bx = 0x0007;
ireg.cx = 0x0001;
ireg.ah = 0x0e;
ireg.al = ch;
intcall(0x10, &ireg, NULL);
}
void __attribute__((section(".inittext"))) putchar(int ch)
{
if (ch == '\n')
putchar('\r'); /* \n -> \r\n */
bios_putchar(ch);
if (early_serial_base != 0)
serial_putchar(ch);
}
void __attribute__((section(".inittext"))) puts(const char *str)
{
while (*str)
putchar(*str++);
}
/*
* Read the CMOS clock through the BIOS, and return the
* seconds in BCD.
*/
static u8 gettime(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x02;
intcall(0x1a, &ireg, &oreg);
return oreg.dh;
}
/*
* Read from the keyboard
*/
int getchar(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
/* ireg.ah = 0x00; */
intcall(0x16, &ireg, &oreg);
return oreg.al;
}
static int kbd_pending(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x01;
intcall(0x16, &ireg, &oreg);
return !(oreg.eflags & X86_EFLAGS_ZF);
}
void kbd_flush(void)
{
for (;;) {
if (!kbd_pending())
break;
getchar();
}
}
int getchar_timeout(void)
{
int cnt = 30;
int t0, t1;
t0 = gettime();
while (cnt) {
if (kbd_pending())
return getchar();
t1 = gettime();
if (t0 != t1) {
cnt--;
t0 = t1;
}
}
return 0; /* Timeout! */
}

21
arch/x86/boot/version.c Normal file
View file

@ -0,0 +1,21 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Kernel version string
*/
#include "boot.h"
#include <generated/utsrelease.h>
#include <generated/compile.h>
const char kernel_version[] =
UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
UTS_VERSION;

72
arch/x86/boot/vesa.h Normal file
View file

@ -0,0 +1,72 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
#ifndef BOOT_VESA_H
#define BOOT_VESA_H
typedef struct {
u16 off, seg;
} far_ptr;
/* VESA General Information table */
struct vesa_general_info {
u32 signature; /* 0 Magic number = "VESA" */
u16 version; /* 4 */
far_ptr vendor_string; /* 6 */
u32 capabilities; /* 10 */
far_ptr video_mode_ptr; /* 14 */
u16 total_memory; /* 18 */
u8 reserved[236]; /* 20 */
} __attribute__ ((packed));
#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
struct vesa_mode_info {
u16 mode_attr; /* 0 */
u8 win_attr[2]; /* 2 */
u16 win_grain; /* 4 */
u16 win_size; /* 6 */
u16 win_seg[2]; /* 8 */
far_ptr win_scheme; /* 12 */
u16 logical_scan; /* 16 */
u16 h_res; /* 18 */
u16 v_res; /* 20 */
u8 char_width; /* 22 */
u8 char_height; /* 23 */
u8 memory_planes; /* 24 */
u8 bpp; /* 25 */
u8 banks; /* 26 */
u8 memory_layout; /* 27 */
u8 bank_size; /* 28 */
u8 image_planes; /* 29 */
u8 page_function; /* 30 */
u8 rmask; /* 31 */
u8 rpos; /* 32 */
u8 gmask; /* 33 */
u8 gpos; /* 34 */
u8 bmask; /* 35 */
u8 bpos; /* 36 */
u8 resv_mask; /* 37 */
u8 resv_pos; /* 38 */
u8 dcm_info; /* 39 */
u32 lfb_ptr; /* 40 Linear frame buffer address */
u32 offscreen_ptr; /* 44 Offscreen memory address */
u16 offscreen_size; /* 48 */
u8 reserved[206]; /* 50 */
} __attribute__ ((packed));
#endif /* LIB_SYS_VESA_H */

128
arch/x86/boot/video-bios.c Normal file
View file

@ -0,0 +1,128 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Standard video BIOS modes
*
* We have two options for this; silent and scanned.
*/
#include "boot.h"
#include "video.h"
static __videocard video_bios;
/* Set a conventional BIOS mode */
static int set_bios_mode(u8 mode);
static int bios_set_mode(struct mode_info *mi)
{
return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS);
}
static int set_bios_mode(u8 mode)
{
struct biosregs ireg, oreg;
u8 new_mode;
initregs(&ireg);
ireg.al = mode; /* AH=0x00 Set Video Mode */
intcall(0x10, &ireg, NULL);
ireg.ah = 0x0f; /* Get Current Video Mode */
intcall(0x10, &ireg, &oreg);
do_restore = 1; /* Assume video contents were lost */
/* Not all BIOSes are clean with the top bit */
new_mode = oreg.al & 0x7f;
if (new_mode == mode)
return 0; /* Mode change OK */
#ifndef _WAKEUP
if (new_mode != boot_params.screen_info.orig_video_mode) {
/* Mode setting failed, but we didn't end up where we
started. That's bad. Try to revert to the original
video mode. */
ireg.ax = boot_params.screen_info.orig_video_mode;
intcall(0x10, &ireg, NULL);
}
#endif
return -1;
}
static int bios_probe(void)
{
u8 mode;
#ifdef _WAKEUP
u8 saved_mode = 0x03;
#else
u8 saved_mode = boot_params.screen_info.orig_video_mode;
#endif
u16 crtc;
struct mode_info *mi;
int nmodes = 0;
if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA)
return 0;
set_fs(0);
crtc = vga_crtc();
video_bios.modes = GET_HEAP(struct mode_info, 0);
for (mode = 0x14; mode <= 0x7f; mode++) {
if (!heap_free(sizeof(struct mode_info)))
break;
if (mode_defined(VIDEO_FIRST_BIOS+mode))
continue;
if (set_bios_mode(mode))
continue;
/* Try to verify that it's a text mode. */
/* Attribute Controller: make graphics controller disabled */
if (in_idx(0x3c0, 0x10) & 0x01)
continue;
/* Graphics Controller: verify Alpha addressing enabled */
if (in_idx(0x3ce, 0x06) & 0x01)
continue;
/* CRTC cursor location low should be zero(?) */
if (in_idx(crtc, 0x0f))
continue;
mi = GET_HEAP(struct mode_info, 1);
mi->mode = VIDEO_FIRST_BIOS+mode;
mi->depth = 0; /* text */
mi->x = rdfs16(0x44a);
mi->y = rdfs8(0x484)+1;
nmodes++;
}
set_bios_mode(saved_mode);
return nmodes;
}
static __videocard video_bios =
{
.card_name = "BIOS",
.probe = bios_probe,
.set_mode = bios_set_mode,
.unsafe = 1,
.xmode_first = VIDEO_FIRST_BIOS,
.xmode_n = 0x80,
};

173
arch/x86/boot/video-mode.c Normal file
View file

@ -0,0 +1,173 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* arch/i386/boot/video-mode.c
*
* Set the video mode. This is separated out into a different
* file in order to be shared with the ACPI wakeup code.
*/
#include "boot.h"
#include "video.h"
#include "vesa.h"
/*
* Common variables
*/
int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
u16 video_segment;
int force_x, force_y; /* Don't query the BIOS for cols/rows */
int do_restore; /* Screen contents changed during mode flip */
int graphic_mode; /* Graphic mode with linear frame buffer */
/* Probe the video drivers and have them generate their mode lists. */
void probe_cards(int unsafe)
{
struct card_info *card;
static u8 probed[2];
if (probed[unsafe])
return;
probed[unsafe] = 1;
for (card = video_cards; card < video_cards_end; card++) {
if (card->unsafe == unsafe) {
if (card->probe)
card->nmodes = card->probe();
else
card->nmodes = 0;
}
}
}
/* Test if a mode is defined */
int mode_defined(u16 mode)
{
struct card_info *card;
struct mode_info *mi;
int i;
for (card = video_cards; card < video_cards_end; card++) {
mi = card->modes;
for (i = 0; i < card->nmodes; i++, mi++) {
if (mi->mode == mode)
return 1;
}
}
return 0;
}
/* Set mode (without recalc) */
static int raw_set_mode(u16 mode, u16 *real_mode)
{
int nmode, i;
struct card_info *card;
struct mode_info *mi;
/* Drop the recalc bit if set */
mode &= ~VIDEO_RECALC;
/* Scan for mode based on fixed ID, position, or resolution */
nmode = 0;
for (card = video_cards; card < video_cards_end; card++) {
mi = card->modes;
for (i = 0; i < card->nmodes; i++, mi++) {
int visible = mi->x || mi->y;
if ((mode == nmode && visible) ||
mode == mi->mode ||
mode == (mi->y << 8)+mi->x) {
*real_mode = mi->mode;
return card->set_mode(mi);
}
if (visible)
nmode++;
}
}
/* Nothing found? Is it an "exceptional" (unprobed) mode? */
for (card = video_cards; card < video_cards_end; card++) {
if (mode >= card->xmode_first &&
mode < card->xmode_first+card->xmode_n) {
struct mode_info mix;
*real_mode = mix.mode = mode;
mix.x = mix.y = 0;
return card->set_mode(&mix);
}
}
/* Otherwise, failure... */
return -1;
}
/*
* Recalculate the vertical video cutoff (hack!)
*/
static void vga_recalc_vertical(void)
{
unsigned int font_size, rows;
u16 crtc;
u8 pt, ov;
set_fs(0);
font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
rows *= font_size; /* Visible scan lines */
rows--; /* ... minus one */
crtc = vga_crtc();
pt = in_idx(crtc, 0x11);
pt &= ~0x80; /* Unlock CR0-7 */
out_idx(pt, crtc, 0x11);
out_idx((u8)rows, crtc, 0x12); /* Lower height register */
ov = in_idx(crtc, 0x07); /* Overflow register */
ov &= 0xbd;
ov |= (rows >> (8-1)) & 0x02;
ov |= (rows >> (9-6)) & 0x40;
out_idx(ov, crtc, 0x07);
}
/* Set mode (with recalc if specified) */
int set_mode(u16 mode)
{
int rv;
u16 real_mode;
/* Very special mode numbers... */
if (mode == VIDEO_CURRENT_MODE)
return 0; /* Nothing to do... */
else if (mode == NORMAL_VGA)
mode = VIDEO_80x25;
else if (mode == EXTENDED_VGA)
mode = VIDEO_8POINT;
rv = raw_set_mode(mode, &real_mode);
if (rv)
return rv;
if (mode & VIDEO_RECALC)
vga_recalc_vertical();
/* Save the canonical mode number for the kernel, not
an alias, size specification or menu position */
#ifndef _WAKEUP
boot_params.hdr.vid_mode = real_mode;
#endif
return 0;
}

281
arch/x86/boot/video-vesa.c Normal file
View file

@ -0,0 +1,281 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* VESA text modes
*/
#include "boot.h"
#include "video.h"
#include "vesa.h"
#include "string.h"
/* VESA information */
static struct vesa_general_info vginfo;
static struct vesa_mode_info vminfo;
static __videocard video_vesa;
#ifndef _WAKEUP
static void vesa_store_mode_params_graphics(void);
#else /* _WAKEUP */
static inline void vesa_store_mode_params_graphics(void) {}
#endif /* _WAKEUP */
static int vesa_probe(void)
{
struct biosregs ireg, oreg;
u16 mode;
addr_t mode_ptr;
struct mode_info *mi;
int nmodes = 0;
video_vesa.modes = GET_HEAP(struct mode_info, 0);
initregs(&ireg);
ireg.ax = 0x4f00;
ireg.di = (size_t)&vginfo;
intcall(0x10, &ireg, &oreg);
if (oreg.ax != 0x004f ||
vginfo.signature != VESA_MAGIC ||
vginfo.version < 0x0102)
return 0; /* Not present */
set_fs(vginfo.video_mode_ptr.seg);
mode_ptr = vginfo.video_mode_ptr.off;
while ((mode = rdfs16(mode_ptr)) != 0xffff) {
mode_ptr += 2;
if (!heap_free(sizeof(struct mode_info)))
break; /* Heap full, can't save mode info */
if (mode & ~0x1ff)
continue;
memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
ireg.ax = 0x4f01;
ireg.cx = mode;
ireg.di = (size_t)&vminfo;
intcall(0x10, &ireg, &oreg);
if (oreg.ax != 0x004f)
continue;
if ((vminfo.mode_attr & 0x15) == 0x05) {
/* Text Mode, TTY BIOS supported,
supported by hardware */
mi = GET_HEAP(struct mode_info, 1);
mi->mode = mode + VIDEO_FIRST_VESA;
mi->depth = 0; /* text */
mi->x = vminfo.h_res;
mi->y = vminfo.v_res;
nmodes++;
} else if ((vminfo.mode_attr & 0x99) == 0x99 &&
(vminfo.memory_layout == 4 ||
vminfo.memory_layout == 6) &&
vminfo.memory_planes == 1) {
#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
/* Graphics mode, color, linear frame buffer
supported. Only register the mode if
if framebuffer is configured, however,
otherwise the user will be left without a screen. */
mi = GET_HEAP(struct mode_info, 1);
mi->mode = mode + VIDEO_FIRST_VESA;
mi->depth = vminfo.bpp;
mi->x = vminfo.h_res;
mi->y = vminfo.v_res;
nmodes++;
#endif
}
}
return nmodes;
}
static int vesa_set_mode(struct mode_info *mode)
{
struct biosregs ireg, oreg;
int is_graphic;
u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
initregs(&ireg);
ireg.ax = 0x4f01;
ireg.cx = vesa_mode;
ireg.di = (size_t)&vminfo;
intcall(0x10, &ireg, &oreg);
if (oreg.ax != 0x004f)
return -1;
if ((vminfo.mode_attr & 0x15) == 0x05) {
/* It's a supported text mode */
is_graphic = 0;
#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
} else if ((vminfo.mode_attr & 0x99) == 0x99) {
/* It's a graphics mode with linear frame buffer */
is_graphic = 1;
vesa_mode |= 0x4000; /* Request linear frame buffer */
#endif
} else {
return -1; /* Invalid mode */
}
initregs(&ireg);
ireg.ax = 0x4f02;
ireg.bx = vesa_mode;
intcall(0x10, &ireg, &oreg);
if (oreg.ax != 0x004f)
return -1;
graphic_mode = is_graphic;
if (!is_graphic) {
/* Text mode */
force_x = mode->x;
force_y = mode->y;
do_restore = 1;
} else {
/* Graphics mode */
vesa_store_mode_params_graphics();
}
return 0;
}
#ifndef _WAKEUP
/* Switch DAC to 8-bit mode */
static void vesa_dac_set_8bits(void)
{
struct biosregs ireg, oreg;
u8 dac_size = 6;
/* If possible, switch the DAC to 8-bit mode */
if (vginfo.capabilities & 1) {
initregs(&ireg);
ireg.ax = 0x4f08;
ireg.bh = 0x08;
intcall(0x10, &ireg, &oreg);
if (oreg.ax == 0x004f)
dac_size = oreg.bh;
}
/* Set the color sizes to the DAC size, and offsets to 0 */
boot_params.screen_info.red_size = dac_size;
boot_params.screen_info.green_size = dac_size;
boot_params.screen_info.blue_size = dac_size;
boot_params.screen_info.rsvd_size = dac_size;
boot_params.screen_info.red_pos = 0;
boot_params.screen_info.green_pos = 0;
boot_params.screen_info.blue_pos = 0;
boot_params.screen_info.rsvd_pos = 0;
}
/* Save the VESA protected mode info */
static void vesa_store_pm_info(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ax = 0x4f0a;
intcall(0x10, &ireg, &oreg);
if (oreg.ax != 0x004f)
return;
boot_params.screen_info.vesapm_seg = oreg.es;
boot_params.screen_info.vesapm_off = oreg.di;
}
/*
* Save video mode parameters for graphics mode
*/
static void vesa_store_mode_params_graphics(void)
{
/* Tell the kernel we're in VESA graphics mode */
boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
/* Mode parameters */
boot_params.screen_info.vesa_attributes = vminfo.mode_attr;
boot_params.screen_info.lfb_linelength = vminfo.logical_scan;
boot_params.screen_info.lfb_width = vminfo.h_res;
boot_params.screen_info.lfb_height = vminfo.v_res;
boot_params.screen_info.lfb_depth = vminfo.bpp;
boot_params.screen_info.pages = vminfo.image_planes;
boot_params.screen_info.lfb_base = vminfo.lfb_ptr;
memcpy(&boot_params.screen_info.red_size,
&vminfo.rmask, 8);
/* General parameters */
boot_params.screen_info.lfb_size = vginfo.total_memory;
if (vminfo.bpp <= 8)
vesa_dac_set_8bits();
vesa_store_pm_info();
}
/*
* Save EDID information for the kernel; this is invoked, separately,
* after mode-setting.
*/
void vesa_store_edid(void)
{
#ifdef CONFIG_FIRMWARE_EDID
struct biosregs ireg, oreg;
/* Apparently used as a nonsense token... */
memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
if (vginfo.version < 0x0200)
return; /* EDID requires VBE 2.0+ */
initregs(&ireg);
ireg.ax = 0x4f15; /* VBE DDC */
/* ireg.bx = 0x0000; */ /* Report DDC capabilities */
/* ireg.cx = 0; */ /* Controller 0 */
ireg.es = 0; /* ES:DI must be 0 by spec */
intcall(0x10, &ireg, &oreg);
if (oreg.ax != 0x004f)
return; /* No EDID */
/* BH = time in seconds to transfer EDD information */
/* BL = DDC level supported */
ireg.ax = 0x4f15; /* VBE DDC */
ireg.bx = 0x0001; /* Read EDID */
/* ireg.cx = 0; */ /* Controller 0 */
/* ireg.dx = 0; */ /* EDID block number */
ireg.es = ds();
ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
intcall(0x10, &ireg, &oreg);
#endif /* CONFIG_FIRMWARE_EDID */
}
#endif /* not _WAKEUP */
static __videocard video_vesa =
{
.card_name = "VESA",
.probe = vesa_probe,
.set_mode = vesa_set_mode,
.xmode_first = VIDEO_FIRST_VESA,
.xmode_n = 0x200,
};

288
arch/x86/boot/video-vga.c Normal file
View file

@ -0,0 +1,288 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Common all-VGA modes
*/
#include "boot.h"
#include "video.h"
static struct mode_info vga_modes[] = {
{ VIDEO_80x25, 80, 25, 0 },
{ VIDEO_8POINT, 80, 50, 0 },
{ VIDEO_80x43, 80, 43, 0 },
{ VIDEO_80x28, 80, 28, 0 },
{ VIDEO_80x30, 80, 30, 0 },
{ VIDEO_80x34, 80, 34, 0 },
{ VIDEO_80x60, 80, 60, 0 },
};
static struct mode_info ega_modes[] = {
{ VIDEO_80x25, 80, 25, 0 },
{ VIDEO_8POINT, 80, 43, 0 },
};
static struct mode_info cga_modes[] = {
{ VIDEO_80x25, 80, 25, 0 },
};
static __videocard video_vga;
/* Set basic 80x25 mode */
static u8 vga_set_basic_mode(void)
{
struct biosregs ireg, oreg;
u8 mode;
initregs(&ireg);
/* Query current mode */
ireg.ax = 0x0f00;
intcall(0x10, &ireg, &oreg);
mode = oreg.al;
if (mode != 3 && mode != 7)
mode = 3;
/* Set the mode */
ireg.ax = mode; /* AH=0: set mode */
intcall(0x10, &ireg, NULL);
do_restore = 1;
return mode;
}
static void vga_set_8font(void)
{
/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
struct biosregs ireg;
initregs(&ireg);
/* Set 8x8 font */
ireg.ax = 0x1112;
/* ireg.bl = 0; */
intcall(0x10, &ireg, NULL);
/* Use alternate print screen */
ireg.ax = 0x1200;
ireg.bl = 0x20;
intcall(0x10, &ireg, NULL);
/* Turn off cursor emulation */
ireg.ax = 0x1201;
ireg.bl = 0x34;
intcall(0x10, &ireg, NULL);
/* Cursor is scan lines 6-7 */
ireg.ax = 0x0100;
ireg.cx = 0x0607;
intcall(0x10, &ireg, NULL);
}
static void vga_set_14font(void)
{
/* Set 9x14 font - 80x28 on VGA */
struct biosregs ireg;
initregs(&ireg);
/* Set 9x14 font */
ireg.ax = 0x1111;
/* ireg.bl = 0; */
intcall(0x10, &ireg, NULL);
/* Turn off cursor emulation */
ireg.ax = 0x1201;
ireg.bl = 0x34;
intcall(0x10, &ireg, NULL);
/* Cursor is scan lines 11-12 */
ireg.ax = 0x0100;
ireg.cx = 0x0b0c;
intcall(0x10, &ireg, NULL);
}
static void vga_set_80x43(void)
{
/* Set 80x43 mode on VGA (not EGA) */
struct biosregs ireg;
initregs(&ireg);
/* Set 350 scans */
ireg.ax = 0x1201;
ireg.bl = 0x30;
intcall(0x10, &ireg, NULL);
/* Reset video mode */
ireg.ax = 0x0003;
intcall(0x10, &ireg, NULL);
vga_set_8font();
}
/* I/O address of the VGA CRTC */
u16 vga_crtc(void)
{
return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
}
static void vga_set_480_scanlines(void)
{
u16 crtc; /* CRTC base address */
u8 csel; /* CRTC miscellaneous output register */
crtc = vga_crtc();
out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
out_idx(0x0b, crtc, 0x06); /* Vertical total */
out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
out_idx(0xea, crtc, 0x10); /* Vertical sync start */
out_idx(0xdf, crtc, 0x12); /* Vertical display end */
out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
out_idx(0x04, crtc, 0x16); /* Vertical blank end */
csel = inb(0x3cc);
csel &= 0x0d;
csel |= 0xe2;
outb(csel, 0x3c2);
}
static void vga_set_vertical_end(int lines)
{
u16 crtc; /* CRTC base address */
u8 ovfw; /* CRTC overflow register */
int end = lines-1;
crtc = vga_crtc();
ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
out_idx(end, crtc, 0x12); /* Vertical display end */
}
static void vga_set_80x30(void)
{
vga_set_480_scanlines();
vga_set_vertical_end(30*16);
}
static void vga_set_80x34(void)
{
vga_set_480_scanlines();
vga_set_14font();
vga_set_vertical_end(34*14);
}
static void vga_set_80x60(void)
{
vga_set_480_scanlines();
vga_set_8font();
vga_set_vertical_end(60*8);
}
static int vga_set_mode(struct mode_info *mode)
{
/* Set the basic mode */
vga_set_basic_mode();
/* Override a possibly broken BIOS */
force_x = mode->x;
force_y = mode->y;
switch (mode->mode) {
case VIDEO_80x25:
break;
case VIDEO_8POINT:
vga_set_8font();
break;
case VIDEO_80x43:
vga_set_80x43();
break;
case VIDEO_80x28:
vga_set_14font();
break;
case VIDEO_80x30:
vga_set_80x30();
break;
case VIDEO_80x34:
vga_set_80x34();
break;
case VIDEO_80x60:
vga_set_80x60();
break;
}
return 0;
}
/*
* Note: this probe includes basic information required by all
* systems. It should be executed first, by making sure
* video-vga.c is listed first in the Makefile.
*/
static int vga_probe(void)
{
static const char *card_name[] = {
"CGA/MDA/HGC", "EGA", "VGA"
};
static struct mode_info *mode_lists[] = {
cga_modes,
ega_modes,
vga_modes,
};
static int mode_count[] = {
sizeof(cga_modes)/sizeof(struct mode_info),
sizeof(ega_modes)/sizeof(struct mode_info),
sizeof(vga_modes)/sizeof(struct mode_info),
};
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ax = 0x1200;
ireg.bl = 0x10; /* Check EGA/VGA */
intcall(0x10, &ireg, &oreg);
#ifndef _WAKEUP
boot_params.screen_info.orig_video_ega_bx = oreg.bx;
#endif
/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
if (oreg.bl != 0x10) {
/* EGA/VGA */
ireg.ax = 0x1a00;
intcall(0x10, &ireg, &oreg);
if (oreg.al == 0x1a) {
adapter = ADAPTER_VGA;
#ifndef _WAKEUP
boot_params.screen_info.orig_video_isVGA = 1;
#endif
} else {
adapter = ADAPTER_EGA;
}
} else {
adapter = ADAPTER_CGA;
}
video_vga.modes = mode_lists[adapter];
video_vga.card_name = card_name[adapter];
return mode_count[adapter];
}
static __videocard video_vga = {
.card_name = "VGA",
.probe = vga_probe,
.set_mode = vga_set_mode,
};

341
arch/x86/boot/video.c Normal file
View file

@ -0,0 +1,341 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
* Copyright 2009 Intel Corporation; author H. Peter Anvin
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Select video mode
*/
#include "boot.h"
#include "video.h"
#include "vesa.h"
static void store_cursor_position(void)
{
struct biosregs ireg, oreg;
initregs(&ireg);
ireg.ah = 0x03;
intcall(0x10, &ireg, &oreg);
boot_params.screen_info.orig_x = oreg.dl;
boot_params.screen_info.orig_y = oreg.dh;
if (oreg.ch & 0x20)
boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
}
static void store_video_mode(void)
{
struct biosregs ireg, oreg;
/* N.B.: the saving of the video page here is a bit silly,
since we pretty much assume page 0 everywhere. */
initregs(&ireg);
ireg.ah = 0x0f;
intcall(0x10, &ireg, &oreg);
/* Not all BIOSes are clean with respect to the top bit */
boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
boot_params.screen_info.orig_video_page = oreg.bh;
}
/*
* Store the video mode parameters for later usage by the kernel.
* This is done by asking the BIOS except for the rows/columns
* parameters in the default 80x25 mode -- these are set directly,
* because some very obscure BIOSes supply insane values.
*/
static void store_mode_params(void)
{
u16 font_size;
int x, y;
/* For graphics mode, it is up to the mode-setting driver
(currently only video-vesa.c) to store the parameters */
if (graphic_mode)
return;
store_cursor_position();
store_video_mode();
if (boot_params.screen_info.orig_video_mode == 0x07) {
/* MDA, HGC, or VGA in monochrome mode */
video_segment = 0xb000;
} else {
/* CGA, EGA, VGA and so forth */
video_segment = 0xb800;
}
set_fs(0);
font_size = rdfs16(0x485); /* Font size, BIOS area */
boot_params.screen_info.orig_video_points = font_size;
x = rdfs16(0x44a);
y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1;
if (force_x)
x = force_x;
if (force_y)
y = force_y;
boot_params.screen_info.orig_video_cols = x;
boot_params.screen_info.orig_video_lines = y;
}
static unsigned int get_entry(void)
{
char entry_buf[4];
int i, len = 0;
int key;
unsigned int v;
do {
key = getchar();
if (key == '\b') {
if (len > 0) {
puts("\b \b");
len--;
}
} else if ((key >= '0' && key <= '9') ||
(key >= 'A' && key <= 'Z') ||
(key >= 'a' && key <= 'z')) {
if (len < sizeof entry_buf) {
entry_buf[len++] = key;
putchar(key);
}
}
} while (key != '\r');
putchar('\n');
if (len == 0)
return VIDEO_CURRENT_MODE; /* Default */
v = 0;
for (i = 0; i < len; i++) {
v <<= 4;
key = entry_buf[i] | 0x20;
v += (key > '9') ? key-'a'+10 : key-'0';
}
return v;
}
static void display_menu(void)
{
struct card_info *card;
struct mode_info *mi;
char ch;
int i;
int nmodes;
int modes_per_line;
int col;
nmodes = 0;
for (card = video_cards; card < video_cards_end; card++)
nmodes += card->nmodes;
modes_per_line = 1;
if (nmodes >= 20)
modes_per_line = 3;
for (col = 0; col < modes_per_line; col++)
puts("Mode: Resolution: Type: ");
putchar('\n');
col = 0;
ch = '0';
for (card = video_cards; card < video_cards_end; card++) {
mi = card->modes;
for (i = 0; i < card->nmodes; i++, mi++) {
char resbuf[32];
int visible = mi->x && mi->y;
u16 mode_id = mi->mode ? mi->mode :
(mi->y << 8)+mi->x;
if (!visible)
continue; /* Hidden mode */
if (mi->depth)
sprintf(resbuf, "%dx%d", mi->y, mi->depth);
else
sprintf(resbuf, "%d", mi->y);
printf("%c %03X %4dx%-7s %-6s",
ch, mode_id, mi->x, resbuf, card->card_name);
col++;
if (col >= modes_per_line) {
putchar('\n');
col = 0;
}
if (ch == '9')
ch = 'a';
else if (ch == 'z' || ch == ' ')
ch = ' '; /* Out of keys... */
else
ch++;
}
}
if (col)
putchar('\n');
}
#define H(x) ((x)-'a'+10)
#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n'))
static unsigned int mode_menu(void)
{
int key;
unsigned int sel;
puts("Press <ENTER> to see video modes available, "
"<SPACE> to continue, or wait 30 sec\n");
kbd_flush();
while (1) {
key = getchar_timeout();
if (key == ' ' || key == 0)
return VIDEO_CURRENT_MODE; /* Default */
if (key == '\r')
break;
putchar('\a'); /* Beep! */
}
for (;;) {
display_menu();
puts("Enter a video mode or \"scan\" to scan for "
"additional modes: ");
sel = get_entry();
if (sel != SCAN)
return sel;
probe_cards(1);
}
}
/* Save screen content to the heap */
static struct saved_screen {
int x, y;
int curx, cury;
u16 *data;
} saved;
static void save_screen(void)
{
/* Should be called after store_mode_params() */
saved.x = boot_params.screen_info.orig_video_cols;
saved.y = boot_params.screen_info.orig_video_lines;
saved.curx = boot_params.screen_info.orig_x;
saved.cury = boot_params.screen_info.orig_y;
if (!heap_free(saved.x*saved.y*sizeof(u16)+512))
return; /* Not enough heap to save the screen */
saved.data = GET_HEAP(u16, saved.x*saved.y);
set_fs(video_segment);
copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16));
}
static void restore_screen(void)
{
/* Should be called after store_mode_params() */
int xs = boot_params.screen_info.orig_video_cols;
int ys = boot_params.screen_info.orig_video_lines;
int y;
addr_t dst = 0;
u16 *src = saved.data;
struct biosregs ireg;
if (graphic_mode)
return; /* Can't restore onto a graphic mode */
if (!src)
return; /* No saved screen contents */
/* Restore screen contents */
set_fs(video_segment);
for (y = 0; y < ys; y++) {
int npad;
if (y < saved.y) {
int copy = (xs < saved.x) ? xs : saved.x;
copy_to_fs(dst, src, copy*sizeof(u16));
dst += copy*sizeof(u16);
src += saved.x;
npad = (xs < saved.x) ? 0 : xs-saved.x;
} else {
npad = xs;
}
/* Writes "npad" blank characters to
video_segment:dst and advances dst */
asm volatile("pushw %%es ; "
"movw %2,%%es ; "
"shrw %%cx ; "
"jnc 1f ; "
"stosw \n\t"
"1: rep;stosl ; "
"popw %%es"
: "+D" (dst), "+c" (npad)
: "bdS" (video_segment),
"a" (0x07200720));
}
/* Restore cursor position */
if (saved.curx >= xs)
saved.curx = xs-1;
if (saved.cury >= ys)
saved.cury = ys-1;
initregs(&ireg);
ireg.ah = 0x02; /* Set cursor position */
ireg.dh = saved.cury;
ireg.dl = saved.curx;
intcall(0x10, &ireg, NULL);
store_cursor_position();
}
void set_video(void)
{
u16 mode = boot_params.hdr.vid_mode;
RESET_HEAP();
store_mode_params();
save_screen();
probe_cards(0);
for (;;) {
if (mode == ASK_VGA)
mode = mode_menu();
if (!set_mode(mode))
break;
printf("Undefined video mode number: %x\n", mode);
mode = ASK_VGA;
}
boot_params.hdr.vid_mode = mode;
vesa_store_edid();
store_mode_params();
if (do_restore)
restore_screen();
}

121
arch/x86/boot/video.h Normal file
View file

@ -0,0 +1,121 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
*
* ----------------------------------------------------------------------- */
/*
* Header file for the real-mode video probing code
*/
#ifndef BOOT_VIDEO_H
#define BOOT_VIDEO_H
#include <linux/types.h>
/*
* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
* NORMAL_VGA (-1)
* EXTENDED_VGA (-2)
* ASK_VGA (-3)
* Video modes numbered by menu position -- NOT RECOMMENDED because of lack
* of compatibility when extending the table. These are between 0x00 and 0xff.
*/
#define VIDEO_FIRST_MENU 0x0000
/* Standard BIOS video modes (BIOS number + 0x0100) */
#define VIDEO_FIRST_BIOS 0x0100
/* VESA BIOS video modes (VESA number + 0x0200) */
#define VIDEO_FIRST_VESA 0x0200
/* Video7 special modes (BIOS number + 0x0900) */
#define VIDEO_FIRST_V7 0x0900
/* Special video modes */
#define VIDEO_FIRST_SPECIAL 0x0f00
#define VIDEO_80x25 0x0f00
#define VIDEO_8POINT 0x0f01
#define VIDEO_80x43 0x0f02
#define VIDEO_80x28 0x0f03
#define VIDEO_CURRENT_MODE 0x0f04
#define VIDEO_80x30 0x0f05
#define VIDEO_80x34 0x0f06
#define VIDEO_80x60 0x0f07
#define VIDEO_GFX_HACK 0x0f08
#define VIDEO_LAST_SPECIAL 0x0f09
/* Video modes given by resolution */
#define VIDEO_FIRST_RESOLUTION 0x1000
/* The "recalculate timings" flag */
#define VIDEO_RECALC 0x8000
void store_screen(void);
#define DO_STORE() store_screen()
/*
* Mode table structures
*/
struct mode_info {
u16 mode; /* Mode number (vga= style) */
u16 x, y; /* Width, height */
u16 depth; /* Bits per pixel, 0 for text mode */
};
struct card_info {
const char *card_name;
int (*set_mode)(struct mode_info *mode);
int (*probe)(void);
struct mode_info *modes;
int nmodes; /* Number of probed modes so far */
int unsafe; /* Probing is unsafe, only do after "scan" */
u16 xmode_first; /* Unprobed modes to try to call anyway */
u16 xmode_n; /* Size of unprobed mode range */
};
#define __videocard struct card_info __attribute__((used,section(".videocards")))
extern struct card_info video_cards[], video_cards_end[];
int mode_defined(u16 mode); /* video.c */
/* Basic video information */
#define ADAPTER_CGA 0 /* CGA/MDA/HGC */
#define ADAPTER_EGA 1
#define ADAPTER_VGA 2
extern int adapter;
extern u16 video_segment;
extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
extern int do_restore; /* Restore screen contents */
extern int graphic_mode; /* Graphics mode with linear frame buffer */
/* Accessing VGA indexed registers */
static inline u8 in_idx(u16 port, u8 index)
{
outb(index, port);
return inb(port+1);
}
static inline void out_idx(u8 v, u16 port, u8 index)
{
outw(index+(v << 8), port);
}
/* Writes a value to an indexed port and then reads the port again */
static inline u8 tst_idx(u8 v, u16 port, u8 index)
{
out_idx(port, index, v);
return in_idx(port, index);
}
/* Get the I/O port of the VGA CRTC */
u16 vga_crtc(void); /* video-vga.c */
#endif /* BOOT_VIDEO_H */

View file

@ -0,0 +1,311 @@
# CONFIG_64BIT is not set
CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
CONFIG_MAC_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
CONFIG_SMP=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
CONFIG_X86_REBOOTFIXUPS=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_AMD=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_HIGHPTE=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_EFI=y
CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_COMPAT_VDSO is not set
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_STAT is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_MSI=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
CONFIG_HOTPLUG_PCI=y
CONFIG_BINFMT_MISC=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
CONFIG_NETLABEL=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_FTP=y
CONFIG_NF_CONNTRACK_IRC=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_MANGLE=y
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
CONFIG_NET_SCHED=y
CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_HAMRADIO=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_ATA_PIIX=y
CONFIG_PATA_AMD=y
CONFIG_PATA_OLDPIIX=y
CONFIG_PATA_SCH=y
CONFIG_PATA_MPIIX=y
CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_MIRROR=y
CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
CONFIG_NETCONSOLE=y
CONFIG_BNX2=y
CONFIG_TIGON3=y
CONFIG_NET_TULIP=y
CONFIG_E100=y
CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_SKY2=y
CONFIG_NE2K_PCI=y
CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
CONFIG_R8169=y
CONFIG_FDDI=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_MISC=y
CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
CONFIG_HW_RANDOM=y
CONFIG_NVRAM=y
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
CONFIG_I2C_I801=y
CONFIG_WATCHDOG=y
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
CONFIG_DRM=y
CONFIG_DRM_I915=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_HRTIMER=y
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
CONFIG_HIDRAW=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
CONFIG_USB_STORAGE=y
CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_DMADEVICES=y
CONFIG_EEEPC_LAPTOP=y
CONFIG_EFI_VARS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
CONFIG_AUTOFS4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set

View file

@ -0,0 +1,28 @@
CONFIG_NET=y
CONFIG_NET_CORE=y
CONFIG_NETDEVICES=y
CONFIG_BLOCK=y
CONFIG_BLK_DEV=y
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_INET=y
CONFIG_TTY=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_BINFMT_ELF=y
CONFIG_PCI=y
CONFIG_PCI_MSI=y
CONFIG_DEBUG_KERNEL=y
CONFIG_VIRTUALIZATION=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_KVM_GUEST=y
CONFIG_VIRTIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_VIRTIO_NET=y
CONFIG_9P_FS=y
CONFIG_NET_9P=y
CONFIG_NET_9P_VIRTIO=y

View file

@ -0,0 +1 @@
CONFIG_NOHIGHMEM=y

View file

@ -0,0 +1,308 @@
CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_AUDIT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
CONFIG_MAC_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
CONFIG_SMP=y
CONFIG_CALGARY_IOMMU=y
CONFIG_NR_CPUS=64
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
CONFIG_MICROCODE=y
CONFIG_MICROCODE_AMD=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_NUMA=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_EFI=y
CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_COMPAT_VDSO is not set
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_STAT is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
CONFIG_HOTPLUG_PCI=y
CONFIG_BINFMT_MISC=y
CONFIG_IA32_EMULATION=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
CONFIG_NETLABEL=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_FTP=y
CONFIG_NF_CONNTRACK_IRC=y
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_MANGLE=y
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
CONFIG_NET_SCHED=y
CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_HAMRADIO=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_ATA_PIIX=y
CONFIG_PATA_AMD=y
CONFIG_PATA_OLDPIIX=y
CONFIG_PATA_SCH=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_MIRROR=y
CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
CONFIG_NETCONSOLE=y
CONFIG_TIGON3=y
CONFIG_NET_TULIP=y
CONFIG_E100=y
CONFIG_E1000=y
CONFIG_SKY2=y
CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
CONFIG_FDDI=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_MISC=y
CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
CONFIG_NVRAM=y
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
CONFIG_I2C_I801=y
CONFIG_WATCHDOG=y
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
CONFIG_DRM=y
CONFIG_DRM_I915=y
CONFIG_DRM_I915_KMS=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_HRTIMER=y
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
CONFIG_HIDRAW=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
CONFIG_USB_STORAGE=y
CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_DMADEVICES=y
CONFIG_EEEPC_LAPTOP=y
CONFIG_AMD_IOMMU=y
CONFIG_AMD_IOMMU_STATS=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_EFI_VARS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
CONFIG_AUTOFS4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set

93
arch/x86/crypto/Makefile Normal file
View file

@ -0,0 +1,93 @@
#
# Arch-specific CryptoAPI modules.
#
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
camellia-aesni-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
endif
# These modules require assembler to support AVX2.
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \
twofish_avx_glue.o
serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \
serpent_avx_glue.o
endif
ifeq ($(avx2_supported),yes)
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
ifeq ($(avx2_supported),yes)
sha1-ssse3-y += sha1_avx2_x86_64_asm.o
endif
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o

View file

@ -0,0 +1,362 @@
// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
// 1. distributions of this source code include the above copyright
// notice, this list of conditions and the following disclaimer//
//
// 2. distributions in binary form include the above copyright
// notice, this list of conditions and the following disclaimer
// in the documentation and/or other associated materials//
//
// 3. the copyright holder's name is not used to endorse products
// built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002
.file "aes-i586-asm.S"
.text
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
/* offsets to parameters with one register pushed onto stack */
#define ctx 8
#define out_blk 12
#define in_blk 16
/* offsets in crypto_aes_ctx structure */
#define klen (480)
#define ekey (0)
#define dkey (240)
// register mapping for encrypt and decrypt subroutines
#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi
#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh
#define _h(reg) reg##h
#define h(reg) _h(reg)
#define _l(reg) reg##l
#define l(reg) _l(reg)
// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.
// Parameters:
// table table base address
// %1 out_state[0]
// %2 out_state[1]
// %3 out_state[2]
// %4 out_state[3]
// idx input register for the round (destroyed)
// tmp scratch register for the round
// sched key schedule
#define do_col(table, a1,a2,a3,a4, idx, tmp) \
movzx %l(idx),%tmp; \
xor table(,%tmp,4),%a1; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+2*tlen(,%tmp,4),%a3; \
xor table+3*tlen(,%idx,4),%a4;
// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
mov 0 sched,%a1; \
movzx %l(idx),%tmp; \
mov 12 sched,%a2; \
xor table(,%tmp,4),%a1; \
mov 4 sched,%a4; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+3*tlen(,%idx,4),%a4; \
mov %a3,%idx; \
mov 8 sched,%a3; \
xor table+2*tlen(,%tmp,4),%a3;
// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
mov 0 sched,%a1; \
movzx %l(idx),%tmp; \
mov 4 sched,%a2; \
xor table(,%tmp,4),%a1; \
mov 12 sched,%a4; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+3*tlen(,%idx,4),%a4; \
mov %a3,%idx; \
mov 8 sched,%a3; \
xor table+2*tlen(,%tmp,4),%a3;
// original Gladman had conditional saves to MMX regs.
#define save(a1, a2) \
mov %a2,4*a1(%esp)
#define restore(a1, a2) \
mov 4*a2(%esp),%a1
// These macros perform a forward encryption cycle. They are entered with
// the first previous round column values in r0,r1,r4,r5 and
// exit with the final values in the same registers, using stack
// for temporary storage.
// round column values
// on entry: r0,r1,r4,r5
// on exit: r2,r1,r4,r5
#define fwd_rnd1(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
restore(r0,0); \
do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
restore(r0,1); \
do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
// round column values
// on entry: r2,r1,r4,r5
// on exit: r0,r1,r4,r5
#define fwd_rnd2(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
restore(r2,0); \
do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
restore(r2,1); \
do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
// These macros performs an inverse encryption cycle. They are entered with
// the first previous round column values in r0,r1,r4,r5 and
// exit with the final values in the same registers, using stack
// for temporary storage
// round column values
// on entry: r0,r1,r4,r5
// on exit: r2,r1,r4,r5
#define inv_rnd1(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
restore(r0,0); \
do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
restore(r0,1); \
do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
// round column values
// on entry: r2,r1,r4,r5
// on exit: r0,r1,r4,r5
#define inv_rnd2(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
restore(r2,0); \
do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
restore(r2,1); \
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
.extern crypto_ft_tab
.extern crypto_fl_tab
ENTRY(aes_enc_blk)
push %ebp
mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
mov klen(%ebp),%r3 // key size
push %edi
#if ekey != 0
lea ekey(%ebp),%ebp // key pointer
#endif
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%ebp),%r0
xor 4(%ebp),%r1
xor 8(%ebp),%r4
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
add $16,%ebp // increment to next round key
cmp $24,%r3
jb 4f // 10 rounds for 128-bit key
lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea 32(%ebp),%ebp
2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
fwd_rnd2( -48(%ebp), crypto_ft_tab)
3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
fwd_rnd2( -16(%ebp), crypto_ft_tab)
4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
fwd_rnd2( +16(%ebp), crypto_ft_tab)
fwd_rnd1( +32(%ebp), crypto_ft_tab)
fwd_rnd2( +48(%ebp), crypto_ft_tab)
fwd_rnd1( +64(%ebp), crypto_ft_tab)
fwd_rnd2( +80(%ebp), crypto_ft_tab)
fwd_rnd1( +96(%ebp), crypto_ft_tab)
fwd_rnd2(+112(%ebp), crypto_ft_tab)
fwd_rnd1(+128(%ebp), crypto_ft_tab)
fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
add $8,%esp
mov out_blk+12(%esp),%ebp
mov %r5,12(%ebp)
pop %edi
mov %r4,8(%ebp)
pop %esi
mov %r1,4(%ebp)
pop %ebx
mov %r0,(%ebp)
pop %ebp
ret
ENDPROC(aes_enc_blk)
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
.extern crypto_it_tab
.extern crypto_il_tab
ENTRY(aes_dec_blk)
push %ebp
mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
mov klen(%ebp),%r3 // key size
push %edi
#if dkey != 0
lea dkey(%ebp),%ebp // key pointer
#endif
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%ebp),%r0
xor 4(%ebp),%r1
xor 8(%ebp),%r4
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
add $16,%ebp // increment to next round key
cmp $24,%r3
jb 4f // 10 rounds for 128-bit key
lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea 32(%ebp),%ebp
2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
inv_rnd2( -48(%ebp), crypto_it_tab)
3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
inv_rnd2( -16(%ebp), crypto_it_tab)
4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
inv_rnd2( +16(%ebp), crypto_it_tab)
inv_rnd1( +32(%ebp), crypto_it_tab)
inv_rnd2( +48(%ebp), crypto_it_tab)
inv_rnd1( +64(%ebp), crypto_it_tab)
inv_rnd2( +80(%ebp), crypto_it_tab)
inv_rnd1( +96(%ebp), crypto_it_tab)
inv_rnd2(+112(%ebp), crypto_it_tab)
inv_rnd1(+128(%ebp), crypto_it_tab)
inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
add $8,%esp
mov out_blk+12(%esp),%ebp
mov %r5,12(%ebp)
pop %edi
mov %r4,8(%ebp)
pop %esi
mov %r1,4(%ebp)
pop %ebx
mov %r0,(%ebp)
pop %ebp
ret
ENDPROC(aes_dec_blk)

View file

@ -0,0 +1,188 @@
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
*
* Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
*
* License:
* This code can be distributed under the terms of the GNU General Public
* License (GPL) Version 2 provided that the above header down to and
* including this sentence is retained in full.
*/
.extern crypto_ft_tab
.extern crypto_it_tab
.extern crypto_fl_tab
.extern crypto_il_tab
.text
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#define R1 %rax
#define R1E %eax
#define R1X %ax
#define R1H %ah
#define R1L %al
#define R2 %rbx
#define R2E %ebx
#define R2X %bx
#define R2H %bh
#define R2L %bl
#define R3 %rcx
#define R3E %ecx
#define R3X %cx
#define R3H %ch
#define R3L %cl
#define R4 %rdx
#define R4E %edx
#define R4X %dx
#define R4H %dh
#define R4L %dl
#define R5 %rsi
#define R5E %esi
#define R6 %rdi
#define R6E %edi
#define R7 %rbp
#define R7E %ebp
#define R8 %r8
#define R9 %r9
#define R10 %r10
#define R11 %r11
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
ENTRY(FUNC); \
movq r1,r2; \
movq r3,r4; \
leaq KEY+48(r8),r9; \
movq r10,r11; \
movl (r7),r5 ## E; \
movl 4(r7),r1 ## E; \
movl 8(r7),r6 ## E; \
movl 12(r7),r7 ## E; \
movl 480(r8),r10 ## E; \
xorl -48(r9),r5 ## E; \
xorl -44(r9),r1 ## E; \
xorl -40(r9),r6 ## E; \
xorl -36(r9),r7 ## E; \
cmpl $24,r10 ## E; \
jb B128; \
leaq 32(r9),r9; \
je B192; \
leaq 32(r9),r9;
#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
movq r1,r2; \
movq r3,r4; \
movl r5 ## E,(r9); \
movl r6 ## E,4(r9); \
movl r7 ## E,8(r9); \
movl r8 ## E,12(r9); \
ret; \
ENDPROC(FUNC);
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
movzbl r2 ## L,r6 ## E; \
movl TAB+1024(,r5,4),r5 ## E;\
movw r4 ## X,r2 ## X; \
movl TAB(,r6,4),r6 ## E; \
roll $16,r2 ## E; \
shrl $16,r4 ## E; \
movzbl r4 ## H,r7 ## E; \
movzbl r4 ## L,r4 ## E; \
xorl OFFSET(r8),ra ## E; \
xorl OFFSET+4(r8),rb ## E; \
xorl TAB+3072(,r7,4),r5 ## E;\
xorl TAB+2048(,r4,4),r6 ## E;\
movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r4 ## E; \
movl TAB+1024(,r4,4),r4 ## E;\
movw r3 ## X,r1 ## X; \
roll $16,r1 ## E; \
shrl $16,r3 ## E; \
xorl TAB(,r7,4),r5 ## E; \
movzbl r3 ## H,r7 ## E; \
movzbl r3 ## L,r3 ## E; \
xorl TAB+3072(,r7,4),r4 ## E;\
xorl TAB+2048(,r3,4),r5 ## E;\
movzbl r1 ## H,r7 ## E; \
movzbl r1 ## L,r3 ## E; \
shrl $16,r1 ## E; \
xorl TAB+3072(,r7,4),r6 ## E;\
movl TAB+2048(,r3,4),r3 ## E;\
movzbl r1 ## H,r7 ## E; \
movzbl r1 ## L,r1 ## E; \
xorl TAB+1024(,r7,4),r6 ## E;\
xorl TAB(,r1,4),r3 ## E; \
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r7 ## E; \
shrl $16,r2 ## E; \
xorl TAB+3072(,r1,4),r3 ## E;\
xorl TAB+2048(,r7,4),r4 ## E;\
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r2 ## E; \
xorl OFFSET+8(r8),rc ## E; \
xorl OFFSET+12(r8),rd ## E; \
xorl TAB+1024(,r1,4),r3 ## E;\
xorl TAB(,r2,4),r4 ## E;
#define move_regs(r1,r2,r3,r4) \
movl r3 ## E,r1 ## E; \
movl r4 ## E,r2 ## E;
#define entry(FUNC,KEY,B128,B192) \
prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
#define encrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define encrypt_final(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
#define decrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define decrypt_final(TAB,OFFSET) \
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
entry(aes_enc_blk,0,.Le128,.Le192)
encrypt_round(crypto_ft_tab,-96)
encrypt_round(crypto_ft_tab,-80)
.Le192: encrypt_round(crypto_ft_tab,-64)
encrypt_round(crypto_ft_tab,-48)
.Le128: encrypt_round(crypto_ft_tab,-32)
encrypt_round(crypto_ft_tab,-16)
encrypt_round(crypto_ft_tab, 0)
encrypt_round(crypto_ft_tab, 16)
encrypt_round(crypto_ft_tab, 32)
encrypt_round(crypto_ft_tab, 48)
encrypt_round(crypto_ft_tab, 64)
encrypt_round(crypto_ft_tab, 80)
encrypt_round(crypto_ft_tab, 96)
encrypt_final(crypto_fl_tab,112)
return(aes_enc_blk)
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
entry(aes_dec_blk,240,.Ld128,.Ld192)
decrypt_round(crypto_it_tab,-96)
decrypt_round(crypto_it_tab,-80)
.Ld192: decrypt_round(crypto_it_tab,-64)
decrypt_round(crypto_it_tab,-48)
.Ld128: decrypt_round(crypto_it_tab,-32)
decrypt_round(crypto_it_tab,-16)
decrypt_round(crypto_it_tab, 0)
decrypt_round(crypto_it_tab, 16)
decrypt_round(crypto_it_tab, 32)
decrypt_round(crypto_it_tab, 48)
decrypt_round(crypto_it_tab, 64)
decrypt_round(crypto_it_tab, 80)
decrypt_round(crypto_it_tab, 96)
decrypt_final(crypto_il_tab,112)
return(aes_dec_blk)

View file

@ -0,0 +1,580 @@
/*
* Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
*
* This is AES128/192/256 CTR mode optimization implementation. It requires
* the support of Intel(R) AESNI and AVX instructions.
*
* This work was inspired by the AES CTR mode optimization published
* in Intel Optimized IPSEC Cryptograhpic library.
* Additional information on it can be found at:
* http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Contact Information:
* James Guilford <james.guilford@intel.com>
* Sean Gulley <sean.m.gulley@intel.com>
* Chandramouli Narayanan <mouli@linux.intel.com>
*
* BSD LICENSE
*
* Copyright(c) 2014 Intel Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/linkage.h>
#include <asm/inst.h>
#define CONCAT(a,b) a##b
#define VMOVDQ vmovdqu
#define xdata0 %xmm0
#define xdata1 %xmm1
#define xdata2 %xmm2
#define xdata3 %xmm3
#define xdata4 %xmm4
#define xdata5 %xmm5
#define xdata6 %xmm6
#define xdata7 %xmm7
#define xcounter %xmm8
#define xbyteswap %xmm9
#define xkey0 %xmm10
#define xkey4 %xmm11
#define xkey8 %xmm12
#define xkey12 %xmm13
#define xkeyA %xmm14
#define xkeyB %xmm15
#define p_in %rdi
#define p_iv %rsi
#define p_keys %rdx
#define p_out %rcx
#define num_bytes %r8
#define tmp %r10
#define DDQ(i) CONCAT(ddq_add_,i)
#define XMM(i) CONCAT(%xmm, i)
#define DDQ_DATA 0
#define XDATA 1
#define KEY_128 1
#define KEY_192 2
#define KEY_256 3
.section .rodata
.align 16
byteswap_const:
.octa 0x000102030405060708090A0B0C0D0E0F
ddq_low_msk:
.octa 0x0000000000000000FFFFFFFFFFFFFFFF
ddq_high_add_1:
.octa 0x00000000000000010000000000000000
ddq_add_1:
.octa 0x00000000000000000000000000000001
ddq_add_2:
.octa 0x00000000000000000000000000000002
ddq_add_3:
.octa 0x00000000000000000000000000000003
ddq_add_4:
.octa 0x00000000000000000000000000000004
ddq_add_5:
.octa 0x00000000000000000000000000000005
ddq_add_6:
.octa 0x00000000000000000000000000000006
ddq_add_7:
.octa 0x00000000000000000000000000000007
ddq_add_8:
.octa 0x00000000000000000000000000000008
.text
/* generate a unique variable for ddq_add_x */
.macro setddq n
var_ddq_add = DDQ(\n)
.endm
/* generate a unique variable for xmm register */
.macro setxdata n
var_xdata = XMM(\n)
.endm
/* club the numeric 'id' to the symbol 'name' */
.macro club name, id
.altmacro
.if \name == DDQ_DATA
setddq %\id
.elseif \name == XDATA
setxdata %\id
.endif
.noaltmacro
.endm
/*
* do_aes num_in_par load_keys key_len
* This increments p_in, but not p_out
*/
.macro do_aes b, k, key_len
.set by, \b
.set load_keys, \k
.set klen, \key_len
.if (load_keys)
vmovdqa 0*16(p_keys), xkey0
.endif
vpshufb xbyteswap, xcounter, xdata0
.set i, 1
.rept (by - 1)
club DDQ_DATA, i
club XDATA, i
vpaddq var_ddq_add(%rip), xcounter, var_xdata
vptest ddq_low_msk(%rip), var_xdata
jnz 1f
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
1:
vpshufb xbyteswap, var_xdata, var_xdata
.set i, (i +1)
.endr
vmovdqa 1*16(p_keys), xkeyA
vpxor xkey0, xdata0, xdata0
club DDQ_DATA, by
vpaddq var_ddq_add(%rip), xcounter, xcounter
vptest ddq_low_msk(%rip), xcounter
jnz 1f
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
1:
.set i, 1
.rept (by - 1)
club XDATA, i
vpxor xkey0, var_xdata, var_xdata
.set i, (i +1)
.endr
vmovdqa 2*16(p_keys), xkeyB
.set i, 0
.rept by
club XDATA, i
vaesenc xkeyA, var_xdata, var_xdata /* key 1 */
.set i, (i +1)
.endr
.if (klen == KEY_128)
.if (load_keys)
vmovdqa 3*16(p_keys), xkey4
.endif
.else
vmovdqa 3*16(p_keys), xkeyA
.endif
.set i, 0
.rept by
club XDATA, i
vaesenc xkeyB, var_xdata, var_xdata /* key 2 */
.set i, (i +1)
.endr
add $(16*by), p_in
.if (klen == KEY_128)
vmovdqa 4*16(p_keys), xkeyB
.else
.if (load_keys)
vmovdqa 4*16(p_keys), xkey4
.endif
.endif
.set i, 0
.rept by
club XDATA, i
/* key 3 */
.if (klen == KEY_128)
vaesenc xkey4, var_xdata, var_xdata
.else
vaesenc xkeyA, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
vmovdqa 5*16(p_keys), xkeyA
.set i, 0
.rept by
club XDATA, i
/* key 4 */
.if (klen == KEY_128)
vaesenc xkeyB, var_xdata, var_xdata
.else
vaesenc xkey4, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
.if (klen == KEY_128)
.if (load_keys)
vmovdqa 6*16(p_keys), xkey8
.endif
.else
vmovdqa 6*16(p_keys), xkeyB
.endif
.set i, 0
.rept by
club XDATA, i
vaesenc xkeyA, var_xdata, var_xdata /* key 5 */
.set i, (i +1)
.endr
vmovdqa 7*16(p_keys), xkeyA
.set i, 0
.rept by
club XDATA, i
/* key 6 */
.if (klen == KEY_128)
vaesenc xkey8, var_xdata, var_xdata
.else
vaesenc xkeyB, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
.if (klen == KEY_128)
vmovdqa 8*16(p_keys), xkeyB
.else
.if (load_keys)
vmovdqa 8*16(p_keys), xkey8
.endif
.endif
.set i, 0
.rept by
club XDATA, i
vaesenc xkeyA, var_xdata, var_xdata /* key 7 */
.set i, (i +1)
.endr
.if (klen == KEY_128)
.if (load_keys)
vmovdqa 9*16(p_keys), xkey12
.endif
.else
vmovdqa 9*16(p_keys), xkeyA
.endif
.set i, 0
.rept by
club XDATA, i
/* key 8 */
.if (klen == KEY_128)
vaesenc xkeyB, var_xdata, var_xdata
.else
vaesenc xkey8, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
vmovdqa 10*16(p_keys), xkeyB
.set i, 0
.rept by
club XDATA, i
/* key 9 */
.if (klen == KEY_128)
vaesenc xkey12, var_xdata, var_xdata
.else
vaesenc xkeyA, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
.if (klen != KEY_128)
vmovdqa 11*16(p_keys), xkeyA
.endif
.set i, 0
.rept by
club XDATA, i
/* key 10 */
.if (klen == KEY_128)
vaesenclast xkeyB, var_xdata, var_xdata
.else
vaesenc xkeyB, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
.if (klen != KEY_128)
.if (load_keys)
vmovdqa 12*16(p_keys), xkey12
.endif
.set i, 0
.rept by
club XDATA, i
vaesenc xkeyA, var_xdata, var_xdata /* key 11 */
.set i, (i +1)
.endr
.if (klen == KEY_256)
vmovdqa 13*16(p_keys), xkeyA
.endif
.set i, 0
.rept by
club XDATA, i
.if (klen == KEY_256)
/* key 12 */
vaesenc xkey12, var_xdata, var_xdata
.else
vaesenclast xkey12, var_xdata, var_xdata
.endif
.set i, (i +1)
.endr
.if (klen == KEY_256)
vmovdqa 14*16(p_keys), xkeyB
.set i, 0
.rept by
club XDATA, i
/* key 13 */
vaesenc xkeyA, var_xdata, var_xdata
.set i, (i +1)
.endr
.set i, 0
.rept by
club XDATA, i
/* key 14 */
vaesenclast xkeyB, var_xdata, var_xdata
.set i, (i +1)
.endr
.endif
.endif
.set i, 0
.rept (by / 2)
.set j, (i+1)
VMOVDQ (i*16 - 16*by)(p_in), xkeyA
VMOVDQ (j*16 - 16*by)(p_in), xkeyB
club XDATA, i
vpxor xkeyA, var_xdata, var_xdata
club XDATA, j
vpxor xkeyB, var_xdata, var_xdata
.set i, (i+2)
.endr
.if (i < by)
VMOVDQ (i*16 - 16*by)(p_in), xkeyA
club XDATA, i
vpxor xkeyA, var_xdata, var_xdata
.endif
.set i, 0
.rept by
club XDATA, i
VMOVDQ var_xdata, i*16(p_out)
.set i, (i+1)
.endr
.endm
.macro do_aes_load val, key_len
do_aes \val, 1, \key_len
.endm
.macro do_aes_noload val, key_len
do_aes \val, 0, \key_len
.endm
/* main body of aes ctr load */
.macro do_aes_ctrmain key_len
cmp $16, num_bytes
jb .Ldo_return2\key_len
vmovdqa byteswap_const(%rip), xbyteswap
vmovdqu (p_iv), xcounter
vpshufb xbyteswap, xcounter, xcounter
mov num_bytes, tmp
and $(7*16), tmp
jz .Lmult_of_8_blks\key_len
/* 1 <= tmp <= 7 */
cmp $(4*16), tmp
jg .Lgt4\key_len
je .Leq4\key_len
.Llt4\key_len:
cmp $(2*16), tmp
jg .Leq3\key_len
je .Leq2\key_len
.Leq1\key_len:
do_aes_load 1, \key_len
add $(1*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Leq2\key_len:
do_aes_load 2, \key_len
add $(2*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Leq3\key_len:
do_aes_load 3, \key_len
add $(3*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Leq4\key_len:
do_aes_load 4, \key_len
add $(4*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Lgt4\key_len:
cmp $(6*16), tmp
jg .Leq7\key_len
je .Leq6\key_len
.Leq5\key_len:
do_aes_load 5, \key_len
add $(5*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Leq6\key_len:
do_aes_load 6, \key_len
add $(6*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Leq7\key_len:
do_aes_load 7, \key_len
add $(7*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
.Lmult_of_8_blks\key_len:
.if (\key_len != KEY_128)
vmovdqa 0*16(p_keys), xkey0
vmovdqa 4*16(p_keys), xkey4
vmovdqa 8*16(p_keys), xkey8
vmovdqa 12*16(p_keys), xkey12
.else
vmovdqa 0*16(p_keys), xkey0
vmovdqa 3*16(p_keys), xkey4
vmovdqa 6*16(p_keys), xkey8
vmovdqa 9*16(p_keys), xkey12
.endif
.align 16
.Lmain_loop2\key_len:
/* num_bytes is a multiple of 8 and >0 */
do_aes_noload 8, \key_len
add $(8*16), p_out
sub $(8*16), num_bytes
jne .Lmain_loop2\key_len
.Ldo_return2\key_len:
/* return updated IV */
vpshufb xbyteswap, xcounter, xcounter
vmovdqu xcounter, (p_iv)
ret
.endm
/*
* routine to do AES128 CTR enc/decrypt "by8"
* XMM registers are clobbered.
* Saving/restoring must be done at a higher level
* aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
* unsigned int num_bytes)
*/
ENTRY(aes_ctr_enc_128_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_128
ENDPROC(aes_ctr_enc_128_avx_by8)
/*
* routine to do AES192 CTR enc/decrypt "by8"
* XMM registers are clobbered.
* Saving/restoring must be done at a higher level
* aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
* unsigned int num_bytes)
*/
ENTRY(aes_ctr_enc_192_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_192
ENDPROC(aes_ctr_enc_192_avx_by8)
/*
* routine to do AES256 CTR enc/decrypt "by8"
* XMM registers are clobbered.
* Saving/restoring must be done at a higher level
* aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
* unsigned int num_bytes)
*/
ENTRY(aes_ctr_enc_256_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_256
ENDPROC(aes_ctr_enc_256_avx_by8)

View file

@ -0,0 +1,70 @@
/*
* Glue Code for the asm optimized version of the AES Cipher Algorithm
*
*/
#include <linux/module.h>
#include <crypto/aes.h>
#include <asm/crypto/aes.h>
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
{
aes_enc_blk(ctx, dst, src);
}
EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
{
aes_dec_blk(ctx, dst, src);
}
EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = crypto_aes_set_key,
.cia_encrypt = aes_encrypt,
.cia_decrypt = aes_decrypt
}
}
};
static int __init aes_init(void)
{
return crypto_register_alg(&aes_alg);
}
static void __exit aes_fini(void)
{
crypto_unregister_alg(&aes_alg);
}
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("aes-asm");

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,379 @@
/*
* Blowfish Cipher Algorithm (x86_64)
*
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/linkage.h>
.file "blowfish-x86_64-asm.S"
.text
/* structure of crypto context */
#define p 0
#define s0 ((16 + 2) * 4)
#define s1 ((16 + 2 + (1 * 256)) * 4)
#define s2 ((16 + 2 + (2 * 256)) * 4)
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
#define CTX %rdi
#define RIO %rsi
#define RX0 %rax
#define RX1 %rbx
#define RX2 %rcx
#define RX3 %rdx
#define RX0d %eax
#define RX1d %ebx
#define RX2d %ecx
#define RX3d %edx
#define RX0bl %al
#define RX1bl %bl
#define RX2bl %cl
#define RX3bl %dl
#define RX0bh %ah
#define RX1bh %bh
#define RX2bh %ch
#define RX3bh %dh
#define RT0 %rbp
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9
#define RT0d %ebp
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d
#define RKEY %r10
/***********************************************************************
* 1-way blowfish
***********************************************************************/
#define F() \
rorq $16, RX0; \
movzbl RX0bh, RT0d; \
movzbl RX0bl, RT1d; \
rolq $16, RX0; \
movl s0(CTX,RT0,4), RT0d; \
addl s1(CTX,RT1,4), RT0d; \
movzbl RX0bh, RT1d; \
movzbl RX0bl, RT2d; \
rolq $32, RX0; \
xorl s2(CTX,RT1,4), RT0d; \
addl s3(CTX,RT2,4), RT0d; \
xorq RT0, RX0;
#define add_roundkey_enc(n) \
xorq p+4*(n)(CTX), RX0;
#define round_enc(n) \
add_roundkey_enc(n); \
\
F(); \
F();
#define add_roundkey_dec(n) \
movq p+4*(n-1)(CTX), RT0; \
rorq $32, RT0; \
xorq RT0, RX0;
#define round_dec(n) \
add_roundkey_dec(n); \
\
F(); \
F(); \
#define read_block() \
movq (RIO), RX0; \
rorq $32, RX0; \
bswapq RX0;
#define write_block() \
bswapq RX0; \
movq RX0, (RIO);
#define xor_block() \
bswapq RX0; \
xorq RX0, (RIO);
ENTRY(__blowfish_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
movq %rbp, %r11;
movq %rsi, %r10;
movq %rdx, RIO;
read_block();
round_enc(0);
round_enc(2);
round_enc(4);
round_enc(6);
round_enc(8);
round_enc(10);
round_enc(12);
round_enc(14);
add_roundkey_enc(16);
movq %r11, %rbp;
movq %r10, RIO;
test %cl, %cl;
jnz .L__enc_xor;
write_block();
ret;
.L__enc_xor:
xor_block();
ret;
ENDPROC(__blowfish_enc_blk)
ENTRY(blowfish_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rbp, %r11;
movq %rsi, %r10;
movq %rdx, RIO;
read_block();
round_dec(17);
round_dec(15);
round_dec(13);
round_dec(11);
round_dec(9);
round_dec(7);
round_dec(5);
round_dec(3);
add_roundkey_dec(1);
movq %r10, RIO;
write_block();
movq %r11, %rbp;
ret;
ENDPROC(blowfish_dec_blk)
/**********************************************************************
4-way blowfish, four blocks parallel
**********************************************************************/
/* F() for 4-way. Slower when used alone/1-way, but faster when used
* parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
*/
#define F4(x) \
movzbl x ## bh, RT1d; \
movzbl x ## bl, RT3d; \
rorq $16, x; \
movzbl x ## bh, RT0d; \
movzbl x ## bl, RT2d; \
rorq $16, x; \
movl s0(CTX,RT0,4), RT0d; \
addl s1(CTX,RT2,4), RT0d; \
xorl s2(CTX,RT1,4), RT0d; \
addl s3(CTX,RT3,4), RT0d; \
xorq RT0, x;
#define add_preloaded_roundkey4() \
xorq RKEY, RX0; \
xorq RKEY, RX1; \
xorq RKEY, RX2; \
xorq RKEY, RX3;
#define preload_roundkey_enc(n) \
movq p+4*(n)(CTX), RKEY;
#define add_roundkey_enc4(n) \
add_preloaded_roundkey4(); \
preload_roundkey_enc(n + 2);
#define round_enc4(n) \
add_roundkey_enc4(n); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3);
#define preload_roundkey_dec(n) \
movq p+4*((n)-1)(CTX), RKEY; \
rorq $32, RKEY;
#define add_roundkey_dec4(n) \
add_preloaded_roundkey4(); \
preload_roundkey_dec(n - 2);
#define round_dec4(n) \
add_roundkey_dec4(n); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3); \
\
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3);
#define read_block4() \
movq (RIO), RX0; \
rorq $32, RX0; \
bswapq RX0; \
\
movq 8(RIO), RX1; \
rorq $32, RX1; \
bswapq RX1; \
\
movq 16(RIO), RX2; \
rorq $32, RX2; \
bswapq RX2; \
\
movq 24(RIO), RX3; \
rorq $32, RX3; \
bswapq RX3;
#define write_block4() \
bswapq RX0; \
movq RX0, (RIO); \
\
bswapq RX1; \
movq RX1, 8(RIO); \
\
bswapq RX2; \
movq RX2, 16(RIO); \
\
bswapq RX3; \
movq RX3, 24(RIO);
#define xor_block4() \
bswapq RX0; \
xorq RX0, (RIO); \
\
bswapq RX1; \
xorq RX1, 8(RIO); \
\
bswapq RX2; \
xorq RX2, 16(RIO); \
\
bswapq RX3; \
xorq RX3, 24(RIO);
ENTRY(__blowfish_enc_blk_4way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
pushq %rbp;
pushq %rbx;
pushq %rcx;
preload_roundkey_enc(0);
movq %rsi, %r11;
movq %rdx, RIO;
read_block4();
round_enc4(0);
round_enc4(2);
round_enc4(4);
round_enc4(6);
round_enc4(8);
round_enc4(10);
round_enc4(12);
round_enc4(14);
add_preloaded_roundkey4();
popq %rbp;
movq %r11, RIO;
test %bpl, %bpl;
jnz .L__enc_xor4;
write_block4();
popq %rbx;
popq %rbp;
ret;
.L__enc_xor4:
xor_block4();
popq %rbx;
popq %rbp;
ret;
ENDPROC(__blowfish_enc_blk_4way)
ENTRY(blowfish_dec_blk_4way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %rbp;
pushq %rbx;
preload_roundkey_dec(17);
movq %rsi, %r11;
movq %rdx, RIO;
read_block4();
round_dec4(17);
round_dec4(15);
round_dec4(13);
round_dec4(11);
round_dec4(9);
round_dec4(7);
round_dec4(5);
round_dec4(3);
add_preloaded_roundkey4();
movq %r11, RIO;
write_block4();
popq %rbx;
popq %rbp;
ret;
ENDPROC(blowfish_dec_blk_4way)

View file

@ -0,0 +1,482 @@
/*
* Glue Code for assembler optimized version of Blowfish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <asm/processor.h>
#include <crypto/blowfish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes;
int err;
err = blkcipher_walk_virt(desc, walk);
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
fn_4way(ctx, wdst, wsrc);
wsrc += bsize * 4;
wdst += bsize * 4;
nbytes -= bsize * 4;
} while (nbytes >= bsize * 4);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
}
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ivs[4 - 1];
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
nbytes -= bsize * 4 - bsize;
src -= 4 - 1;
dst -= 4 - 1;
ivs[0] = src[0];
ivs[1] = src[1];
ivs[2] = src[2];
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
dst[1] ^= ivs[0];
dst[2] ^= ivs[1];
dst[3] ^= ivs[2];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 4);
}
/* Handle leftovers */
for (;;) {
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[BF_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
blowfish_enc_blk(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, BF_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
__be64 ctrblocks[4];
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
if (dst != src) {
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
}
/* create ctrblks for parallel encrypt */
ctrblocks[0] = cpu_to_be64(ctrblk++);
ctrblocks[1] = cpu_to_be64(ctrblk++);
ctrblocks[2] = cpu_to_be64(ctrblk++);
ctrblocks[3] = cpu_to_be64(ctrblk++);
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += 4;
dst += 4;
} while ((nbytes -= bsize * 4) >= bsize * 4);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (dst != src)
*dst = *src;
ctrblocks[0] = cpu_to_be64(ctrblk++);
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
return nbytes;
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
static struct crypto_alg bf_algs[4] = { {
.cra_name = "blowfish",
.cra_driver_name = "blowfish-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = BF_MIN_KEY_SIZE,
.cia_max_keysize = BF_MAX_KEY_SIZE,
.cia_setkey = blowfish_setkey,
.cia_encrypt = blowfish_encrypt,
.cia_decrypt = blowfish_decrypt,
}
}
}, {
.cra_name = "ecb(blowfish)",
.cra_driver_name = "ecb-blowfish-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "cbc(blowfish)",
.cra_driver_name = "cbc-blowfish-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "ctr(blowfish)",
.cra_driver_name = "ctr-blowfish-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
} };
static bool is_blacklisted_cpu(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
if (boot_cpu_data.x86 == 0x0f) {
/*
* On Pentium 4, blowfish-x86_64 is slower than generic C
* implementation because use of 64bit rotates (which are really
* slow on P4). Therefore blacklist P4s.
*/
return true;
}
return false;
}
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"blowfish-x86_64: performance on this CPU "
"would be suboptimal: disabling "
"blowfish-x86_64.\n");
return -ENODEV;
}
return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
static void __exit fini(void)
{
crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("blowfish");
MODULE_ALIAS_CRYPTO("blowfish-asm");

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,514 @@
/*
* Camellia Cipher Algorithm (x86_64)
*
* Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/linkage.h>
.file "camellia-x86_64-asm_64.S"
.text
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110
#define CAMELLIA_TABLE_BYTE_LEN 272
/* struct camellia_ctx: */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN
/* register macros */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi
#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx
#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx
#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl
#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh
#define RT0 %rsi
#define RT1 %rbp
#define RT2 %r8
#define RT0d %esi
#define RT1d %ebp
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
#define RRBP %r10
#define RDST %r11
#define RXORd %r9d
#define RXORbl %r9b
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
movzbl ab ## bl, tmp2 ## d; \
movzbl ab ## bh, tmp1 ## d; \
rorq $16, ab; \
xorq T0(, tmp2, 8), dst; \
xorq T1(, tmp1, 8), dst;
/**********************************************************************
1-way camellia
**********************************************************************/
#define roundsm(ab, subkey, cd) \
movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
\
xorq RT2, cd ## 0;
#define fls(l, r, kl, kr) \
movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
andl l ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, l ## 0; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
orq r ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, r ## 0; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
orq l ## 0, RT2; \
shrq $32, RT2; \
xorq RT2, l ## 0; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
andl r ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, r ## 0;
#define enc_rounds(i) \
roundsm(RAB, i + 2, RCD); \
roundsm(RCD, i + 3, RAB); \
roundsm(RAB, i + 4, RCD); \
roundsm(RCD, i + 5, RAB); \
roundsm(RAB, i + 6, RCD); \
roundsm(RCD, i + 7, RAB);
#define enc_fls(i) \
fls(RAB, RCD, i + 0, i + 1);
#define enc_inpack() \
movq (RIO), RAB0; \
bswapq RAB0; \
rolq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rorq $32, RCD0; \
xorq key_table(CTX), RAB0;
#define enc_outunpack(op, max) \
xorq key_table(CTX, max, 8), RCD0; \
rorq $32, RCD0; \
bswapq RCD0; \
op ## q RCD0, (RIO); \
rolq $32, RAB0; \
bswapq RAB0; \
op ## q RAB0, 4*2(RIO);
#define dec_rounds(i) \
roundsm(RAB, i + 7, RCD); \
roundsm(RCD, i + 6, RAB); \
roundsm(RAB, i + 5, RCD); \
roundsm(RCD, i + 4, RAB); \
roundsm(RAB, i + 3, RCD); \
roundsm(RCD, i + 2, RAB);
#define dec_fls(i) \
fls(RAB, RCD, i + 1, i + 0);
#define dec_inpack(max) \
movq (RIO), RAB0; \
bswapq RAB0; \
rolq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rorq $32, RCD0; \
xorq key_table(CTX, max, 8), RAB0;
#define dec_outunpack() \
xorq key_table(CTX), RCD0; \
rorq $32, RCD0; \
bswapq RCD0; \
movq RCD0, (RIO); \
rolq $32, RAB0; \
bswapq RAB0; \
movq RAB0, 4*2(RIO);
ENTRY(__camellia_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool xor
*/
movq %rbp, RRBP;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_inpack();
enc_rounds(0);
enc_fls(8);
enc_rounds(8);
enc_fls(16);
enc_rounds(16);
movl $24, RT1d; /* max */
cmpb $16, key_length(CTX);
je .L__enc_done;
enc_fls(24);
enc_rounds(24);
movl $32, RT1d; /* max */
.L__enc_done:
testb RXORbl, RXORbl;
movq RDST, RIO;
jnz .L__enc_xor;
enc_outunpack(mov, RT1);
movq RRBP, %rbp;
ret;
.L__enc_xor:
enc_outunpack(xor, RT1);
movq RRBP, %rbp;
ret;
ENDPROC(__camellia_enc_blk)
ENTRY(camellia_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
cmpl $16, key_length(CTX);
movl $32, RT2d;
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
movq %rbp, RRBP;
movq %rsi, RDST;
movq %rdx, RIO;
dec_inpack(RT2);
cmpb $24, RT2bl;
je .L__dec_rounds16;
dec_rounds(24);
dec_fls(24);
.L__dec_rounds16:
dec_rounds(16);
dec_fls(16);
dec_rounds(8);
dec_fls(8);
dec_rounds(0);
movq RDST, RIO;
dec_outunpack();
movq RRBP, %rbp;
ret;
ENDPROC(camellia_dec_blk)
/**********************************************************************
2-way camellia
**********************************************************************/
#define roundsm2(ab, subkey, cd) \
movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
xorq RT2, cd ## 1; \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
xorq RT2, cd ## 0; \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
#define fls2(l, r, kl, kr) \
movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
andl l ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, l ## 0; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
orq r ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, r ## 0; \
\
movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
andl l ## 1d, RT2d; \
roll $1, RT2d; \
shlq $32, RT2; \
xorq RT2, l ## 1; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
orq r ## 1, RT0; \
shrq $32, RT0; \
xorq RT0, r ## 1; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
orq l ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, l ## 0; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
andl r ## 0d, RT2d; \
roll $1, RT2d; \
shlq $32, RT2; \
xorq RT2, r ## 0; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
orq l ## 1, RT0; \
shrq $32, RT0; \
xorq RT0, l ## 1; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
andl r ## 1d, RT1d; \
roll $1, RT1d; \
shlq $32, RT1; \
xorq RT1, r ## 1;
#define enc_rounds2(i) \
roundsm2(RAB, i + 2, RCD); \
roundsm2(RCD, i + 3, RAB); \
roundsm2(RAB, i + 4, RCD); \
roundsm2(RCD, i + 5, RAB); \
roundsm2(RAB, i + 6, RCD); \
roundsm2(RCD, i + 7, RAB);
#define enc_fls2(i) \
fls2(RAB, RCD, i + 0, i + 1);
#define enc_inpack2() \
movq (RIO), RAB0; \
bswapq RAB0; \
rorq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rolq $32, RCD0; \
xorq key_table(CTX), RAB0; \
\
movq 8*2(RIO), RAB1; \
bswapq RAB1; \
rorq $32, RAB1; \
movq 12*2(RIO), RCD1; \
bswapq RCD1; \
rolq $32, RCD1; \
xorq key_table(CTX), RAB1;
#define enc_outunpack2(op, max) \
xorq key_table(CTX, max, 8), RCD0; \
rolq $32, RCD0; \
bswapq RCD0; \
op ## q RCD0, (RIO); \
rorq $32, RAB0; \
bswapq RAB0; \
op ## q RAB0, 4*2(RIO); \
\
xorq key_table(CTX, max, 8), RCD1; \
rolq $32, RCD1; \
bswapq RCD1; \
op ## q RCD1, 8*2(RIO); \
rorq $32, RAB1; \
bswapq RAB1; \
op ## q RAB1, 12*2(RIO);
#define dec_rounds2(i) \
roundsm2(RAB, i + 7, RCD); \
roundsm2(RCD, i + 6, RAB); \
roundsm2(RAB, i + 5, RCD); \
roundsm2(RCD, i + 4, RAB); \
roundsm2(RAB, i + 3, RCD); \
roundsm2(RCD, i + 2, RAB);
#define dec_fls2(i) \
fls2(RAB, RCD, i + 1, i + 0);
#define dec_inpack2(max) \
movq (RIO), RAB0; \
bswapq RAB0; \
rorq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rolq $32, RCD0; \
xorq key_table(CTX, max, 8), RAB0; \
\
movq 8*2(RIO), RAB1; \
bswapq RAB1; \
rorq $32, RAB1; \
movq 12*2(RIO), RCD1; \
bswapq RCD1; \
rolq $32, RCD1; \
xorq key_table(CTX, max, 8), RAB1;
#define dec_outunpack2() \
xorq key_table(CTX), RCD0; \
rolq $32, RCD0; \
bswapq RCD0; \
movq RCD0, (RIO); \
rorq $32, RAB0; \
bswapq RAB0; \
movq RAB0, 4*2(RIO); \
\
xorq key_table(CTX), RCD1; \
rolq $32, RCD1; \
bswapq RCD1; \
movq RCD1, 8*2(RIO); \
rorq $32, RAB1; \
bswapq RAB1; \
movq RAB1, 12*2(RIO);
ENTRY(__camellia_enc_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool xor
*/
pushq %rbx;
movq %rbp, RRBP;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_inpack2();
enc_rounds2(0);
enc_fls2(8);
enc_rounds2(8);
enc_fls2(16);
enc_rounds2(16);
movl $24, RT2d; /* max */
cmpb $16, key_length(CTX);
je .L__enc2_done;
enc_fls2(24);
enc_rounds2(24);
movl $32, RT2d; /* max */
.L__enc2_done:
test RXORbl, RXORbl;
movq RDST, RIO;
jnz .L__enc2_xor;
enc_outunpack2(mov, RT2);
movq RRBP, %rbp;
popq %rbx;
ret;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
movq RRBP, %rbp;
popq %rbx;
ret;
ENDPROC(__camellia_enc_blk_2way)
ENTRY(camellia_dec_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
cmpl $16, key_length(CTX);
movl $32, RT2d;
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
movq %rbx, RXOR;
movq %rbp, RRBP;
movq %rsi, RDST;
movq %rdx, RIO;
dec_inpack2(RT2);
cmpb $24, RT2bl;
je .L__dec2_rounds16;
dec_rounds2(24);
dec_fls2(24);
.L__dec2_rounds16:
dec_rounds2(16);
dec_fls2(16);
dec_rounds2(8);
dec_fls2(8);
dec_rounds2(0);
movq RDST, RIO;
dec_outunpack2();
movq RRBP, %rbp;
movq RXOR, %rbx;
ret;
ENDPROC(camellia_dec_blk_2way)

View file

@ -0,0 +1,586 @@
/*
* Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
/* 32-way AVX2/AES-NI parallel cipher functions */
asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
nbytes);
}
static inline void camellia_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
&tfm->crt_flags);
}
struct crypt_priv {
struct camellia_ctx *ctx;
bool fpu_enabled;
};
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
}
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_enc_blk(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
}
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_dec_blk(ctx->ctx, srcdst, srcdst);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni-avx2",
.cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-camellia-aesni-avx2",
.cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-camellia-aesni-avx2",
.cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "__lrw-camellia-aesni-avx2",
.cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_camellia_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = lrw_camellia_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-camellia-aesni-avx2",
.cra_driver_name = "__driver-xts-camellia-aesni-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "ecb(camellia)",
.cra_driver_name = "ecb-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(camellia)",
.cra_driver_name = "cbc-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(camellia)",
.cra_driver_name = "ctr-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
.cra_name = "lrw(camellia)",
.cra_driver_name = "lrw-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "xts(camellia)",
.cra_driver_name = "xts-camellia-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
static int __init camellia_aesni_init(void)
{
u64 xcr0;
if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX2 detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
static void __exit camellia_aesni_fini(void)
{
crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
module_init(camellia_aesni_init);
module_exit(camellia_aesni_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
MODULE_ALIAS_CRYPTO("camellia");
MODULE_ALIAS_CRYPTO("camellia-asm");

View file

@ -0,0 +1,578 @@
/*
* Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
/* 16-way parallel cipher functions (avx/aes-ni) */
asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_ctr_16way);
asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(camellia_enc_blk));
}
EXPORT_SYMBOL_GPL(camellia_xts_enc);
void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(camellia_dec_blk));
}
EXPORT_SYMBOL_GPL(camellia_xts_dec);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
nbytes);
}
static inline void camellia_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
&tfm->crt_flags);
}
struct crypt_priv {
struct camellia_ctx *ctx;
bool fpu_enabled;
};
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_enc_blk(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_dec_blk(ctx->ctx, srcdst, srcdst);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->camellia_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
camellia_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni",
.cra_driver_name = "__driver-ecb-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-camellia-aesni",
.cra_driver_name = "__driver-cbc-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-camellia-aesni",
.cra_driver_name = "__driver-ctr-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "__lrw-camellia-aesni",
.cra_driver_name = "__driver-lrw-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_camellia_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = lrw_camellia_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-camellia-aesni",
.cra_driver_name = "__driver-xts-camellia-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "ecb(camellia)",
.cra_driver_name = "ecb-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(camellia)",
.cra_driver_name = "cbc-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(camellia)",
.cra_driver_name = "ctr-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
.cra_name = "lrw(camellia)",
.cra_driver_name = "lrw-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
CAMELLIA_BLOCK_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "xts(camellia)",
.cra_driver_name = "xts-camellia-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
static int __init camellia_aesni_init(void)
{
u64 xcr0;
if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
pr_info("AVX or AES-NI instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
static void __exit camellia_aesni_fini(void)
{
crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
module_init(camellia_aesni_init);
module_exit(camellia_aesni_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized");
MODULE_ALIAS_CRYPTO("camellia");
MODULE_ALIAS_CRYPTO("camellia-asm");

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,546 @@
/*
* Cast5 Cipher 16-way parallel algorithm (AVX/x86_64)
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/linkage.h>
.file "cast5-avx-x86_64-asm_64.S"
.extern cast_s1
.extern cast_s2
.extern cast_s3
.extern cast_s4
/* structure of crypto context */
#define km 0
#define kr (16*4)
#define rr ((16*4)+16)
/* s-boxes */
#define s1 cast_s1
#define s2 cast_s2
#define s3 cast_s3
#define s4 cast_s4
/**********************************************************************
16-way AVX cast5
**********************************************************************/
#define CTX %rdi
#define RL1 %xmm0
#define RR1 %xmm1
#define RL2 %xmm2
#define RR2 %xmm3
#define RL3 %xmm4
#define RR3 %xmm5
#define RL4 %xmm6
#define RR4 %xmm7
#define RX %xmm8
#define RKM %xmm9
#define RKR %xmm10
#define RKRF %xmm11
#define RKRR %xmm12
#define R32 %xmm13
#define R1ST %xmm14
#define RTMP %xmm15
#define RID1 %rbp
#define RID1d %ebp
#define RID2 %rsi
#define RID2d %esi
#define RGI1 %rdx
#define RGI1bl %dl
#define RGI1bh %dh
#define RGI2 %rcx
#define RGI2bl %cl
#define RGI2bh %ch
#define RGI3 %rax
#define RGI3bl %al
#define RGI3bh %ah
#define RGI4 %rbx
#define RGI4bl %bl
#define RGI4bh %bh
#define RFS1 %r8
#define RFS1d %r8d
#define RFS2 %r9
#define RFS2d %r9d
#define RFS3 %r10
#define RFS3d %r10d
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
movzbl src ## bh, RID1d; \
movzbl src ## bl, RID2d; \
shrq $16, src; \
movl s1(, RID1, 4), dst ## d; \
op1 s2(, RID2, 4), dst ## d; \
movzbl src ## bh, RID1d; \
movzbl src ## bl, RID2d; \
interleave_op(il_reg); \
op2 s3(, RID1, 4), dst ## d; \
op3 s4(, RID2, 4), dst ## d;
#define dummy(d) /* do nothing */
#define shr_next(reg) \
shrq $16, reg;
#define F_head(a, x, gi1, gi2, op0) \
op0 a, RKM, x; \
vpslld RKRF, x, RTMP; \
vpsrld RKRR, x, x; \
vpor RTMP, x, x; \
\
vmovq x, gi1; \
vpextrq $1, x, gi2;
#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
\
lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \
shlq $32, RFS2; \
orq RFS1, RFS2; \
lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \
shlq $32, RFS1; \
orq RFS1, RFS3; \
\
vmovq RFS2, x; \
vpinsrq $1, RFS3, x, x;
#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
F_head(b1, RX, RGI1, RGI2, op0); \
F_head(b2, RX, RGI3, RGI4, op0); \
\
F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \
F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \
\
vpxor a1, RX, a1; \
vpxor a2, RTMP, a2;
#define F1_2(a1, b1, a2, b2) \
F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
#define F2_2(a1, b1, a2, b2) \
F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
#define F3_2(a1, b1, a2, b2) \
F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
#define subround(a1, b1, a2, b2, f) \
F ## f ## _2(a1, b1, a2, b2);
#define round(l, r, n, f) \
vbroadcastss (km+(4*n))(CTX), RKM; \
vpand R1ST, RKR, RKRF; \
vpsubq RKRF, R32, RKRR; \
vpsrldq $1, RKR, RKR; \
subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \
subround(l ## 3, r ## 3, l ## 4, r ## 4, f);
#define enc_preload_rkr() \
vbroadcastss .L16_mask, RKR; \
/* add 16-bit rotation to key rotations (mod 32) */ \
vpxor kr(CTX), RKR, RKR;
#define dec_preload_rkr() \
vbroadcastss .L16_mask, RKR; \
/* add 16-bit rotation to key rotations (mod 32) */ \
vpxor kr(CTX), RKR, RKR; \
vpshufb .Lbswap128_mask, RKR, RKR;
#define transpose_2x4(x0, x1, t0, t1) \
vpunpckldq x1, x0, t0; \
vpunpckhdq x1, x0, t1; \
\
vpunpcklqdq t1, t0, x0; \
vpunpckhqdq t1, t0, x1;
#define inpack_blocks(x0, x1, t0, t1, rmask) \
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
\
transpose_2x4(x0, x1, t0, t1)
#define outunpack_blocks(x0, x1, t0, t1, rmask) \
transpose_2x4(x0, x1, t0, t1) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1;
.data
.align 16
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.L16_mask:
.byte 16, 16, 16, 16
.L32_mask:
.byte 32, 0, 0, 0
.Lfirst_mask:
.byte 0x1f, 0, 0, 0
.text
.align 16
__cast5_enc_blk16:
/* input:
* %rdi: ctx, CTX
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
* RR2: blocks 7 and 8
* RL3: blocks 9 and 10
* RR3: blocks 11 and 12
* RL4: blocks 13 and 14
* RR4: blocks 15 and 16
* output:
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
* RR2: encrypted blocks 7 and 8
* RL3: encrypted blocks 9 and 10
* RR3: encrypted blocks 11 and 12
* RL4: encrypted blocks 13 and 14
* RR4: encrypted blocks 15 and 16
*/
pushq %rbp;
pushq %rbx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
enc_preload_rkr();
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
inpack_blocks(RL2, RR2, RTMP, RX, RKM);
inpack_blocks(RL3, RR3, RTMP, RX, RKM);
inpack_blocks(RL4, RR4, RTMP, RX, RKM);
round(RL, RR, 0, 1);
round(RR, RL, 1, 2);
round(RL, RR, 2, 3);
round(RR, RL, 3, 1);
round(RL, RR, 4, 2);
round(RR, RL, 5, 3);
round(RL, RR, 6, 1);
round(RR, RL, 7, 2);
round(RL, RR, 8, 3);
round(RR, RL, 9, 1);
round(RL, RR, 10, 2);
round(RR, RL, 11, 3);
movzbl rr(CTX), %eax;
testl %eax, %eax;
jnz .L__skip_enc;
round(RL, RR, 12, 1);
round(RR, RL, 13, 2);
round(RL, RR, 14, 3);
round(RR, RL, 15, 1);
.L__skip_enc:
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
ENDPROC(__cast5_enc_blk16)
.align 16
__cast5_dec_blk16:
/* input:
* %rdi: ctx, CTX
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
* RR2: encrypted blocks 7 and 8
* RL3: encrypted blocks 9 and 10
* RR3: encrypted blocks 11 and 12
* RL4: encrypted blocks 13 and 14
* RR4: encrypted blocks 15 and 16
* output:
* RL1: decrypted blocks 1 and 2
* RR1: decrypted blocks 3 and 4
* RL2: decrypted blocks 5 and 6
* RR2: decrypted blocks 7 and 8
* RL3: decrypted blocks 9 and 10
* RR3: decrypted blocks 11 and 12
* RL4: decrypted blocks 13 and 14
* RR4: decrypted blocks 15 and 16
*/
pushq %rbp;
pushq %rbx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
dec_preload_rkr();
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
inpack_blocks(RL2, RR2, RTMP, RX, RKM);
inpack_blocks(RL3, RR3, RTMP, RX, RKM);
inpack_blocks(RL4, RR4, RTMP, RX, RKM);
movzbl rr(CTX), %eax;
testl %eax, %eax;
jnz .L__skip_dec;
round(RL, RR, 15, 1);
round(RR, RL, 14, 3);
round(RL, RR, 13, 2);
round(RR, RL, 12, 1);
.L__dec_tail:
round(RL, RR, 11, 3);
round(RR, RL, 10, 2);
round(RL, RR, 9, 1);
round(RR, RL, 8, 3);
round(RL, RR, 7, 2);
round(RR, RL, 6, 1);
round(RL, RR, 5, 3);
round(RR, RL, 4, 2);
round(RL, RR, 3, 1);
round(RR, RL, 2, 3);
round(RL, RR, 1, 2);
round(RR, RL, 0, 1);
vmovdqa .Lbswap_mask, RKM;
popq %rbx;
popq %rbp;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
.L__skip_dec:
vpsrldq $4, RKR, RKR;
jmp .L__dec_tail;
ENDPROC(__cast5_dec_blk16)
ENTRY(cast5_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
vmovdqu (1*4*4)(%rdx), RR1;
vmovdqu (2*4*4)(%rdx), RL2;
vmovdqu (3*4*4)(%rdx), RR2;
vmovdqu (4*4*4)(%rdx), RL3;
vmovdqu (5*4*4)(%rdx), RR3;
vmovdqu (6*4*4)(%rdx), RL4;
vmovdqu (7*4*4)(%rdx), RR4;
call __cast5_enc_blk16;
vmovdqu RR1, (0*4*4)(%r11);
vmovdqu RL1, (1*4*4)(%r11);
vmovdqu RR2, (2*4*4)(%r11);
vmovdqu RL2, (3*4*4)(%r11);
vmovdqu RR3, (4*4*4)(%r11);
vmovdqu RL3, (5*4*4)(%r11);
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
ret;
ENDPROC(cast5_ecb_enc_16way)
ENTRY(cast5_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
vmovdqu (1*4*4)(%rdx), RR1;
vmovdqu (2*4*4)(%rdx), RL2;
vmovdqu (3*4*4)(%rdx), RR2;
vmovdqu (4*4*4)(%rdx), RL3;
vmovdqu (5*4*4)(%rdx), RR3;
vmovdqu (6*4*4)(%rdx), RL4;
vmovdqu (7*4*4)(%rdx), RR4;
call __cast5_dec_blk16;
vmovdqu RR1, (0*4*4)(%r11);
vmovdqu RL1, (1*4*4)(%r11);
vmovdqu RR2, (2*4*4)(%r11);
vmovdqu RL2, (3*4*4)(%r11);
vmovdqu RR3, (4*4*4)(%r11);
vmovdqu RL3, (5*4*4)(%r11);
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
ret;
ENDPROC(cast5_ecb_dec_16way)
ENTRY(cast5_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
vmovdqu (0*16)(%rdx), RL1;
vmovdqu (1*16)(%rdx), RR1;
vmovdqu (2*16)(%rdx), RL2;
vmovdqu (3*16)(%rdx), RR2;
vmovdqu (4*16)(%rdx), RL3;
vmovdqu (5*16)(%rdx), RR3;
vmovdqu (6*16)(%rdx), RL4;
vmovdqu (7*16)(%rdx), RR4;
call __cast5_dec_blk16;
/* xor with src */
vmovq (%r12), RX;
vpshufd $0x4f, RX, RX;
vpxor RX, RR1, RR1;
vpxor 0*16+8(%r12), RL1, RL1;
vpxor 1*16+8(%r12), RR2, RR2;
vpxor 2*16+8(%r12), RL2, RL2;
vpxor 3*16+8(%r12), RR3, RR3;
vpxor 4*16+8(%r12), RL3, RL3;
vpxor 5*16+8(%r12), RR4, RR4;
vpxor 6*16+8(%r12), RL4, RL4;
vmovdqu RR1, (0*16)(%r11);
vmovdqu RL1, (1*16)(%r11);
vmovdqu RR2, (2*16)(%r11);
vmovdqu RL2, (3*16)(%r11);
vmovdqu RR3, (4*16)(%r11);
vmovdqu RL3, (5*16)(%r11);
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
popq %r12;
ret;
ENDPROC(cast5_cbc_dec_16way)
ENTRY(cast5_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
vpcmpeqd RTMP, RTMP, RTMP;
vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
vpcmpeqd RKR, RKR, RKR;
vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
vmovdqa .Lbswap_iv_mask, R1ST;
vmovdqa .Lbswap128_mask, RKM;
/* load IV and byteswap */
vmovq (%rcx), RX;
vpshufb R1ST, RX, RX;
/* construct IVs */
vpsubq RTMP, RX, RX; /* le: IV1, IV0 */
vpshufb RKM, RX, RL1; /* be: IV0, IV1 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR1; /* be: IV2, IV3 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RL2; /* be: IV4, IV5 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR2; /* be: IV6, IV7 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RL3; /* be: IV8, IV9 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR3; /* be: IV10, IV11 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RL4; /* be: IV12, IV13 */
vpsubq RKR, RX, RX;
vpshufb RKM, RX, RR4; /* be: IV14, IV15 */
/* store last IV */
vpsubq RTMP, RX, RX; /* le: IV16, IV14 */
vpshufb R1ST, RX, RX; /* be: IV16, IV16 */
vmovq RX, (%rcx);
call __cast5_enc_blk16;
/* dst = src ^ iv */
vpxor (0*16)(%r12), RR1, RR1;
vpxor (1*16)(%r12), RL1, RL1;
vpxor (2*16)(%r12), RR2, RR2;
vpxor (3*16)(%r12), RL2, RL2;
vpxor (4*16)(%r12), RR3, RR3;
vpxor (5*16)(%r12), RL3, RL3;
vpxor (6*16)(%r12), RR4, RR4;
vpxor (7*16)(%r12), RL4, RL4;
vmovdqu RR1, (0*16)(%r11);
vmovdqu RL1, (1*16)(%r11);
vmovdqu RR2, (2*16)(%r11);
vmovdqu RL2, (3*16)(%r11);
vmovdqu RR3, (4*16)(%r11);
vmovdqu RL3, (5*16)(%r11);
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
popq %r12;
ret;
ENDPROC(cast5_ctr_16way)

View file

@ -0,0 +1,494 @@
/*
* Glue Code for the AVX assembler implemention of the Cast5 Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>
#define CAST5_PARALLEL_BLOCKS 16
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
__be64 *iv);
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
NULL, fpu_enabled, nbytes);
}
static inline void cast5_fpu_end(bool fpu_enabled)
{
return glue_fpu_end(fpu_enabled);
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
bool enc)
{
bool fpu_enabled = false;
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes;
void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
int err;
fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
err = blkcipher_walk_virt(desc, walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
fn(ctx, wdst, wsrc);
wsrc += bsize * CAST5_PARALLEL_BLOCKS;
wdst += bsize * CAST5_PARALLEL_BLOCKS;
nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, true);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, false);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
src -= CAST5_PARALLEL_BLOCKS - 1;
dst -= CAST5_PARALLEL_BLOCKS - 1;
cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
}
/* Handle leftovers */
for (;;) {
__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes)) {
fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
return err;
}
static void ctr_crypt_final(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *ctrblk = walk->iv;
u8 keystream[CAST5_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
__cast5_encrypt(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
(__be64 *)walk->iv);
src += CAST5_PARALLEL_BLOCKS;
dst += CAST5_PARALLEL_BLOCKS;
nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
u64 ctrblk;
if (dst != src)
*dst = *src;
ctrblk = *(u64 *)walk->iv;
be64_add_cpu((__be64 *)walk->iv, 1);
__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
*dst ^= ctrblk;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
return nbytes;
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__ecb-cast5-avx",
.cra_driver_name = "__driver-ecb-cast5-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.setkey = cast5_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-cast5-avx",
.cra_driver_name = "__driver-cbc-cast5-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.setkey = cast5_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-cast5-avx",
.cra_driver_name = "__driver-ctr-cast5-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.ivsize = CAST5_BLOCK_SIZE,
.setkey = cast5_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "ecb(cast5)",
.cra_driver_name = "ecb-cast5-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(cast5)",
.cra_driver_name = "cbc-cast5-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.ivsize = CAST5_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(cast5)",
.cra_driver_name = "ctr-cast5-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.ivsize = CAST5_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
} };
static int __init cast5_init(void)
{
u64 xcr0;
if (!cpu_has_avx || !cpu_has_osxsave) {
pr_info("AVX instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}
static void __exit cast5_exit(void)
{
crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}
module_init(cast5_init);
module_exit(cast5_exit);
MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");

View file

@ -0,0 +1,472 @@
/*
* Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/linkage.h>
#include "glue_helper-asm-avx.S"
.file "cast6-avx-x86_64-asm_64.S"
.extern cast_s1
.extern cast_s2
.extern cast_s3
.extern cast_s4
/* structure of crypto context */
#define km 0
#define kr (12*4*4)
/* s-boxes */
#define s1 cast_s1
#define s2 cast_s2
#define s3 cast_s3
#define s4 cast_s4
/**********************************************************************
8-way AVX cast6
**********************************************************************/
#define CTX %rdi
#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3
#define RA2 %xmm4
#define RB2 %xmm5
#define RC2 %xmm6
#define RD2 %xmm7
#define RX %xmm8
#define RKM %xmm9
#define RKR %xmm10
#define RKRF %xmm11
#define RKRR %xmm12
#define R32 %xmm13
#define R1ST %xmm14
#define RTMP %xmm15
#define RID1 %rbp
#define RID1d %ebp
#define RID2 %rsi
#define RID2d %esi
#define RGI1 %rdx
#define RGI1bl %dl
#define RGI1bh %dh
#define RGI2 %rcx
#define RGI2bl %cl
#define RGI2bh %ch
#define RGI3 %rax
#define RGI3bl %al
#define RGI3bh %ah
#define RGI4 %rbx
#define RGI4bl %bl
#define RGI4bh %bh
#define RFS1 %r8
#define RFS1d %r8d
#define RFS2 %r9
#define RFS2d %r9d
#define RFS3 %r10
#define RFS3d %r10d
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
movzbl src ## bh, RID1d; \
movzbl src ## bl, RID2d; \
shrq $16, src; \
movl s1(, RID1, 4), dst ## d; \
op1 s2(, RID2, 4), dst ## d; \
movzbl src ## bh, RID1d; \
movzbl src ## bl, RID2d; \
interleave_op(il_reg); \
op2 s3(, RID1, 4), dst ## d; \
op3 s4(, RID2, 4), dst ## d;
#define dummy(d) /* do nothing */
#define shr_next(reg) \
shrq $16, reg;
#define F_head(a, x, gi1, gi2, op0) \
op0 a, RKM, x; \
vpslld RKRF, x, RTMP; \
vpsrld RKRR, x, x; \
vpor RTMP, x, x; \
\
vmovq x, gi1; \
vpextrq $1, x, gi2;
#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
\
lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \
shlq $32, RFS2; \
orq RFS1, RFS2; \
lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \
shlq $32, RFS1; \
orq RFS1, RFS3; \
\
vmovq RFS2, x; \
vpinsrq $1, RFS3, x, x;
#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
F_head(b1, RX, RGI1, RGI2, op0); \
F_head(b2, RX, RGI3, RGI4, op0); \
\
F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \
F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \
\
vpxor a1, RX, a1; \
vpxor a2, RTMP, a2;
#define F1_2(a1, b1, a2, b2) \
F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
#define F2_2(a1, b1, a2, b2) \
F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
#define F3_2(a1, b1, a2, b2) \
F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
#define qop(in, out, f) \
F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);
#define get_round_keys(nn) \
vbroadcastss (km+(4*(nn)))(CTX), RKM; \
vpand R1ST, RKR, RKRF; \
vpsubq RKRF, R32, RKRR; \
vpsrldq $1, RKR, RKR;
#define Q(n) \
get_round_keys(4*n+0); \
qop(RD, RC, 1); \
\
get_round_keys(4*n+1); \
qop(RC, RB, 2); \
\
get_round_keys(4*n+2); \
qop(RB, RA, 3); \
\
get_round_keys(4*n+3); \
qop(RA, RD, 1);
#define QBAR(n) \
get_round_keys(4*n+3); \
qop(RA, RD, 1); \
\
get_round_keys(4*n+2); \
qop(RB, RA, 3); \
\
get_round_keys(4*n+1); \
qop(RC, RB, 2); \
\
get_round_keys(4*n+0); \
qop(RD, RC, 1);
#define shuffle(mask) \
vpshufb mask, RKR, RKR;
#define preload_rkr(n, do_mask, mask) \
vbroadcastss .L16_mask, RKR; \
/* add 16-bit rotation to key rotations (mod 32) */ \
vpxor (kr+n*16)(CTX), RKR, RKR; \
do_mask(mask);
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
vpunpckldq x1, x0, t0; \
vpunpckhdq x1, x0, t2; \
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x3; \
\
vpunpcklqdq t1, t0, x0; \
vpunpckhqdq t1, t0, x1; \
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
vpshufb rmask, x3, x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
vpshufb rmask, x3, x3;
.data
.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_enc_Q_Q_QBAR_QBAR:
.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_dec_Q_Q_Q_Q:
.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
.Lrkr_dec_Q_Q_QBAR_QBAR:
.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.L16_mask:
.byte 16, 16, 16, 16
.L32_mask:
.byte 32, 0, 0, 0
.Lfirst_mask:
.byte 0x1f, 0, 0, 0
.text
.align 8
__cast6_enc_blk8:
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
pushq %rbp;
pushq %rbx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
preload_rkr(0, dummy, none);
Q(0);
Q(1);
Q(2);
Q(3);
preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
Q(4);
Q(5);
QBAR(6);
QBAR(7);
preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
QBAR(8);
QBAR(9);
QBAR(10);
QBAR(11);
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
ENDPROC(__cast6_enc_blk8)
.align 8
__cast6_dec_blk8:
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
pushq %rbp;
pushq %rbx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
Q(11);
Q(10);
Q(9);
Q(8);
preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
Q(7);
Q(6);
QBAR(5);
QBAR(4);
preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
QBAR(3);
QBAR(2);
QBAR(1);
QBAR(0);
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
ENDPROC(__cast6_dec_blk8)
ENTRY(cast6_ecb_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __cast6_enc_blk8;
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_ecb_enc_8way)
ENTRY(cast6_ecb_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __cast6_dec_blk8;
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_ecb_dec_8way)
ENTRY(cast6_cbc_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
call __cast6_dec_blk8;
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r12;
ret;
ENDPROC(cast6_cbc_dec_8way)
ENTRY(cast6_ctr_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (little endian, 128bit)
*/
pushq %r12;
movq %rsi, %r11;
movq %rdx, %r12;
load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
RD2, RX, RKR, RKM);
call __cast6_enc_blk8;
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
popq %r12;
ret;
ENDPROC(cast6_ctr_8way)
ENTRY(cast6_xts_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t α GF(2¹²))
*/
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
call __cast6_enc_blk8;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_xts_enc_8way)
ENTRY(cast6_xts_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t α GF(2¹²))
*/
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
call __cast6_dec_blk8;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_xts_dec_8way)

View file

@ -0,0 +1,614 @@
/*
* Glue Code for the AVX assembler implemention of the Cast6 Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast6.h>
#include <crypto/cryptd.h>
#include <crypto/b128ops.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>
#define CAST6_PARALLEL_BLOCKS 8
asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
le128 *iv);
asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_encrypt));
}
static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_decrypt));
}
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static const struct common_glue_ctx cast6_enc = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
} }
};
static const struct common_glue_ctx cast6_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
} }
};
static const struct common_glue_ctx cast6_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
} }
};
static const struct common_glue_ctx cast6_dec = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
} }
};
static const struct common_glue_ctx cast6_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
} }
};
static const struct common_glue_ctx cast6_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
}
static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
NULL, fpu_enabled, nbytes);
}
static inline void cast6_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
struct crypt_priv {
struct cast6_ctx *ctx;
bool fpu_enabled;
};
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAST6_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
return;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
__cast6_encrypt(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAST6_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
return;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
__cast6_decrypt(ctx->ctx, srcdst, srcdst);
}
struct cast6_lrw_ctx {
struct lrw_table_ctx lrw_table;
struct cast6_ctx cast6_ctx;
};
static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
&tfm->crt_flags);
if (err)
return err;
return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAST6_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->cast6_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
cast6_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAST6_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->cast6_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
cast6_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static void lrw_exit_tfm(struct crypto_tfm *tfm)
{
struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
struct cast6_xts_ctx {
struct cast6_ctx tweak_ctx;
struct cast6_ctx crypt_ctx;
};
static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
int err;
/* key consists of keys of equal size concatenated, therefore
* the length must be even
*/
if (keylen % 2) {
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
/* first half of xts-key is for crypt */
err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
if (err)
return err;
/* second half of xts-key is for tweak */
return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
flags);
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(__cast6_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(__cast6_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__ecb-cast6-avx",
.cra_driver_name = "__driver-ecb-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.setkey = cast6_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-cast6-avx",
.cra_driver_name = "__driver-cbc-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.setkey = cast6_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-cast6-avx",
.cra_driver_name = "__driver-ctr-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = cast6_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "__lrw-cast6-avx",
.cra_driver_name = "__driver-lrw-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE +
CAST6_BLOCK_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE +
CAST6_BLOCK_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = lrw_cast6_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-cast6-avx",
.cra_driver_name = "__driver-xts-cast6-avx",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE * 2,
.max_keysize = CAST6_MAX_KEY_SIZE * 2,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = xts_cast6_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "ecb(cast6)",
.cra_driver_name = "ecb-cast6-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(cast6)",
.cra_driver_name = "cbc-cast6-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(cast6)",
.cra_driver_name = "ctr-cast6-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
.cra_name = "lrw(cast6)",
.cra_driver_name = "lrw-cast6-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE +
CAST6_BLOCK_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE +
CAST6_BLOCK_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "xts(cast6)",
.cra_driver_name = "xts-cast6-avx",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = CAST6_MIN_KEY_SIZE * 2,
.max_keysize = CAST6_MAX_KEY_SIZE * 2,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
static int __init cast6_init(void)
{
u64 xcr0;
if (!cpu_has_avx || !cpu_has_osxsave) {
pr_info("AVX instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
}
static void __exit cast6_exit(void)
{
crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
}
module_init(cast6_init);
module_exit(cast6_exit);
MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast6");

View file

@ -0,0 +1,246 @@
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
* calculation.
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
* at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2B: Instruction Set Reference, N-Z
*
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
* Alexander Boyko <Alexander_Boyko@xyratex.com>
*/
#include <linux/linkage.h>
#include <asm/inst.h>
.align 16
/*
* [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
* #define CONSTANT_R1 0x154442bd4LL
*
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
* #define CONSTANT_R2 0x1c6e41596LL
*/
.Lconstant_R2R1:
.octa 0x00000001c6e415960000000154442bd4
/*
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
* #define CONSTANT_R3 0x1751997d0LL
*
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
* #define CONSTANT_R4 0x0ccaa009eLL
*/
.Lconstant_R4R3:
.octa 0x00000000ccaa009e00000001751997d0
/*
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
* #define CONSTANT_R5 0x163cd6124LL
*/
.Lconstant_R5:
.octa 0x00000000000000000000000163cd6124
.Lconstant_mask32:
.octa 0x000000000000000000000000FFFFFFFF
/*
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
*
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
* #define CONSTANT_RU 0x1F7011641LL
*/
.Lconstant_RUpoly:
.octa 0x00000001F701164100000001DB710641
#define CONSTANT %xmm0
#ifdef __x86_64__
#define BUF %rdi
#define LEN %rsi
#define CRC %edx
#else
#define BUF %eax
#define LEN %edx
#define CRC %ecx
#endif
.text
/**
* Calculate crc32
* BUF - buffer (16 bytes aligned)
* LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63
* CRC - initial crc32
* return %eax crc32
* uint crc32_pclmul_le_16(unsigned char const *buffer,
* size_t len, uint crc32)
*/
ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
movdqa 0x20(BUF), %xmm3
movdqa 0x30(BUF), %xmm4
movd CRC, CONSTANT
pxor CONSTANT, %xmm1
sub $0x40, LEN
add $0x40, BUF
#ifndef __x86_64__
/* This is for position independent code(-fPIC) support for 32bit */
call delta
delta:
pop %ecx
#endif
cmp $0x40, LEN
jb less_64
#ifdef __x86_64__
movdqa .Lconstant_R2R1(%rip), CONSTANT
#else
movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
#endif
loop_64:/* 64 bytes Full cache line folding */
prefetchnta 0x40(BUF)
movdqa %xmm1, %xmm5
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
#ifdef __x86_64__
movdqa %xmm4, %xmm8
#endif
PCLMULQDQ 00, CONSTANT, %xmm1
PCLMULQDQ 00, CONSTANT, %xmm2
PCLMULQDQ 00, CONSTANT, %xmm3
#ifdef __x86_64__
PCLMULQDQ 00, CONSTANT, %xmm4
#endif
PCLMULQDQ 0x11, CONSTANT, %xmm5
PCLMULQDQ 0x11, CONSTANT, %xmm6
PCLMULQDQ 0x11, CONSTANT, %xmm7
#ifdef __x86_64__
PCLMULQDQ 0x11, CONSTANT, %xmm8
#endif
pxor %xmm5, %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
#ifdef __x86_64__
pxor %xmm8, %xmm4
#else
/* xmm8 unsupported for x32 */
movdqa %xmm4, %xmm5
PCLMULQDQ 00, CONSTANT, %xmm4
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm4
#endif
pxor (BUF), %xmm1
pxor 0x10(BUF), %xmm2
pxor 0x20(BUF), %xmm3
pxor 0x30(BUF), %xmm4
sub $0x40, LEN
add $0x40, BUF
cmp $0x40, LEN
jge loop_64
less_64:/* Folding cache line into 128bit */
#ifdef __x86_64__
movdqa .Lconstant_R4R3(%rip), CONSTANT
#else
movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
#endif
prefetchnta (BUF)
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm2, %xmm1
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm3, %xmm1
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm4, %xmm1
cmp $0x10, LEN
jb fold_64
loop_16:/* Folding rest buffer into 128bit */
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor (BUF), %xmm1
sub $0x10, LEN
add $0x10, BUF
cmp $0x10, LEN
jge loop_16
fold_64:
/* perform the last 64 bit fold, also adds 32 zeroes
* to the input stream */
PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
psrldq $0x08, %xmm1
pxor CONSTANT, %xmm1
/* final 32-bit fold */
movdqa %xmm1, %xmm2
#ifdef __x86_64__
movdqa .Lconstant_R5(%rip), CONSTANT
movdqa .Lconstant_mask32(%rip), %xmm3
#else
movdqa .Lconstant_R5 - delta(%ecx), CONSTANT
movdqa .Lconstant_mask32 - delta(%ecx), %xmm3
#endif
psrldq $0x04, %xmm2
pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
movdqa .Lconstant_RUpoly(%rip), CONSTANT
#else
movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT
#endif
movdqa %xmm1, %xmm2
pand %xmm3, %xmm1
PCLMULQDQ 0x10, CONSTANT, %xmm1
pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
PEXTRD 0x01, %xmm1, %eax
ret
ENDPROC(crc32_pclmul_le_16)

View file

@ -0,0 +1,201 @@
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*
* Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
* for crc32_pclmul_le_16 */
#define SCALE_F 16L /* size of xmm register */
#define SCALE_F_MASK (SCALE_F - 1)
u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
static u32 __attribute__((pure))
crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
{
unsigned int iquotient;
unsigned int iremainder;
unsigned int prealign;
if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
return crc32_le(crc, p, len);
if ((long)p & SCALE_F_MASK) {
/* align p to 16 byte */
prealign = SCALE_F - ((long)p & SCALE_F_MASK);
crc = crc32_le(crc, p, prealign);
len -= prealign;
p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
~SCALE_F_MASK);
}
iquotient = len & (~SCALE_F_MASK);
iremainder = len & SCALE_F_MASK;
kernel_fpu_begin();
crc = crc32_pclmul_le_16(p, iquotient, crc);
kernel_fpu_end();
if (iremainder)
crc = crc32_le(crc, p + iquotient, iremainder);
return crc;
}
static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = 0;
return 0;
}
static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
unsigned int keylen)
{
u32 *mctx = crypto_shash_ctx(hash);
if (keylen != sizeof(u32)) {
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
*mctx = le32_to_cpup((__le32 *)key);
return 0;
}
static int crc32_pclmul_init(struct shash_desc *desc)
{
u32 *mctx = crypto_shash_ctx(desc->tfm);
u32 *crcp = shash_desc_ctx(desc);
*crcp = *mctx;
return 0;
}
static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
*crcp = crc32_pclmul_le(*crcp, data, len);
return 0;
}
/* No final XOR 0xFFFFFFFF, like crc32_le */
static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
return 0;
}
static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
}
static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
{
u32 *crcp = shash_desc_ctx(desc);
*(__le32 *)out = cpu_to_le32p(crcp);
return 0;
}
static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
static struct shash_alg alg = {
.setkey = crc32_pclmul_setkey,
.init = crc32_pclmul_init,
.update = crc32_pclmul_update,
.final = crc32_pclmul_final,
.finup = crc32_pclmul_finup,
.digest = crc32_pclmul_digest,
.descsize = sizeof(u32),
.digestsize = CHKSUM_DIGEST_SIZE,
.base = {
.cra_name = "crc32",
.cra_driver_name = "crc32-pclmul",
.cra_priority = 200,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
.cra_init = crc32_pclmul_cra_init,
}
};
static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
static int __init crc32_pclmul_mod_init(void)
{
if (!x86_match_cpu(crc32pclmul_cpu_id)) {
pr_info("PCLMULQDQ-NI instructions are not detected.\n");
return -ENODEV;
}
return crypto_register_shash(&alg);
}
static void __exit crc32_pclmul_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crc32_pclmul_mod_init);
module_exit(crc32_pclmul_mod_fini);
MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crc32");
MODULE_ALIAS_CRYPTO("crc32-pclmul");

View file

@ -0,0 +1,284 @@
/*
* Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
* CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
* CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2A: Instruction Set Reference, A-M
*
* Copyright (C) 2008 Intel Corporation
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
* Kent Liu <kent.liu@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
#define SCALE_F sizeof(unsigned long)
#ifdef CONFIG_X86_64
#define REX_PRE "0x48, "
#else
#define REX_PRE
#endif
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
* size is >= 512 (when eager fpu is enabled) or
* >= 1024 (when eager fpu is disabled) to account
* for fpu state save/restore overhead.
*/
#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
unsigned int crc_init);
static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
#if defined(X86_FEATURE_EAGER_FPU)
#define set_pcl_breakeven_point() \
do { \
if (!use_eager_fpu()) \
crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
} while (0)
#else
#define set_pcl_breakeven_point() \
(crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
#endif
#endif /* CONFIG_X86_64 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
__asm__ __volatile__(
".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
:"=S"(crc)
:"0"(crc), "c"(*data)
);
data++;
}
return crc;
}
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
unsigned int iquotient = len / SCALE_F;
unsigned int iremainder = len % SCALE_F;
unsigned long *ptmp = (unsigned long *)p;
while (iquotient--) {
__asm__ __volatile__(
".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
:"=S"(crc)
:"0"(crc), "c"(*ptmp)
);
ptmp++;
}
if (iremainder)
crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
iremainder);
return crc;
}
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~0, then XOR with ~0 before you set
* the seed.
*/
static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
unsigned int keylen)
{
u32 *mctx = crypto_shash_ctx(hash);
if (keylen != sizeof(u32)) {
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
*mctx = le32_to_cpup((__le32 *)key);
return 0;
}
static int crc32c_intel_init(struct shash_desc *desc)
{
u32 *mctx = crypto_shash_ctx(desc->tfm);
u32 *crcp = shash_desc_ctx(desc);
*crcp = *mctx;
return 0;
}
static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
*crcp = crc32c_intel_le_hw(*crcp, data, len);
return 0;
}
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
return 0;
}
static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
}
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
u32 *crcp = shash_desc_ctx(desc);
*(__le32 *)out = ~cpu_to_le32p(crcp);
return 0;
}
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = ~0;
return 0;
}
#ifdef CONFIG_X86_64
static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
/*
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
} else
*crcp = crc32c_intel_le_hw(*crcp, data, len);
return 0;
}
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
} else
*(__le32 *)out =
~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
return 0;
}
static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
}
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
#endif /* CONFIG_X86_64 */
static struct shash_alg alg = {
.setkey = crc32c_intel_setkey,
.init = crc32c_intel_init,
.update = crc32c_intel_update,
.final = crc32c_intel_final,
.finup = crc32c_intel_finup,
.digest = crc32c_intel_digest,
.descsize = sizeof(u32),
.digestsize = CHKSUM_DIGEST_SIZE,
.base = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-intel",
.cra_priority = 200,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
.cra_init = crc32c_intel_cra_init,
}
};
static const struct x86_cpu_id crc32c_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
static int __init crc32c_intel_mod_init(void)
{
if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
if (cpu_has_pclmulqdq) {
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
set_pcl_breakeven_point();
}
#endif
return crypto_register_shash(&alg);
}
static void __exit crc32c_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crc32c_intel_mod_init);
module_exit(crc32c_intel_mod_fini);
MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-intel");

View file

@ -0,0 +1,463 @@
/*
* Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
*
* The white papers on CRC32C calculations with PCLMULQDQ instruction can be
* downloaded from:
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
*
* Copyright (C) 2012 Intel Corporation.
*
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
* James Guilford <james.guilford@intel.com>
* David Cote <david.m.cote@intel.com>
* Tim Chen <tim.c.chen@linux.intel.com>
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <asm/inst.h>
#include <linux/linkage.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
.macro LABEL prefix n
\prefix\n\():
.endm
.macro JMPTBL_ENTRY i
.word crc_\i - crc_array
.endm
.macro JNC_LESS_THAN j
jnc less_than_\j
.endm
# Define threshold where buffers are considered "small" and routed to more
# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
# SMALL_SIZE can be no larger than 255.
#define SMALL_SIZE 200
.if (SMALL_SIZE > 255)
.error "SMALL_ SIZE must be < 256"
.endif
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
.text
ENTRY(crc_pcl)
#define bufp %rdi
#define bufp_dw %edi
#define bufp_w %di
#define bufp_b %dil
#define bufptmp %rcx
#define block_0 %rcx
#define block_1 %rdx
#define block_2 %r11
#define len %rsi
#define len_dw %esi
#define len_w %si
#define len_b %sil
#define crc_init_arg %rdx
#define tmp %rbx
#define crc_init %r8
#define crc_init_dw %r8d
#define crc1 %r9
#define crc2 %r10
pushq %rbx
pushq %rdi
pushq %rsi
## Move crc_init for Linux to a different
mov crc_init_arg, crc_init
################################################################
## 1) ALIGN:
################################################################
mov bufp, bufptmp # rdi = *buf
neg bufp
and $7, bufp # calculate the unalignment amount of
# the address
je proc_block # Skip if aligned
## If len is less than 8 and we're unaligned, we need to jump
## to special code to avoid reading beyond the end of the buffer
cmp $8, len
jae do_align
# less_than_8 expects length in upper 3 bits of len_dw
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
shl $32-3+1, len_dw
jmp less_than_8_post_shl1
do_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
add bufp, bufptmp # align buffer pointer for quadword
# processing
sub bufp, len # update buffer length
align_loop:
crc32b %bl, crc_init_dw # compute crc32 of 1-byte
shr $8, tmp # get next byte
dec bufp
jne align_loop
proc_block:
################################################################
## 2) PROCESS BLOCKS:
################################################################
## compute num of bytes to be processed
movq len, tmp # save num bytes in tmp
cmpq $128*24, len
jae full_block
continue_block:
cmpq $SMALL_SIZE, len
jb small
## len < 128*24
movq $2731, %rax # 2731 = ceil(2^16 / 24)
mul len_dw
shrq $16, %rax
## eax contains floor(bytes / 24) = num 24-byte chunks to do
## process rax 24-byte chunks (128 >= rax >= 0)
## compute end address of each block
## block 0 (base addr + RAX * 8)
## block 1 (base addr + RAX * 16)
## block 2 (base addr + RAX * 24)
lea (bufptmp, %rax, 8), block_0
lea (block_0, %rax, 8), block_1
lea (block_1, %rax, 8), block_2
xor crc1, crc1
xor crc2, crc2
## branch into array
lea jump_table(%rip), bufp
movzxw (bufp, %rax, 2), len
offset=crc_array-jump_table
lea offset(bufp, len, 1), bufp
jmp *bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
################################################################
full_block:
movq $128,%rax
lea 128*8*2(block_0), block_1
lea 128*8*3(block_0), block_2
add $128*8*1, block_0
xor crc1,crc1
xor crc2,crc2
# Fall thruogh into top of crc array (crc_128)
################################################################
## 3) CRC Array:
################################################################
crc_array:
i=128
.rept 128-1
.altmacro
LABEL crc_ %i
.noaltmacro
crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1
crc32q -i*8(block_2), crc2
i=(i-1)
.endr
.altmacro
LABEL crc_ %i
.noaltmacro
crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
mov block_2, block_0
################################################################
## 4) Combine three results:
################################################################
lea (K_table-8)(%rip), bufp # first entry is for idx 1
shlq $3, %rax # rax *= 8
pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
subq %rax, tmp # tmp -= rax*24
movq crc_init, %xmm1 # CRC for block 1
PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1
movq %xmm1, %rax
xor -i*8(block_2), %rax
mov crc2, crc_init
crc32 %rax, crc_init
################################################################
## 5) Check for end:
################################################################
LABEL crc_ 0
mov tmp, len
cmp $128*24, tmp
jae full_block
cmp $24, tmp
jae continue_block
less_than_24:
shl $32-4, len_dw # less_than_16 expects length
# in upper 4 bits of len_dw
jnc less_than_16
crc32q (bufptmp), crc_init
crc32q 8(bufptmp), crc_init
jz do_return
add $16, bufptmp
# len is less than 8 if we got here
# less_than_8 expects length in upper 3 bits of len_dw
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
shl $2, len_dw
jmp less_than_8_post_shl1
#######################################################################
## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
#######################################################################
small:
shl $32-8, len_dw # Prepare len_dw for less_than_256
j=256
.rept 5 # j = {256, 128, 64, 32, 16}
.altmacro
LABEL less_than_ %j # less_than_j: Length should be in
# upper lg(j) bits of len_dw
j=(j/2)
shl $1, len_dw # Get next MSB
JNC_LESS_THAN %j
.noaltmacro
i=0
.rept (j/8)
crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
i=i+8
.endr
jz do_return # Return if remaining length is zero
add $j, bufptmp # Advance buf
.endr
less_than_8: # Length should be stored in
# upper 3 bits of len_dw
shl $1, len_dw
less_than_8_post_shl1:
jnc less_than_4
crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
jz do_return # return if remaining data is zero
add $4, bufptmp
less_than_4: # Length should be stored in
# upper 2 bits of len_dw
shl $1, len_dw
jnc less_than_2
crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
jz do_return # return if remaining data is zero
add $2, bufptmp
less_than_2: # Length should be stored in the MSB
# of len_dw
shl $1, len_dw
jnc less_than_1
crc32b (bufptmp), crc_init_dw # CRC of 1 byte
less_than_1: # Length should be zero
do_return:
movq crc_init, %rax
popq %rsi
popq %rdi
popq %rbx
ret
################################################################
## jump table Table is 129 entries x 2 bytes each
################################################################
.align 4
jump_table:
i=0
.rept 129
.altmacro
JMPTBL_ENTRY %i
.noaltmacro
i=i+1
.endr
ENDPROC(crc_pcl)
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 words (8 bytes) each
################################################################
.section .rotata, "a", %progbits
.align 8
K_table:
.long 0x493c7d27, 0x00000001
.long 0xba4fc28e, 0x493c7d27
.long 0xddc0152b, 0xf20c0dfe
.long 0x9e4addf8, 0xba4fc28e
.long 0x39d3b296, 0x3da6d0cb
.long 0x0715ce53, 0xddc0152b
.long 0x47db8317, 0x1c291d04
.long 0x0d3b6092, 0x9e4addf8
.long 0xc96cfdc0, 0x740eef02
.long 0x878a92a7, 0x39d3b296
.long 0xdaece73e, 0x083a6eec
.long 0xab7aff2a, 0x0715ce53
.long 0x2162d385, 0xc49f4f67
.long 0x83348832, 0x47db8317
.long 0x299847d5, 0x2ad91c30
.long 0xb9e02b86, 0x0d3b6092
.long 0x18b33a4e, 0x6992cea2
.long 0xb6dd949b, 0xc96cfdc0
.long 0x78d9ccb7, 0x7e908048
.long 0xbac2fd7b, 0x878a92a7
.long 0xa60ce07b, 0x1b3d8f29
.long 0xce7f39f4, 0xdaece73e
.long 0x61d82e56, 0xf1d0f55e
.long 0xd270f1a2, 0xab7aff2a
.long 0xc619809d, 0xa87ab8a8
.long 0x2b3cac5d, 0x2162d385
.long 0x65863b64, 0x8462d800
.long 0x1b03397f, 0x83348832
.long 0xebb883bd, 0x71d111a8
.long 0xb3e32c28, 0x299847d5
.long 0x064f7f26, 0xffd852c6
.long 0xdd7e3b0c, 0xb9e02b86
.long 0xf285651c, 0xdcb17aa4
.long 0x10746f3c, 0x18b33a4e
.long 0xc7a68855, 0xf37c5aee
.long 0x271d9844, 0xb6dd949b
.long 0x8e766a0c, 0x6051d5a2
.long 0x93a5f730, 0x78d9ccb7
.long 0x6cb08e5c, 0x18b0d4ff
.long 0x6b749fb2, 0xbac2fd7b
.long 0x1393e203, 0x21f3d99c
.long 0xcec3662e, 0xa60ce07b
.long 0x96c515bb, 0x8f158014
.long 0xe6fc4e6a, 0xce7f39f4
.long 0x8227bb8a, 0xa00457f7
.long 0xb0cd4768, 0x61d82e56
.long 0x39c7ff35, 0x8d6d2c43
.long 0xd7a4825c, 0xd270f1a2
.long 0x0ab3844b, 0x00ac29cf
.long 0x0167d312, 0xc619809d
.long 0xf6076544, 0xe9adf796
.long 0x26f6a60a, 0x2b3cac5d
.long 0xa741c1bf, 0x96638b34
.long 0x98d8d9cb, 0x65863b64
.long 0x49c3cc9c, 0xe0e9f351
.long 0x68bce87a, 0x1b03397f
.long 0x57a3d037, 0x9af01f2d
.long 0x6956fc3b, 0xebb883bd
.long 0x42d98888, 0x2cff42cf
.long 0x3771e98f, 0xb3e32c28
.long 0xb42ae3d9, 0x88f25a3a
.long 0x2178513a, 0x064f7f26
.long 0xe0ac139e, 0x4e36f0b0
.long 0x170076fa, 0xdd7e3b0c
.long 0x444dd413, 0xbd6f81f8
.long 0x6f345e45, 0xf285651c
.long 0x41d17b64, 0x91c9bd4b
.long 0xff0dba97, 0x10746f3c
.long 0xa2b73df1, 0x885f087b
.long 0xf872e54c, 0xc7a68855
.long 0x1e41e9fc, 0x4c144932
.long 0x86d8e4d2, 0x271d9844
.long 0x651bd98b, 0x52148f02
.long 0x5bb8f1bc, 0x8e766a0c
.long 0xa90fd27a, 0xa3c6f37a
.long 0xb3af077a, 0x93a5f730
.long 0x4984d782, 0xd7c0557f
.long 0xca6ef3ac, 0x6cb08e5c
.long 0x234e0b26, 0x63ded06a
.long 0xdd66cbbb, 0x6b749fb2
.long 0x4597456a, 0x4d56973c
.long 0xe9e28eb4, 0x1393e203
.long 0x7b3ff57a, 0x9669c9df
.long 0xc9c8b782, 0xcec3662e
.long 0x3f70cc6f, 0xe417f38a
.long 0x93e106a4, 0x96c515bb
.long 0x62ec6c6d, 0x4b9e0f71
.long 0xd813b325, 0xe6fc4e6a
.long 0x0df04680, 0xd104b8fc
.long 0x2342001e, 0x8227bb8a
.long 0x0a2a8d7e, 0x5b397730
.long 0x6d9a4957, 0xb0cd4768
.long 0xe8b6368b, 0xe78eb416
.long 0xd2c3ed1a, 0x39c7ff35
.long 0x995a5724, 0x61ff0e01
.long 0x9ef68d35, 0xd7a4825c
.long 0x0c139b31, 0x8d96551c
.long 0xf2271e60, 0x0ab3844b
.long 0x0b0bf8ca, 0x0bf80dd2
.long 0x2664fd8b, 0x0167d312
.long 0xed64812d, 0x8821abed
.long 0x02ee03b2, 0xf6076544
.long 0x8604ae0f, 0x6a45d2b2
.long 0x363bd6b3, 0x26f6a60a
.long 0x135c83fd, 0xd8d26619
.long 0x5fabe670, 0xa741c1bf
.long 0x35ec3279, 0xde87806c
.long 0x00bcf5f6, 0x98d8d9cb
.long 0x8ae00689, 0x14338754
.long 0x17f27698, 0x49c3cc9c
.long 0x58ca5f00, 0x5bd2011f
.long 0xaa7c7ad5, 0x68bce87a
.long 0xb5cfca28, 0xdd07448e
.long 0xded288f8, 0x57a3d037
.long 0x59f229bc, 0xdde8f5b9
.long 0x6d390dec, 0x6956fc3b
.long 0x37170390, 0xa3e3e02c
.long 0x6353c1cc, 0x42d98888
.long 0xc4584f5c, 0xd73c7bea
.long 0xf48642e9, 0x3771e98f
.long 0x531377e2, 0x80ff0093
.long 0xdd35bc8d, 0xb42ae3d9
.long 0xb25b29f2, 0x8fe4c34d
.long 0x9a5ede41, 0x2178513a
.long 0xa563905d, 0xdf99fc11
.long 0x45cddf4e, 0xe0ac139e
.long 0xacfa3103, 0x6c23e841
.long 0xa51b6135, 0x170076fa

View file

@ -0,0 +1,643 @@
########################################################################
# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
#
# Copyright (c) 2013, Intel Corporation
#
# Authors:
# Erdinc Ozturk <erdinc.ozturk@intel.com>
# Vinodh Gopal <vinodh.gopal@intel.com>
# James Guilford <james.guilford@intel.com>
# Tim Chen <tim.c.chen@linux.intel.com>
#
# This software is available to you under a choice of one of two
# licenses. You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the
# distribution.
#
# * Neither the name of the Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
# Function API:
# UINT16 crc_t10dif_pcl(
# UINT16 init_crc, //initial CRC value, 16 bits
# const unsigned char *buf, //buffer pointer to calculate CRC on
# UINT64 len //buffer length in bytes (64-bit data)
# );
#
# Reference paper titled "Fast CRC Computation for Generic
# Polynomials Using PCLMULQDQ Instruction"
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
#
#
#include <linux/linkage.h>
.text
#define arg1 %rdi
#define arg2 %rsi
#define arg3 %rdx
#define arg1_low32 %edi
ENTRY(crc_t10dif_pcl)
.align 16
# adjust the 16-bit initial_crc value, scale it to 32 bits
shl $16, arg1_low32
# Allocate Stack Space
mov %rsp, %rcx
sub $16*2, %rsp
# align stack to 16 byte boundary
and $~(0x10 - 1), %rsp
# check if smaller than 256
cmp $256, arg3
# for sizes less than 128, we can't fold 64B at a time...
jl _less_than_128
# load the initial crc value
movd arg1_low32, %xmm10 # initial crc
# crc value does not need to be byte-reflected, but it needs
# to be moved to the high part of the register.
# because data will be byte-reflected and will align with
# initial crc at correct place.
pslldq $12, %xmm10
movdqa SHUF_MASK(%rip), %xmm11
# receive the initial 64B data, xor the initial crc value
movdqu 16*0(arg2), %xmm0
movdqu 16*1(arg2), %xmm1
movdqu 16*2(arg2), %xmm2
movdqu 16*3(arg2), %xmm3
movdqu 16*4(arg2), %xmm4
movdqu 16*5(arg2), %xmm5
movdqu 16*6(arg2), %xmm6
movdqu 16*7(arg2), %xmm7
pshufb %xmm11, %xmm0
# XOR the initial_crc value
pxor %xmm10, %xmm0
pshufb %xmm11, %xmm1
pshufb %xmm11, %xmm2
pshufb %xmm11, %xmm3
pshufb %xmm11, %xmm4
pshufb %xmm11, %xmm5
pshufb %xmm11, %xmm6
pshufb %xmm11, %xmm7
movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
#imm value of pclmulqdq instruction
#will determine which constant to use
#################################################################
# we subtract 256 instead of 128 to save one instruction from the loop
sub $256, arg3
# at this section of the code, there is 64*x+y (0<=y<64) bytes of
# buffer. The _fold_64_B_loop will fold 64B at a time
# until we have 64+y Bytes of buffer
# fold 64B at a time. This section of the code folds 4 xmm
# registers in parallel
_fold_64_B_loop:
# update the buffer pointer
add $128, arg2 # buf += 64#
movdqu 16*0(arg2), %xmm9
movdqu 16*1(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm0, %xmm8
movdqa %xmm1, %xmm13
pclmulqdq $0x0 , %xmm10, %xmm0
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0 , %xmm10, %xmm1
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm0
xorps %xmm8 , %xmm0
pxor %xmm12, %xmm1
xorps %xmm13, %xmm1
movdqu 16*2(arg2), %xmm9
movdqu 16*3(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm2, %xmm8
movdqa %xmm3, %xmm13
pclmulqdq $0x0, %xmm10, %xmm2
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0, %xmm10, %xmm3
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm2
xorps %xmm8 , %xmm2
pxor %xmm12, %xmm3
xorps %xmm13, %xmm3
movdqu 16*4(arg2), %xmm9
movdqu 16*5(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm4, %xmm8
movdqa %xmm5, %xmm13
pclmulqdq $0x0, %xmm10, %xmm4
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0, %xmm10, %xmm5
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm4
xorps %xmm8 , %xmm4
pxor %xmm12, %xmm5
xorps %xmm13, %xmm5
movdqu 16*6(arg2), %xmm9
movdqu 16*7(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm6 , %xmm8
movdqa %xmm7 , %xmm13
pclmulqdq $0x0 , %xmm10, %xmm6
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0 , %xmm10, %xmm7
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm6
xorps %xmm8 , %xmm6
pxor %xmm12, %xmm7
xorps %xmm13, %xmm7
sub $128, arg3
# check if there is another 64B in the buffer to be able to fold
jge _fold_64_B_loop
##################################################################
add $128, arg2
# at this point, the buffer pointer is pointing at the last y Bytes
# of the buffer the 64B of folded data is in 4 of the xmm
# registers: xmm0, xmm1, xmm2, xmm3
# fold the 8 xmm registers to 1 xmm register with different constants
movdqa rk9(%rip), %xmm10
movdqa %xmm0, %xmm8
pclmulqdq $0x11, %xmm10, %xmm0
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm0, %xmm7
movdqa rk11(%rip), %xmm10
movdqa %xmm1, %xmm8
pclmulqdq $0x11, %xmm10, %xmm1
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm1, %xmm7
movdqa rk13(%rip), %xmm10
movdqa %xmm2, %xmm8
pclmulqdq $0x11, %xmm10, %xmm2
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm2, %xmm7
movdqa rk15(%rip), %xmm10
movdqa %xmm3, %xmm8
pclmulqdq $0x11, %xmm10, %xmm3
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm3, %xmm7
movdqa rk17(%rip), %xmm10
movdqa %xmm4, %xmm8
pclmulqdq $0x11, %xmm10, %xmm4
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm4, %xmm7
movdqa rk19(%rip), %xmm10
movdqa %xmm5, %xmm8
pclmulqdq $0x11, %xmm10, %xmm5
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm5, %xmm7
movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
#imm value of pclmulqdq instruction
#will determine which constant to use
movdqa %xmm6, %xmm8
pclmulqdq $0x11, %xmm10, %xmm6
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm6, %xmm7
# instead of 64, we add 48 to the loop counter to save 1 instruction
# from the loop instead of a cmp instruction, we use the negative
# flag with the jl instruction
add $128-16, arg3
jl _final_reduction_for_128
# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
# and the rest is in memory. We can fold 16 bytes at a time if y>=16
# continue folding 16B at a time
_16B_reduction_loop:
movdqa %xmm7, %xmm8
pclmulqdq $0x11, %xmm10, %xmm7
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
movdqu (arg2), %xmm0
pshufb %xmm11, %xmm0
pxor %xmm0 , %xmm7
add $16, arg2
sub $16, arg3
# instead of a cmp instruction, we utilize the flags with the
# jge instruction equivalent of: cmp arg3, 16-16
# check if there is any more 16B in the buffer to be able to fold
jge _16B_reduction_loop
#now we have 16+z bytes left to reduce, where 0<= z < 16.
#first, we reduce the data in the xmm7 register
_final_reduction_for_128:
# check if any more data to fold. If not, compute the CRC of
# the final 128 bits
add $16, arg3
je _128_done
# here we are getting data that is less than 16 bytes.
# since we know that there was data before the pointer, we can
# offset the input pointer before the actual point, to receive
# exactly 16 bytes. after that the registers need to be adjusted.
_get_last_two_xmms:
movdqa %xmm7, %xmm2
movdqu -16(arg2, arg3), %xmm1
pshufb %xmm11, %xmm1
# get rid of the extra data that was loaded before
# load the shift constant
lea pshufb_shf_table+16(%rip), %rax
sub arg3, %rax
movdqu (%rax), %xmm0
# shift xmm2 to the left by arg3 bytes
pshufb %xmm0, %xmm2
# shift xmm7 to the right by 16-arg3 bytes
pxor mask1(%rip), %xmm0
pshufb %xmm0, %xmm7
pblendvb %xmm2, %xmm1 #xmm0 is implicit
# fold 16 Bytes
movdqa %xmm1, %xmm2
movdqa %xmm7, %xmm8
pclmulqdq $0x11, %xmm10, %xmm7
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm2, %xmm7
_128_done:
# compute crc of a 128-bit value
movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
movdqa %xmm7, %xmm0
#64b fold
pclmulqdq $0x1, %xmm10, %xmm7
pslldq $8 , %xmm0
pxor %xmm0, %xmm7
#32b fold
movdqa %xmm7, %xmm0
pand mask2(%rip), %xmm0
psrldq $12, %xmm7
pclmulqdq $0x10, %xmm10, %xmm7
pxor %xmm0, %xmm7
#barrett reduction
_barrett:
movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
movdqa %xmm7, %xmm0
pclmulqdq $0x01, %xmm10, %xmm7
pslldq $4, %xmm7
pclmulqdq $0x11, %xmm10, %xmm7
pslldq $4, %xmm7
pxor %xmm0, %xmm7
pextrd $1, %xmm7, %eax
_cleanup:
# scale the result back to 16 bits
shr $16, %eax
mov %rcx, %rsp
ret
########################################################################
.align 16
_less_than_128:
# check if there is enough buffer to be able to fold 16B at a time
cmp $32, arg3
jl _less_than_32
movdqa SHUF_MASK(%rip), %xmm11
# now if there is, load the constants
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
movd arg1_low32, %xmm0 # get the initial crc value
pslldq $12, %xmm0 # align it to its correct place
movdqu (arg2), %xmm7 # load the plaintext
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
pxor %xmm0, %xmm7
# update the buffer pointer
add $16, arg2
# update the counter. subtract 32 instead of 16 to save one
# instruction from the loop
sub $32, arg3
jmp _16B_reduction_loop
.align 16
_less_than_32:
# mov initial crc to the return value. this is necessary for
# zero-length buffers.
mov arg1_low32, %eax
test arg3, arg3
je _cleanup
movdqa SHUF_MASK(%rip), %xmm11
movd arg1_low32, %xmm0 # get the initial crc value
pslldq $12, %xmm0 # align it to its correct place
cmp $16, arg3
je _exact_16_left
jl _less_than_16_left
movdqu (arg2), %xmm7 # load the plaintext
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
pxor %xmm0 , %xmm7 # xor the initial crc value
add $16, arg2
sub $16, arg3
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
jmp _get_last_two_xmms
.align 16
_less_than_16_left:
# use stack space to load data less than 16 bytes, zero-out
# the 16B in memory first.
pxor %xmm1, %xmm1
mov %rsp, %r11
movdqa %xmm1, (%r11)
cmp $4, arg3
jl _only_less_than_4
# backup the counter value
mov arg3, %r9
cmp $8, arg3
jl _less_than_8_left
# load 8 Bytes
mov (arg2), %rax
mov %rax, (%r11)
add $8, %r11
sub $8, arg3
add $8, arg2
_less_than_8_left:
cmp $4, arg3
jl _less_than_4_left
# load 4 Bytes
mov (arg2), %eax
mov %eax, (%r11)
add $4, %r11
sub $4, arg3
add $4, arg2
_less_than_4_left:
cmp $2, arg3
jl _less_than_2_left
# load 2 Bytes
mov (arg2), %ax
mov %ax, (%r11)
add $2, %r11
sub $2, arg3
add $2, arg2
_less_than_2_left:
cmp $1, arg3
jl _zero_left
# load 1 Byte
mov (arg2), %al
mov %al, (%r11)
_zero_left:
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
# shl r9, 4
lea pshufb_shf_table+16(%rip), %rax
sub %r9, %rax
movdqu (%rax), %xmm0
pxor mask1(%rip), %xmm0
pshufb %xmm0, %xmm7
jmp _128_done
.align 16
_exact_16_left:
movdqu (arg2), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
jmp _128_done
_only_less_than_4:
cmp $3, arg3
jl _only_less_than_3
# load 3 Bytes
mov (arg2), %al
mov %al, (%r11)
mov 1(arg2), %al
mov %al, 1(%r11)
mov 2(arg2), %al
mov %al, 2(%r11)
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
psrldq $5, %xmm7
jmp _barrett
_only_less_than_3:
cmp $2, arg3
jl _only_less_than_2
# load 2 Bytes
mov (arg2), %al
mov %al, (%r11)
mov 1(arg2), %al
mov %al, 1(%r11)
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
psrldq $6, %xmm7
jmp _barrett
_only_less_than_2:
# load 1 Byte
mov (arg2), %al
mov %al, (%r11)
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
psrldq $7, %xmm7
jmp _barrett
ENDPROC(crc_t10dif_pcl)
.data
# precomputed constants
# these constants are precomputed from the poly:
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
.align 16
# Q = 0x18BB70000
# rk1 = 2^(32*3) mod Q << 32
# rk2 = 2^(32*5) mod Q << 32
# rk3 = 2^(32*15) mod Q << 32
# rk4 = 2^(32*17) mod Q << 32
# rk5 = 2^(32*3) mod Q << 32
# rk6 = 2^(32*2) mod Q << 32
# rk7 = floor(2^64/Q)
# rk8 = Q
rk1:
.quad 0x2d56000000000000
rk2:
.quad 0x06df000000000000
rk3:
.quad 0x9d9d000000000000
rk4:
.quad 0x7cf5000000000000
rk5:
.quad 0x2d56000000000000
rk6:
.quad 0x1368000000000000
rk7:
.quad 0x00000001f65a57f8
rk8:
.quad 0x000000018bb70000
rk9:
.quad 0xceae000000000000
rk10:
.quad 0xbfd6000000000000
rk11:
.quad 0x1e16000000000000
rk12:
.quad 0x713c000000000000
rk13:
.quad 0xf7f9000000000000
rk14:
.quad 0x80a6000000000000
rk15:
.quad 0x044c000000000000
rk16:
.quad 0xe658000000000000
rk17:
.quad 0xad18000000000000
rk18:
.quad 0xa497000000000000
rk19:
.quad 0x6ee3000000000000
rk20:
.quad 0xe7b5000000000000
mask1:
.octa 0x80808080808080808080808080808080
mask2:
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
SHUF_MASK:
.octa 0x000102030405060708090A0B0C0D0E0F
pshufb_shf_table:
# use these values for shift constants for the pshufb instruction
# different alignments result in values as shown:
# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
.octa 0x8f8e8d8c8b8a89888786858483828100
.octa 0x000e0d0c0b0a09080706050403020100

View file

@ -0,0 +1,151 @@
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
*
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/i387.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
size_t len);
struct chksum_desc_ctx {
__u16 crc;
};
/*
* Steps through buffer one byte at at time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
if (irq_fpu_usable()) {
kernel_fpu_begin();
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
} else
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc;
return 0;
}
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
if (irq_fpu_usable()) {
kernel_fpu_begin();
*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
kernel_fpu_end();
} else
*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, length, out);
}
static struct shash_alg alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-pclmul",
.cra_priority = 200,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static const struct x86_cpu_id crct10dif_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
static int __init crct10dif_intel_mod_init(void)
{
if (!x86_match_cpu(crct10dif_cpu_id))
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit crct10dif_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crct10dif_intel_mod_init);
module_exit(crct10dif_intel_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crct10dif");
MODULE_ALIAS_CRYPTO("crct10dif-pclmul");

View file

@ -0,0 +1,805 @@
/*
* des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
.file "des3_ede-asm_64.S"
.text
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))
/* register macros */
#define CTX %rdi
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10
#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13
#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx
#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx
#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl
#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch
#define RT0 %r15
#define RT1 %rbp
#define RT2 %r14
#define RT3 %rdx
#define RT0d %r15d
#define RT1d %ebp
#define RT2d %r14d
#define RT3d %edx
/***********************************************************************
* 1-way 3DES
***********************************************************************/
#define do_permutation(a, b, offset, mask) \
movl a, RT0d; \
shrl $(offset), RT0d; \
xorl b, RT0d; \
andl $(mask), RT0d; \
xorl RT0d, b; \
shll $(offset), RT0d; \
xorl RT0d, a;
#define expand_to_64bits(val, mask) \
movl val##d, RT0d; \
rorl $4, RT0d; \
shlq $32, RT0; \
orq RT0, val; \
andq mask, val;
#define compress_to_64bits(val) \
movq val, RT0; \
shrq $32, RT0; \
roll $4, RT0d; \
orl RT0d, val##d;
#define initial_permutation(left, right) \
do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
do_permutation(left##d, right##d, 16, 0x0000ffff); \
do_permutation(right##d, left##d, 2, 0x33333333); \
do_permutation(right##d, left##d, 8, 0x00ff00ff); \
movabs $0x3f3f3f3f3f3f3f3f, RT3; \
movl left##d, RW0d; \
roll $1, right##d; \
xorl right##d, RW0d; \
andl $0xaaaaaaaa, RW0d; \
xorl RW0d, left##d; \
xorl RW0d, right##d; \
roll $1, left##d; \
expand_to_64bits(right, RT3); \
expand_to_64bits(left, RT3);
#define final_permutation(left, right) \
compress_to_64bits(right); \
compress_to_64bits(left); \
movl right##d, RW0d; \
rorl $1, left##d; \
xorl left##d, RW0d; \
andl $0xaaaaaaaa, RW0d; \
xorl RW0d, right##d; \
xorl RW0d, left##d; \
rorl $1, right##d; \
do_permutation(right##d, left##d, 8, 0x00ff00ff); \
do_permutation(right##d, left##d, 2, 0x33333333); \
do_permutation(left##d, right##d, 16, 0x0000ffff); \
do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
#define round1(n, from, to, load_next_key) \
xorq from, RW0; \
\
movzbl RW0bl, RT0d; \
movzbl RW0bh, RT1d; \
shrq $16, RW0; \
movzbl RW0bl, RT2d; \
movzbl RW0bh, RT3d; \
shrq $16, RW0; \
movq s8(, RT0, 8), RT0; \
xorq s6(, RT1, 8), to; \
movzbl RW0bl, RL1d; \
movzbl RW0bh, RT1d; \
shrl $16, RW0d; \
xorq s4(, RT2, 8), RT0; \
xorq s2(, RT3, 8), to; \
movzbl RW0bl, RT2d; \
movzbl RW0bh, RT3d; \
xorq s7(, RL1, 8), RT0; \
xorq s5(, RT1, 8), to; \
xorq s3(, RT2, 8), RT0; \
load_next_key(n, RW0); \
xorq RT0, to; \
xorq s1(, RT3, 8), to; \
#define load_next_key(n, RWx) \
movq (((n) + 1) * 8)(CTX), RWx;
#define dummy2(a, b) /*_*/
#define read_block(io, left, right) \
movl (io), left##d; \
movl 4(io), right##d; \
bswapl left##d; \
bswapl right##d;
#define write_block(io, left, right) \
bswapl left##d; \
bswapl right##d; \
movl left##d, (io); \
movl right##d, 4(io);
ENTRY(des3_ede_x86_64_crypt_blk)
/* input:
* %rdi: round keys, CTX
* %rsi: dst
* %rdx: src
*/
pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
read_block(%rdx, RL0, RR0);
initial_permutation(RL0, RR0);
movq (CTX), RW0;
round1(0, RR0, RL0, load_next_key);
round1(1, RL0, RR0, load_next_key);
round1(2, RR0, RL0, load_next_key);
round1(3, RL0, RR0, load_next_key);
round1(4, RR0, RL0, load_next_key);
round1(5, RL0, RR0, load_next_key);
round1(6, RR0, RL0, load_next_key);
round1(7, RL0, RR0, load_next_key);
round1(8, RR0, RL0, load_next_key);
round1(9, RL0, RR0, load_next_key);
round1(10, RR0, RL0, load_next_key);
round1(11, RL0, RR0, load_next_key);
round1(12, RR0, RL0, load_next_key);
round1(13, RL0, RR0, load_next_key);
round1(14, RR0, RL0, load_next_key);
round1(15, RL0, RR0, load_next_key);
round1(16+0, RL0, RR0, load_next_key);
round1(16+1, RR0, RL0, load_next_key);
round1(16+2, RL0, RR0, load_next_key);
round1(16+3, RR0, RL0, load_next_key);
round1(16+4, RL0, RR0, load_next_key);
round1(16+5, RR0, RL0, load_next_key);
round1(16+6, RL0, RR0, load_next_key);
round1(16+7, RR0, RL0, load_next_key);
round1(16+8, RL0, RR0, load_next_key);
round1(16+9, RR0, RL0, load_next_key);
round1(16+10, RL0, RR0, load_next_key);
round1(16+11, RR0, RL0, load_next_key);
round1(16+12, RL0, RR0, load_next_key);
round1(16+13, RR0, RL0, load_next_key);
round1(16+14, RL0, RR0, load_next_key);
round1(16+15, RR0, RL0, load_next_key);
round1(32+0, RR0, RL0, load_next_key);
round1(32+1, RL0, RR0, load_next_key);
round1(32+2, RR0, RL0, load_next_key);
round1(32+3, RL0, RR0, load_next_key);
round1(32+4, RR0, RL0, load_next_key);
round1(32+5, RL0, RR0, load_next_key);
round1(32+6, RR0, RL0, load_next_key);
round1(32+7, RL0, RR0, load_next_key);
round1(32+8, RR0, RL0, load_next_key);
round1(32+9, RL0, RR0, load_next_key);
round1(32+10, RR0, RL0, load_next_key);
round1(32+11, RL0, RR0, load_next_key);
round1(32+12, RR0, RL0, load_next_key);
round1(32+13, RL0, RR0, load_next_key);
round1(32+14, RR0, RL0, load_next_key);
round1(32+15, RL0, RR0, dummy2);
final_permutation(RR0, RL0);
write_block(%rsi, RR0, RL0);
popq %r15;
popq %r14;
popq %r13;
popq %r12;
popq %rbx;
popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
/***********************************************************************
* 3-way 3DES
***********************************************************************/
#define expand_to_64bits(val, mask) \
movl val##d, RT0d; \
rorl $4, RT0d; \
shlq $32, RT0; \
orq RT0, val; \
andq mask, val;
#define compress_to_64bits(val) \
movq val, RT0; \
shrq $32, RT0; \
roll $4, RT0d; \
orl RT0d, val##d;
#define initial_permutation3(left, right) \
do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
\
do_permutation(right##0d, left##0d, 2, 0x33333333); \
do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
do_permutation(right##1d, left##1d, 2, 0x33333333); \
do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
do_permutation(right##2d, left##2d, 2, 0x33333333); \
do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
\
movabs $0x3f3f3f3f3f3f3f3f, RT3; \
\
movl left##0d, RW0d; \
roll $1, right##0d; \
xorl right##0d, RW0d; \
andl $0xaaaaaaaa, RW0d; \
xorl RW0d, left##0d; \
xorl RW0d, right##0d; \
roll $1, left##0d; \
expand_to_64bits(right##0, RT3); \
expand_to_64bits(left##0, RT3); \
movl left##1d, RW1d; \
roll $1, right##1d; \
xorl right##1d, RW1d; \
andl $0xaaaaaaaa, RW1d; \
xorl RW1d, left##1d; \
xorl RW1d, right##1d; \
roll $1, left##1d; \
expand_to_64bits(right##1, RT3); \
expand_to_64bits(left##1, RT3); \
movl left##2d, RW2d; \
roll $1, right##2d; \
xorl right##2d, RW2d; \
andl $0xaaaaaaaa, RW2d; \
xorl RW2d, left##2d; \
xorl RW2d, right##2d; \
roll $1, left##2d; \
expand_to_64bits(right##2, RT3); \
expand_to_64bits(left##2, RT3);
#define final_permutation3(left, right) \
compress_to_64bits(right##0); \
compress_to_64bits(left##0); \
movl right##0d, RW0d; \
rorl $1, left##0d; \
xorl left##0d, RW0d; \
andl $0xaaaaaaaa, RW0d; \
xorl RW0d, right##0d; \
xorl RW0d, left##0d; \
rorl $1, right##0d; \
compress_to_64bits(right##1); \
compress_to_64bits(left##1); \
movl right##1d, RW1d; \
rorl $1, left##1d; \
xorl left##1d, RW1d; \
andl $0xaaaaaaaa, RW1d; \
xorl RW1d, right##1d; \
xorl RW1d, left##1d; \
rorl $1, right##1d; \
compress_to_64bits(right##2); \
compress_to_64bits(left##2); \
movl right##2d, RW2d; \
rorl $1, left##2d; \
xorl left##2d, RW2d; \
andl $0xaaaaaaaa, RW2d; \
xorl RW2d, right##2d; \
xorl RW2d, left##2d; \
rorl $1, right##2d; \
\
do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
do_permutation(right##0d, left##0d, 2, 0x33333333); \
do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
do_permutation(right##1d, left##1d, 2, 0x33333333); \
do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
do_permutation(right##2d, left##2d, 2, 0x33333333); \
\
do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
#define round3(n, from, to, load_next_key, do_movq) \
xorq from##0, RW0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
shrq $16, RW0; \
xorq s8(, RT3, 8), to##0; \
xorq s6(, RT1, 8), to##0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
shrq $16, RW0; \
xorq s4(, RT3, 8), to##0; \
xorq s2(, RT1, 8), to##0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
shrl $16, RW0d; \
xorq s7(, RT3, 8), to##0; \
xorq s5(, RT1, 8), to##0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
load_next_key(n, RW0); \
xorq s3(, RT3, 8), to##0; \
xorq s1(, RT1, 8), to##0; \
xorq from##1, RW1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
shrq $16, RW1; \
xorq s8(, RT3, 8), to##1; \
xorq s6(, RT1, 8), to##1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
shrq $16, RW1; \
xorq s4(, RT3, 8), to##1; \
xorq s2(, RT1, 8), to##1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
shrl $16, RW1d; \
xorq s7(, RT3, 8), to##1; \
xorq s5(, RT1, 8), to##1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
do_movq(RW0, RW1); \
xorq s3(, RT3, 8), to##1; \
xorq s1(, RT1, 8), to##1; \
xorq from##2, RW2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
shrq $16, RW2; \
xorq s8(, RT3, 8), to##2; \
xorq s6(, RT1, 8), to##2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
shrq $16, RW2; \
xorq s4(, RT3, 8), to##2; \
xorq s2(, RT1, 8), to##2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
shrl $16, RW2d; \
xorq s7(, RT3, 8), to##2; \
xorq s5(, RT1, 8), to##2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
do_movq(RW0, RW2); \
xorq s3(, RT3, 8), to##2; \
xorq s1(, RT1, 8), to##2;
#define __movq(src, dst) \
movq src, dst;
ENTRY(des3_ede_x86_64_crypt_blk_3way)
/* input:
* %rdi: ctx, round keys
* %rsi: dst (3 blocks)
* %rdx: src (3 blocks)
*/
pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
/* load input */
movl 0 * 4(%rdx), RL0d;
movl 1 * 4(%rdx), RR0d;
movl 2 * 4(%rdx), RL1d;
movl 3 * 4(%rdx), RR1d;
movl 4 * 4(%rdx), RL2d;
movl 5 * 4(%rdx), RR2d;
bswapl RL0d;
bswapl RR0d;
bswapl RL1d;
bswapl RR1d;
bswapl RL2d;
bswapl RR2d;
initial_permutation3(RL, RR);
movq 0(CTX), RW0;
movq RW0, RW1;
movq RW0, RW2;
round3(0, RR, RL, load_next_key, __movq);
round3(1, RL, RR, load_next_key, __movq);
round3(2, RR, RL, load_next_key, __movq);
round3(3, RL, RR, load_next_key, __movq);
round3(4, RR, RL, load_next_key, __movq);
round3(5, RL, RR, load_next_key, __movq);
round3(6, RR, RL, load_next_key, __movq);
round3(7, RL, RR, load_next_key, __movq);
round3(8, RR, RL, load_next_key, __movq);
round3(9, RL, RR, load_next_key, __movq);
round3(10, RR, RL, load_next_key, __movq);
round3(11, RL, RR, load_next_key, __movq);
round3(12, RR, RL, load_next_key, __movq);
round3(13, RL, RR, load_next_key, __movq);
round3(14, RR, RL, load_next_key, __movq);
round3(15, RL, RR, load_next_key, __movq);
round3(16+0, RL, RR, load_next_key, __movq);
round3(16+1, RR, RL, load_next_key, __movq);
round3(16+2, RL, RR, load_next_key, __movq);
round3(16+3, RR, RL, load_next_key, __movq);
round3(16+4, RL, RR, load_next_key, __movq);
round3(16+5, RR, RL, load_next_key, __movq);
round3(16+6, RL, RR, load_next_key, __movq);
round3(16+7, RR, RL, load_next_key, __movq);
round3(16+8, RL, RR, load_next_key, __movq);
round3(16+9, RR, RL, load_next_key, __movq);
round3(16+10, RL, RR, load_next_key, __movq);
round3(16+11, RR, RL, load_next_key, __movq);
round3(16+12, RL, RR, load_next_key, __movq);
round3(16+13, RR, RL, load_next_key, __movq);
round3(16+14, RL, RR, load_next_key, __movq);
round3(16+15, RR, RL, load_next_key, __movq);
round3(32+0, RR, RL, load_next_key, __movq);
round3(32+1, RL, RR, load_next_key, __movq);
round3(32+2, RR, RL, load_next_key, __movq);
round3(32+3, RL, RR, load_next_key, __movq);
round3(32+4, RR, RL, load_next_key, __movq);
round3(32+5, RL, RR, load_next_key, __movq);
round3(32+6, RR, RL, load_next_key, __movq);
round3(32+7, RL, RR, load_next_key, __movq);
round3(32+8, RR, RL, load_next_key, __movq);
round3(32+9, RL, RR, load_next_key, __movq);
round3(32+10, RR, RL, load_next_key, __movq);
round3(32+11, RL, RR, load_next_key, __movq);
round3(32+12, RR, RL, load_next_key, __movq);
round3(32+13, RL, RR, load_next_key, __movq);
round3(32+14, RR, RL, load_next_key, __movq);
round3(32+15, RL, RR, dummy2, dummy2);
final_permutation3(RR, RL);
bswapl RR0d;
bswapl RL0d;
bswapl RR1d;
bswapl RL1d;
bswapl RR2d;
bswapl RL2d;
movl RR0d, 0 * 4(%rsi);
movl RL0d, 1 * 4(%rsi);
movl RR1d, 2 * 4(%rsi);
movl RL1d, 3 * 4(%rsi);
movl RR2d, 4 * 4(%rsi);
movl RL2d, 5 * 4(%rsi);
popq %r15;
popq %r14;
popq %r13;
popq %r12;
popq %rbx;
popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
.data
.align 16
.L_s1:
.quad 0x0010100001010400, 0x0000000000000000
.quad 0x0000100000010000, 0x0010100001010404
.quad 0x0010100001010004, 0x0000100000010404
.quad 0x0000000000000004, 0x0000100000010000
.quad 0x0000000000000400, 0x0010100001010400
.quad 0x0010100001010404, 0x0000000000000400
.quad 0x0010000001000404, 0x0010100001010004
.quad 0x0010000001000000, 0x0000000000000004
.quad 0x0000000000000404, 0x0010000001000400
.quad 0x0010000001000400, 0x0000100000010400
.quad 0x0000100000010400, 0x0010100001010000
.quad 0x0010100001010000, 0x0010000001000404
.quad 0x0000100000010004, 0x0010000001000004
.quad 0x0010000001000004, 0x0000100000010004
.quad 0x0000000000000000, 0x0000000000000404
.quad 0x0000100000010404, 0x0010000001000000
.quad 0x0000100000010000, 0x0010100001010404
.quad 0x0000000000000004, 0x0010100001010000
.quad 0x0010100001010400, 0x0010000001000000
.quad 0x0010000001000000, 0x0000000000000400
.quad 0x0010100001010004, 0x0000100000010000
.quad 0x0000100000010400, 0x0010000001000004
.quad 0x0000000000000400, 0x0000000000000004
.quad 0x0010000001000404, 0x0000100000010404
.quad 0x0010100001010404, 0x0000100000010004
.quad 0x0010100001010000, 0x0010000001000404
.quad 0x0010000001000004, 0x0000000000000404
.quad 0x0000100000010404, 0x0010100001010400
.quad 0x0000000000000404, 0x0010000001000400
.quad 0x0010000001000400, 0x0000000000000000
.quad 0x0000100000010004, 0x0000100000010400
.quad 0x0000000000000000, 0x0010100001010004
.L_s2:
.quad 0x0801080200100020, 0x0800080000000000
.quad 0x0000080000000000, 0x0001080200100020
.quad 0x0001000000100000, 0x0000000200000020
.quad 0x0801000200100020, 0x0800080200000020
.quad 0x0800000200000020, 0x0801080200100020
.quad 0x0801080000100000, 0x0800000000000000
.quad 0x0800080000000000, 0x0001000000100000
.quad 0x0000000200000020, 0x0801000200100020
.quad 0x0001080000100000, 0x0001000200100020
.quad 0x0800080200000020, 0x0000000000000000
.quad 0x0800000000000000, 0x0000080000000000
.quad 0x0001080200100020, 0x0801000000100000
.quad 0x0001000200100020, 0x0800000200000020
.quad 0x0000000000000000, 0x0001080000100000
.quad 0x0000080200000020, 0x0801080000100000
.quad 0x0801000000100000, 0x0000080200000020
.quad 0x0000000000000000, 0x0001080200100020
.quad 0x0801000200100020, 0x0001000000100000
.quad 0x0800080200000020, 0x0801000000100000
.quad 0x0801080000100000, 0x0000080000000000
.quad 0x0801000000100000, 0x0800080000000000
.quad 0x0000000200000020, 0x0801080200100020
.quad 0x0001080200100020, 0x0000000200000020
.quad 0x0000080000000000, 0x0800000000000000
.quad 0x0000080200000020, 0x0801080000100000
.quad 0x0001000000100000, 0x0800000200000020
.quad 0x0001000200100020, 0x0800080200000020
.quad 0x0800000200000020, 0x0001000200100020
.quad 0x0001080000100000, 0x0000000000000000
.quad 0x0800080000000000, 0x0000080200000020
.quad 0x0800000000000000, 0x0801000200100020
.quad 0x0801080200100020, 0x0001080000100000
.L_s3:
.quad 0x0000002000000208, 0x0000202008020200
.quad 0x0000000000000000, 0x0000200008020008
.quad 0x0000002008000200, 0x0000000000000000
.quad 0x0000202000020208, 0x0000002008000200
.quad 0x0000200000020008, 0x0000000008000008
.quad 0x0000000008000008, 0x0000200000020000
.quad 0x0000202008020208, 0x0000200000020008
.quad 0x0000200008020000, 0x0000002000000208
.quad 0x0000000008000000, 0x0000000000000008
.quad 0x0000202008020200, 0x0000002000000200
.quad 0x0000202000020200, 0x0000200008020000
.quad 0x0000200008020008, 0x0000202000020208
.quad 0x0000002008000208, 0x0000202000020200
.quad 0x0000200000020000, 0x0000002008000208
.quad 0x0000000000000008, 0x0000202008020208
.quad 0x0000002000000200, 0x0000000008000000
.quad 0x0000202008020200, 0x0000000008000000
.quad 0x0000200000020008, 0x0000002000000208
.quad 0x0000200000020000, 0x0000202008020200
.quad 0x0000002008000200, 0x0000000000000000
.quad 0x0000002000000200, 0x0000200000020008
.quad 0x0000202008020208, 0x0000002008000200
.quad 0x0000000008000008, 0x0000002000000200
.quad 0x0000000000000000, 0x0000200008020008
.quad 0x0000002008000208, 0x0000200000020000
.quad 0x0000000008000000, 0x0000202008020208
.quad 0x0000000000000008, 0x0000202000020208
.quad 0x0000202000020200, 0x0000000008000008
.quad 0x0000200008020000, 0x0000002008000208
.quad 0x0000002000000208, 0x0000200008020000
.quad 0x0000202000020208, 0x0000000000000008
.quad 0x0000200008020008, 0x0000202000020200
.L_s4:
.quad 0x1008020000002001, 0x1000020800002001
.quad 0x1000020800002001, 0x0000000800000000
.quad 0x0008020800002000, 0x1008000800000001
.quad 0x1008000000000001, 0x1000020000002001
.quad 0x0000000000000000, 0x0008020000002000
.quad 0x0008020000002000, 0x1008020800002001
.quad 0x1000000800000001, 0x0000000000000000
.quad 0x0008000800000000, 0x1008000000000001
.quad 0x1000000000000001, 0x0000020000002000
.quad 0x0008000000000000, 0x1008020000002001
.quad 0x0000000800000000, 0x0008000000000000
.quad 0x1000020000002001, 0x0000020800002000
.quad 0x1008000800000001, 0x1000000000000001
.quad 0x0000020800002000, 0x0008000800000000
.quad 0x0000020000002000, 0x0008020800002000
.quad 0x1008020800002001, 0x1000000800000001
.quad 0x0008000800000000, 0x1008000000000001
.quad 0x0008020000002000, 0x1008020800002001
.quad 0x1000000800000001, 0x0000000000000000
.quad 0x0000000000000000, 0x0008020000002000
.quad 0x0000020800002000, 0x0008000800000000
.quad 0x1008000800000001, 0x1000000000000001
.quad 0x1008020000002001, 0x1000020800002001
.quad 0x1000020800002001, 0x0000000800000000
.quad 0x1008020800002001, 0x1000000800000001
.quad 0x1000000000000001, 0x0000020000002000
.quad 0x1008000000000001, 0x1000020000002001
.quad 0x0008020800002000, 0x1008000800000001
.quad 0x1000020000002001, 0x0000020800002000
.quad 0x0008000000000000, 0x1008020000002001
.quad 0x0000000800000000, 0x0008000000000000
.quad 0x0000020000002000, 0x0008020800002000
.L_s5:
.quad 0x0000001000000100, 0x0020001002080100
.quad 0x0020000002080000, 0x0420001002000100
.quad 0x0000000000080000, 0x0000001000000100
.quad 0x0400000000000000, 0x0020000002080000
.quad 0x0400001000080100, 0x0000000000080000
.quad 0x0020001002000100, 0x0400001000080100
.quad 0x0420001002000100, 0x0420000002080000
.quad 0x0000001000080100, 0x0400000000000000
.quad 0x0020000002000000, 0x0400000000080000
.quad 0x0400000000080000, 0x0000000000000000
.quad 0x0400001000000100, 0x0420001002080100
.quad 0x0420001002080100, 0x0020001002000100
.quad 0x0420000002080000, 0x0400001000000100
.quad 0x0000000000000000, 0x0420000002000000
.quad 0x0020001002080100, 0x0020000002000000
.quad 0x0420000002000000, 0x0000001000080100
.quad 0x0000000000080000, 0x0420001002000100
.quad 0x0000001000000100, 0x0020000002000000
.quad 0x0400000000000000, 0x0020000002080000
.quad 0x0420001002000100, 0x0400001000080100
.quad 0x0020001002000100, 0x0400000000000000
.quad 0x0420000002080000, 0x0020001002080100
.quad 0x0400001000080100, 0x0000001000000100
.quad 0x0020000002000000, 0x0420000002080000
.quad 0x0420001002080100, 0x0000001000080100
.quad 0x0420000002000000, 0x0420001002080100
.quad 0x0020000002080000, 0x0000000000000000
.quad 0x0400000000080000, 0x0420000002000000
.quad 0x0000001000080100, 0x0020001002000100
.quad 0x0400001000000100, 0x0000000000080000
.quad 0x0000000000000000, 0x0400000000080000
.quad 0x0020001002080100, 0x0400001000000100
.L_s6:
.quad 0x0200000120000010, 0x0204000020000000
.quad 0x0000040000000000, 0x0204040120000010
.quad 0x0204000020000000, 0x0000000100000010
.quad 0x0204040120000010, 0x0004000000000000
.quad 0x0200040020000000, 0x0004040100000010
.quad 0x0004000000000000, 0x0200000120000010
.quad 0x0004000100000010, 0x0200040020000000
.quad 0x0200000020000000, 0x0000040100000010
.quad 0x0000000000000000, 0x0004000100000010
.quad 0x0200040120000010, 0x0000040000000000
.quad 0x0004040000000000, 0x0200040120000010
.quad 0x0000000100000010, 0x0204000120000010
.quad 0x0204000120000010, 0x0000000000000000
.quad 0x0004040100000010, 0x0204040020000000
.quad 0x0000040100000010, 0x0004040000000000
.quad 0x0204040020000000, 0x0200000020000000
.quad 0x0200040020000000, 0x0000000100000010
.quad 0x0204000120000010, 0x0004040000000000
.quad 0x0204040120000010, 0x0004000000000000
.quad 0x0000040100000010, 0x0200000120000010
.quad 0x0004000000000000, 0x0200040020000000
.quad 0x0200000020000000, 0x0000040100000010
.quad 0x0200000120000010, 0x0204040120000010
.quad 0x0004040000000000, 0x0204000020000000
.quad 0x0004040100000010, 0x0204040020000000
.quad 0x0000000000000000, 0x0204000120000010
.quad 0x0000000100000010, 0x0000040000000000
.quad 0x0204000020000000, 0x0004040100000010
.quad 0x0000040000000000, 0x0004000100000010
.quad 0x0200040120000010, 0x0000000000000000
.quad 0x0204040020000000, 0x0200000020000000
.quad 0x0004000100000010, 0x0200040120000010
.L_s7:
.quad 0x0002000000200000, 0x2002000004200002
.quad 0x2000000004000802, 0x0000000000000000
.quad 0x0000000000000800, 0x2000000004000802
.quad 0x2002000000200802, 0x0002000004200800
.quad 0x2002000004200802, 0x0002000000200000
.quad 0x0000000000000000, 0x2000000004000002
.quad 0x2000000000000002, 0x0000000004000000
.quad 0x2002000004200002, 0x2000000000000802
.quad 0x0000000004000800, 0x2002000000200802
.quad 0x2002000000200002, 0x0000000004000800
.quad 0x2000000004000002, 0x0002000004200000
.quad 0x0002000004200800, 0x2002000000200002
.quad 0x0002000004200000, 0x0000000000000800
.quad 0x2000000000000802, 0x2002000004200802
.quad 0x0002000000200800, 0x2000000000000002
.quad 0x0000000004000000, 0x0002000000200800
.quad 0x0000000004000000, 0x0002000000200800
.quad 0x0002000000200000, 0x2000000004000802
.quad 0x2000000004000802, 0x2002000004200002
.quad 0x2002000004200002, 0x2000000000000002
.quad 0x2002000000200002, 0x0000000004000000
.quad 0x0000000004000800, 0x0002000000200000
.quad 0x0002000004200800, 0x2000000000000802
.quad 0x2002000000200802, 0x0002000004200800
.quad 0x2000000000000802, 0x2000000004000002
.quad 0x2002000004200802, 0x0002000004200000
.quad 0x0002000000200800, 0x0000000000000000
.quad 0x2000000000000002, 0x2002000004200802
.quad 0x0000000000000000, 0x2002000000200802
.quad 0x0002000004200000, 0x0000000000000800
.quad 0x2000000004000002, 0x0000000004000800
.quad 0x0000000000000800, 0x2002000000200002
.L_s8:
.quad 0x0100010410001000, 0x0000010000001000
.quad 0x0000000000040000, 0x0100010410041000
.quad 0x0100000010000000, 0x0100010410001000
.quad 0x0000000400000000, 0x0100000010000000
.quad 0x0000000400040000, 0x0100000010040000
.quad 0x0100010410041000, 0x0000010000041000
.quad 0x0100010010041000, 0x0000010400041000
.quad 0x0000010000001000, 0x0000000400000000
.quad 0x0100000010040000, 0x0100000410000000
.quad 0x0100010010001000, 0x0000010400001000
.quad 0x0000010000041000, 0x0000000400040000
.quad 0x0100000410040000, 0x0100010010041000
.quad 0x0000010400001000, 0x0000000000000000
.quad 0x0000000000000000, 0x0100000410040000
.quad 0x0100000410000000, 0x0100010010001000
.quad 0x0000010400041000, 0x0000000000040000
.quad 0x0000010400041000, 0x0000000000040000
.quad 0x0100010010041000, 0x0000010000001000
.quad 0x0000000400000000, 0x0100000410040000
.quad 0x0000010000001000, 0x0000010400041000
.quad 0x0100010010001000, 0x0000000400000000
.quad 0x0100000410000000, 0x0100000010040000
.quad 0x0100000410040000, 0x0100000010000000
.quad 0x0000000000040000, 0x0100010410001000
.quad 0x0000000000000000, 0x0100010410041000
.quad 0x0000000400040000, 0x0100000410000000
.quad 0x0100000010040000, 0x0100010010001000
.quad 0x0100010410001000, 0x0000000000000000
.quad 0x0100010410041000, 0x0000010000041000
.quad 0x0000010000041000, 0x0000010400001000
.quad 0x0000010400001000, 0x0000000400040000
.quad 0x0100000010000000, 0x0100010010041000

View file

@ -0,0 +1,509 @@
/*
* Glue Code for assembler optimized version of 3DES
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <asm/processor.h>
#include <crypto/des.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
struct des3_ede_x86_ctx {
u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
};
/* regular block cipher functions */
asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
const u8 *src);
/* 3-way parallel cipher functions */
asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
const u8 *src);
static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *enc_ctx = ctx->enc_expkey;
des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
}
static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *dec_ctx = ctx->dec_expkey;
des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
}
static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *enc_ctx = ctx->enc_expkey;
des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
}
static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *dec_ctx = ctx->dec_expkey;
des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
}
static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
const u32 *expkey)
{
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes;
int err;
err = blkcipher_walk_virt(desc, walk);
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 3) {
do {
des3_ede_x86_64_crypt_blk_3way(expkey, wdst,
wsrc);
wsrc += bsize * 3;
wdst += bsize * 3;
nbytes -= bsize * 3;
} while (nbytes >= bsize * 3);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
}
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, ctx->enc_expkey);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, ctx->dec_expkey);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ivs[3 - 1];
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process four block batch */
if (nbytes >= bsize * 3) {
do {
nbytes -= bsize * 3 - bsize;
src -= 3 - 1;
dst -= 3 - 1;
ivs[0] = src[0];
ivs[1] = src[1];
des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
dst[1] ^= ivs[0];
dst[2] ^= ivs[1];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 3);
}
/* Handle leftovers */
for (;;) {
des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
struct blkcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[DES3_EDE_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
des3_ede_enc_blk(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
__be64 *src = (__be64 *)walk->src.virt.addr;
__be64 *dst = (__be64 *)walk->dst.virt.addr;
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
__be64 ctrblocks[3];
/* Process four block batch */
if (nbytes >= bsize * 3) {
do {
/* create ctrblks for parallel encrypt */
ctrblocks[0] = cpu_to_be64(ctrblk++);
ctrblocks[1] = cpu_to_be64(ctrblk++);
ctrblocks[2] = cpu_to_be64(ctrblk++);
des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
(u8 *)ctrblocks);
dst[0] = src[0] ^ ctrblocks[0];
dst[1] = src[1] ^ ctrblocks[1];
dst[2] = src[2] ^ ctrblocks[2];
src += 3;
dst += 3;
} while ((nbytes -= bsize * 3) >= bsize * 3);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
ctrblocks[0] = cpu_to_be64(ctrblk++);
des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
dst[0] = src[0] ^ ctrblocks[0];
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
return nbytes;
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
u32 i, j, tmp;
int err;
/* Generate encryption context using generic implementation. */
err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
if (err < 0)
return err;
/* Fix encryption context for this implementation and form decryption
* context. */
j = DES3_EDE_EXPKEY_WORDS - 2;
for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
tmp = ror32(ctx->enc_expkey[i + 1], 4);
ctx->enc_expkey[i + 1] = tmp;
ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
ctx->dec_expkey[j + 1] = tmp;
}
return 0;
}
static struct crypto_alg des3_ede_algs[4] = { {
.cra_name = "des3_ede",
.cra_driver_name = "des3_ede-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = DES3_EDE_KEY_SIZE,
.cia_max_keysize = DES3_EDE_KEY_SIZE,
.cia_setkey = des3_ede_x86_setkey,
.cia_encrypt = des3_ede_x86_encrypt,
.cia_decrypt = des3_ede_x86_decrypt,
}
}
}, {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "ecb-des3_ede-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.setkey = des3_ede_x86_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "cbc(des3_ede)",
.cra_driver_name = "cbc-des3_ede-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
.setkey = des3_ede_x86_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "ctr(des3_ede)",
.cra_driver_name = "ctr-des3_ede-asm",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
.setkey = des3_ede_x86_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
} };
static bool is_blacklisted_cpu(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
if (boot_cpu_data.x86 == 0x0f) {
/*
* On Pentium 4, des3_ede-x86_64 is slower than generic C
* implementation because use of 64bit rotates (which are really
* slow on P4). Therefore blacklist P4s.
*/
return true;
}
return false;
}
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init des3_ede_x86_init(void)
{
if (!force && is_blacklisted_cpu()) {
pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
return -ENODEV;
}
return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
}
static void __exit des3_ede_x86_fini(void)
{
crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
}
module_init(des3_ede_x86_init);
module_exit(des3_ede_x86_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("des3_ede");
MODULE_ALIAS_CRYPTO("des3_ede-asm");
MODULE_ALIAS_CRYPTO("des");
MODULE_ALIAS_CRYPTO("des-asm");
MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");

164
arch/x86/crypto/fpu.c Normal file
View file

@ -0,0 +1,164 @@
/*
* FPU: Wrapper for blkcipher touching fpu
*
* Copyright (c) Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/crypto.h>
#include <asm/i387.h>
struct crypto_fpu_ctx {
struct crypto_blkcipher *child;
};
static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
unsigned int keylen)
{
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
struct crypto_blkcipher *child = ctx->child;
int err;
crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
CRYPTO_TFM_REQ_MASK);
err = crypto_blkcipher_setkey(child, key, keylen);
crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
CRYPTO_TFM_RES_MASK);
return err;
}
static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
int err;
struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
struct crypto_blkcipher *child = ctx->child;
struct blkcipher_desc desc = {
.tfm = child,
.info = desc_in->info,
.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
};
kernel_fpu_begin();
err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
kernel_fpu_end();
return err;
}
static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
int err;
struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
struct crypto_blkcipher *child = ctx->child;
struct blkcipher_desc desc = {
.tfm = child,
.info = desc_in->info,
.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
};
kernel_fpu_begin();
err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
kernel_fpu_end();
return err;
}
static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
{
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
struct crypto_spawn *spawn = crypto_instance_ctx(inst);
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto_blkcipher *cipher;
cipher = crypto_spawn_blkcipher(spawn);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
ctx->child = cipher;
return 0;
}
static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
{
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
crypto_free_blkcipher(ctx->child);
}
static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
{
struct crypto_instance *inst;
struct crypto_alg *alg;
int err;
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
if (err)
return ERR_PTR(err);
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
CRYPTO_ALG_TYPE_MASK);
if (IS_ERR(alg))
return ERR_CAST(alg);
inst = crypto_alloc_instance("fpu", alg);
if (IS_ERR(inst))
goto out_put_alg;
inst->alg.cra_flags = alg->cra_flags;
inst->alg.cra_priority = alg->cra_priority;
inst->alg.cra_blocksize = alg->cra_blocksize;
inst->alg.cra_alignmask = alg->cra_alignmask;
inst->alg.cra_type = alg->cra_type;
inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
inst->alg.cra_init = crypto_fpu_init_tfm;
inst->alg.cra_exit = crypto_fpu_exit_tfm;
inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
out_put_alg:
crypto_mod_put(alg);
return inst;
}
static void crypto_fpu_free(struct crypto_instance *inst)
{
crypto_drop_spawn(crypto_instance_ctx(inst));
kfree(inst);
}
static struct crypto_template crypto_fpu_tmpl = {
.name = "fpu",
.alloc = crypto_fpu_alloc,
.free = crypto_fpu_free,
.module = THIS_MODULE,
};
int __init crypto_fpu_init(void)
{
return crypto_register_template(&crypto_fpu_tmpl);
}
void __exit crypto_fpu_exit(void)
{
crypto_unregister_template(&crypto_fpu_tmpl);
}
MODULE_ALIAS_CRYPTO("fpu");

View file

@ -0,0 +1,132 @@
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains accelerated part of ghash
* implementation. More information about PCLMULQDQ can be found at:
*
* http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/inst.h>
.data
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
#define DATA %xmm0
#define SHASH %xmm1
#define T1 %xmm2
#define T2 %xmm3
#define T3 %xmm4
#define BSWAP %xmm5
#define IN1 %xmm6
.text
/*
* __clmul_gf128mul_ble: internal ABI
* input:
* DATA: operand1
* SHASH: operand2, hash_key << 1 mod poly
* output:
* DATA: operand1 * operand2 mod poly
* changed:
* T1
* T2
* T3
*/
__clmul_gf128mul_ble:
movaps DATA, T1
pshufd $0b01001110, DATA, T2
pshufd $0b01001110, SHASH, T3
pxor DATA, T2
pxor SHASH, T3
PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
pxor DATA, T2
pxor T1, T2 # T2 = a0 * b1 + a1 * b0
movaps T2, T3
pslldq $8, T3
psrldq $8, T2
pxor T3, DATA
pxor T2, T1 # <T1:DATA> is result of
# carry-less multiplication
# first phase of the reduction
movaps DATA, T3
psllq $1, T3
pxor DATA, T3
psllq $5, T3
pxor DATA, T3
psllq $57, T3
movaps T3, T2
pslldq $8, T2
psrldq $8, T3
pxor T2, DATA
pxor T3, T1
# second phase of the reduction
movaps DATA, T2
psrlq $5, T2
pxor DATA, T2
psrlq $1, T2
pxor DATA, T2
psrlq $1, T2
pxor T2, T1
pxor T1, DATA
ret
ENDPROC(__clmul_gf128mul_ble)
/* void clmul_ghash_mul(char *dst, const u128 *shash) */
ENTRY(clmul_ghash_mul)
movups (%rdi), DATA
movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP
PSHUFB_XMM BSWAP DATA
call __clmul_gf128mul_ble
PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi)
ret
ENDPROC(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
* const u128 *shash);
*/
ENTRY(clmul_ghash_update)
cmp $16, %rdx
jb .Lupdate_just_ret # check length
movaps .Lbswap_mask, BSWAP
movups (%rdi), DATA
movups (%rcx), SHASH
PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
movups (%rsi), IN1
PSHUFB_XMM BSWAP IN1
pxor IN1, DATA
call __clmul_gf128mul_ble
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lupdate_loop
PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi)
.Lupdate_just_ret:
ret
ENDPROC(clmul_ghash_update)

View file

@ -0,0 +1,344 @@
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains glue code.
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/cryptd.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <asm/i387.h>
#include <asm/cpu_device_id.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
void clmul_ghash_mul(char *dst, const u128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const u128 *shash);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
struct ghash_ctx {
u128 shash;
};
struct ghash_desc_ctx {
u8 buffer[GHASH_BLOCK_SIZE];
u32 bytes;
};
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
return 0;
}
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
be128 *x = (be128 *)key;
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
/* perform multiplication by 'x' in GF(2^128) */
a = be64_to_cpu(x->a);
b = be64_to_cpu(x->b);
ctx->shash.a = (b << 1) | (a >> 63);
ctx->shash.b = (a << 1) | (b >> 63);
if (a >> 63)
ctx->shash.b ^= ((u64)0xc2) << 56;
return 0;
}
static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *dst = dctx->buffer;
kernel_fpu_begin();
if (dctx->bytes) {
int n = min(srclen, dctx->bytes);
u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
dctx->bytes -= n;
srclen -= n;
while (n--)
*pos++ ^= *src++;
if (!dctx->bytes)
clmul_ghash_mul(dst, &ctx->shash);
}
clmul_ghash_update(dst, src, srclen, &ctx->shash);
kernel_fpu_end();
if (srclen & 0xf) {
src += srclen - (srclen & 0xf);
srclen &= 0xf;
dctx->bytes = GHASH_BLOCK_SIZE - srclen;
while (srclen--)
*dst++ ^= *src++;
}
return 0;
}
static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
{
u8 *dst = dctx->buffer;
if (dctx->bytes) {
u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
while (dctx->bytes--)
*tmp++ ^= 0;
kernel_fpu_begin();
clmul_ghash_mul(dst, &ctx->shash);
kernel_fpu_end();
}
dctx->bytes = 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *buf = dctx->buffer;
ghash_flush(ctx, dctx);
memcpy(dst, buf, GHASH_BLOCK_SIZE);
return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "__ghash",
.cra_driver_name = "__ghash-pclmulqdqni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
.cra_module = THIS_MODULE,
},
};
static int ghash_async_init(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!irq_fpu_usable()) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_init(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
desc->flags = req->base.flags;
return crypto_shash_init(desc);
}
}
static int ghash_async_update(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
if (!irq_fpu_usable()) {
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_update(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
return shash_ahash_update(req, desc);
}
}
static int ghash_async_final(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
if (!irq_fpu_usable()) {
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_final(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
return crypto_shash_final(desc, req->result);
}
}
static int ghash_async_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!irq_fpu_usable()) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_digest(cryptd_req);
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
desc->flags = req->base.flags;
return shash_ahash_digest(req, desc);
}
}
static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
unsigned int keylen)
{
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct crypto_ahash *child = &ctx->cryptd_tfm->base;
int err;
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
& CRYPTO_TFM_REQ_MASK);
err = crypto_ahash_setkey(child, key, keylen);
crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
& CRYPTO_TFM_RES_MASK);
return err;
}
static int ghash_async_init_tfm(struct crypto_tfm *tfm)
{
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct ahash_request) +
crypto_ahash_reqsize(&cryptd_tfm->base));
return 0;
}
static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
{
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_free_ahash(ctx->cryptd_tfm);
}
static struct ahash_alg ghash_async_alg = {
.init = ghash_async_init,
.update = ghash_async_update,
.final = ghash_async_final,
.setkey = ghash_async_setkey,
.digest = ghash_async_digest,
.halg = {
.digestsize = GHASH_DIGEST_SIZE,
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-clmulni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_type = &crypto_ahash_type,
.cra_module = THIS_MODULE,
.cra_init = ghash_async_init_tfm,
.cra_exit = ghash_async_exit_tfm,
},
},
};
static const struct x86_cpu_id pcmul_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
static int __init ghash_pclmulqdqni_mod_init(void)
{
int err;
if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
err = crypto_register_shash(&ghash_alg);
if (err)
goto err_out;
err = crypto_register_ahash(&ghash_async_alg);
if (err)
goto err_shash;
return 0;
err_shash:
crypto_unregister_shash(&ghash_alg);
err_out:
return err;
}
static void __exit ghash_pclmulqdqni_mod_exit(void)
{
crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
}
module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
"acclerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("ghash");

View file

@ -0,0 +1,150 @@
/*
* Shared glue code for 128bit block ciphers, AVX assembler macros
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
vmovdqu (0*16)(src), x0; \
vmovdqu (1*16)(src), x1; \
vmovdqu (2*16)(src), x2; \
vmovdqu (3*16)(src), x3; \
vmovdqu (4*16)(src), x4; \
vmovdqu (5*16)(src), x5; \
vmovdqu (6*16)(src), x6; \
vmovdqu (7*16)(src), x7;
#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vmovdqu x0, (0*16)(dst); \
vmovdqu x1, (1*16)(dst); \
vmovdqu x2, (2*16)(dst); \
vmovdqu x3, (3*16)(dst); \
vmovdqu x4, (4*16)(dst); \
vmovdqu x5, (5*16)(dst); \
vmovdqu x6, (6*16)(dst); \
vmovdqu x7, (7*16)(dst);
#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*16)(src), x1, x1; \
vpxor (1*16)(src), x2, x2; \
vpxor (2*16)(src), x3, x3; \
vpxor (3*16)(src), x4, x4; \
vpxor (4*16)(src), x5, x5; \
vpxor (5*16)(src), x6, x6; \
vpxor (6*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
vpcmpeqd t0, t0, t0; \
vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
vmovdqa bswap, t1; \
\
/* load IV and byteswap */ \
vmovdqu (iv), x7; \
vpshufb t1, x7, x0; \
\
/* construct IVs */ \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x1; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x2; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x3; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x4; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x5; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x6; \
inc_le128(x7, t0, t2); \
vmovdqa x7, t2; \
vpshufb t1, x7, x7; \
inc_le128(t2, t0, t1); \
vmovdqu t2, (iv);
#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*16)(src), x0, x0; \
vpxor (1*16)(src), x1, x1; \
vpxor (2*16)(src), x2, x2; \
vpxor (3*16)(src), x3, x3; \
vpxor (4*16)(src), x4, x4; \
vpxor (5*16)(src), x5, x5; \
vpxor (6*16)(src), x6, x6; \
vpxor (7*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpaddq iv, iv, iv; \
vpshufd $0x13, tmp, tmp; \
vpand mask, tmp, tmp; \
vpxor tmp, iv, iv;
#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
t1, xts_gf128mul_and_shl1_mask) \
vmovdqa xts_gf128mul_and_shl1_mask, t0; \
\
/* load IV */ \
vmovdqu (iv), tiv; \
vpxor (0*16)(src), tiv, x0; \
vmovdqu tiv, (0*16)(dst); \
\
/* construct and store IVs, also xor with source */ \
gf128mul_x_ble(tiv, t0, t1); \
vpxor (1*16)(src), tiv, x1; \
vmovdqu tiv, (1*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (2*16)(src), tiv, x2; \
vmovdqu tiv, (2*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (3*16)(src), tiv, x3; \
vmovdqu tiv, (3*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (4*16)(src), tiv, x4; \
vmovdqu tiv, (4*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (5*16)(src), tiv, x5; \
vmovdqu tiv, (5*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (6*16)(src), tiv, x6; \
vmovdqu tiv, (6*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (7*16)(src), tiv, x7; \
vmovdqu tiv, (7*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vmovdqu tiv, (iv);
#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
vpxor (0*16)(dst), x0, x0; \
vpxor (1*16)(dst), x1, x1; \
vpxor (2*16)(dst), x2, x2; \
vpxor (3*16)(dst), x3, x3; \
vpxor (4*16)(dst), x4, x4; \
vpxor (5*16)(dst), x5, x5; \
vpxor (6*16)(dst), x6, x6; \
vpxor (7*16)(dst), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);

Some files were not shown because too many files have changed in this diff Show more