mirror of
https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
synced 2025-10-28 23:08:52 +01:00
Fixed MTP to work with TWRP
This commit is contained in:
commit
f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
20
arch/x86/Kbuild
Normal file
20
arch/x86/Kbuild
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
obj-$(CONFIG_KVM) += kvm/
|
||||
|
||||
# Xen paravirtualization support
|
||||
obj-$(CONFIG_XEN) += xen/
|
||||
|
||||
# lguest paravirtualization support
|
||||
obj-$(CONFIG_LGUEST_GUEST) += lguest/
|
||||
|
||||
obj-y += realmode/
|
||||
obj-y += kernel/
|
||||
obj-y += mm/
|
||||
|
||||
obj-y += crypto/
|
||||
obj-y += vdso/
|
||||
obj-$(CONFIG_IA32_EMULATION) += ia32/
|
||||
|
||||
obj-y += platform/
|
||||
obj-y += net/
|
||||
|
||||
obj-$(CONFIG_KEXEC_FILE) += purgatory/
|
||||
2515
arch/x86/Kconfig
Normal file
2515
arch/x86/Kconfig
Normal file
File diff suppressed because it is too large
Load diff
470
arch/x86/Kconfig.cpu
Normal file
470
arch/x86/Kconfig.cpu
Normal file
|
|
@ -0,0 +1,470 @@
|
|||
# Put here option for CPU selection and depending optimization
|
||||
choice
|
||||
prompt "Processor family"
|
||||
default M686 if X86_32
|
||||
default GENERIC_CPU if X86_64
|
||||
|
||||
config M486
|
||||
bool "486"
|
||||
depends on X86_32
|
||||
---help---
|
||||
This is the processor type of your CPU. This information is
|
||||
used for optimizing purposes. In order to compile a kernel
|
||||
that can run on all supported x86 CPU types (albeit not
|
||||
optimally fast), you can specify "486" here.
|
||||
|
||||
Note that the 386 is no longer supported, this includes
|
||||
AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 486DLC/DLC2,
|
||||
UMC 486SX-S and the NexGen Nx586.
|
||||
|
||||
The kernel will not necessarily run on earlier architectures than
|
||||
the one you have chosen, e.g. a Pentium optimized kernel will run on
|
||||
a PPro, but not necessarily on an i486.
|
||||
|
||||
Here are the settings recommended for greatest speed:
|
||||
- "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
|
||||
SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
|
||||
- "586" for generic Pentium CPUs lacking the TSC
|
||||
(time stamp counter) register.
|
||||
- "Pentium-Classic" for the Intel Pentium.
|
||||
- "Pentium-MMX" for the Intel Pentium MMX.
|
||||
- "Pentium-Pro" for the Intel Pentium Pro.
|
||||
- "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
|
||||
- "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
|
||||
- "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
|
||||
- "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
|
||||
- "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
|
||||
- "Crusoe" for the Transmeta Crusoe series.
|
||||
- "Efficeon" for the Transmeta Efficeon series.
|
||||
- "Winchip-C6" for original IDT Winchip.
|
||||
- "Winchip-2" for IDT Winchips with 3dNow! capabilities.
|
||||
- "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
|
||||
- "Geode GX/LX" For AMD Geode GX and LX processors.
|
||||
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
|
||||
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
|
||||
- "VIA C7" for VIA C7.
|
||||
|
||||
If you don't know what to do, choose "486".
|
||||
|
||||
config M586
|
||||
bool "586/K5/5x86/6x86/6x86MX"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a 586 or 686 series processor such as the AMD K5,
|
||||
the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
|
||||
assume the RDTSC (Read Time Stamp Counter) instruction.
|
||||
|
||||
config M586TSC
|
||||
bool "Pentium-Classic"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a Pentium Classic processor with the RDTSC (Read
|
||||
Time Stamp Counter) instruction for benchmarking.
|
||||
|
||||
config M586MMX
|
||||
bool "Pentium-MMX"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a Pentium with the MMX graphics/multimedia
|
||||
extended instructions.
|
||||
|
||||
config M686
|
||||
bool "Pentium-Pro"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for Intel Pentium Pro chips. This enables the use of
|
||||
Pentium Pro extended instructions, and disables the init-time guard
|
||||
against the f00f bug found in earlier Pentiums.
|
||||
|
||||
config MPENTIUMII
|
||||
bool "Pentium-II/Celeron(pre-Coppermine)"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for Intel chips based on the Pentium-II and
|
||||
pre-Coppermine Celeron core. This option enables an unaligned
|
||||
copy optimization, compiles the kernel with optimization flags
|
||||
tailored for the chip, and applies any applicable Pentium Pro
|
||||
optimizations.
|
||||
|
||||
config MPENTIUMIII
|
||||
bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for Intel chips based on the Pentium-III and
|
||||
Celeron-Coppermine core. This option enables use of some
|
||||
extended prefetch instructions in addition to the Pentium II
|
||||
extensions.
|
||||
|
||||
config MPENTIUMM
|
||||
bool "Pentium M"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for Intel Pentium M (not Pentium-4 M)
|
||||
notebook chips.
|
||||
|
||||
config MPENTIUM4
|
||||
bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for Intel Pentium 4 chips. This includes the
|
||||
Pentium 4, Pentium D, P4-based Celeron and Xeon, and
|
||||
Pentium-4 M (not Pentium M) chips. This option enables compile
|
||||
flags optimized for the chip, uses the correct cache line size, and
|
||||
applies any applicable optimizations.
|
||||
|
||||
CPUIDs: F[0-6][1-A] (in /proc/cpuinfo show = cpu family : 15 )
|
||||
|
||||
Select this for:
|
||||
Pentiums (Pentium 4, Pentium D, Celeron, Celeron D) corename:
|
||||
-Willamette
|
||||
-Northwood
|
||||
-Mobile Pentium 4
|
||||
-Mobile Pentium 4 M
|
||||
-Extreme Edition (Gallatin)
|
||||
-Prescott
|
||||
-Prescott 2M
|
||||
-Cedar Mill
|
||||
-Presler
|
||||
-Smithfield
|
||||
Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
|
||||
-Foster
|
||||
-Prestonia
|
||||
-Gallatin
|
||||
-Nocona
|
||||
-Irwindale
|
||||
-Cranford
|
||||
-Potomac
|
||||
-Paxville
|
||||
-Dempsey
|
||||
|
||||
|
||||
config MK6
|
||||
bool "K6/K6-II/K6-III"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for an AMD K6-family processor. Enables use of
|
||||
some extended instructions, and passes appropriate optimization
|
||||
flags to GCC.
|
||||
|
||||
config MK7
|
||||
bool "Athlon/Duron/K7"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for an AMD Athlon K7-family processor. Enables use of
|
||||
some extended instructions, and passes appropriate optimization
|
||||
flags to GCC.
|
||||
|
||||
config MK8
|
||||
bool "Opteron/Athlon64/Hammer/K8"
|
||||
---help---
|
||||
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
|
||||
Enables use of some extended instructions, and passes appropriate
|
||||
optimization flags to GCC.
|
||||
|
||||
config MCRUSOE
|
||||
bool "Crusoe"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a Transmeta Crusoe processor. Treats the processor
|
||||
like a 586 with TSC, and sets some GCC optimization flags (like a
|
||||
Pentium Pro with no alignment requirements).
|
||||
|
||||
config MEFFICEON
|
||||
bool "Efficeon"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a Transmeta Efficeon processor.
|
||||
|
||||
config MWINCHIPC6
|
||||
bool "Winchip-C6"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for an IDT Winchip C6 chip. Linux and GCC
|
||||
treat this chip as a 586TSC with some extended instructions
|
||||
and alignment requirements.
|
||||
|
||||
config MWINCHIP3D
|
||||
bool "Winchip-2/Winchip-2A/Winchip-3"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for an IDT Winchip-2, 2A or 3. Linux and GCC
|
||||
treat this chip as a 586TSC with some extended instructions
|
||||
and alignment requirements. Also enable out of order memory
|
||||
stores for this CPU, which can increase performance of some
|
||||
operations.
|
||||
|
||||
config MELAN
|
||||
bool "AMD Elan"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for an AMD Elan processor.
|
||||
|
||||
Do not use this option for K6/Athlon/Opteron processors!
|
||||
|
||||
config MGEODEGX1
|
||||
bool "GeodeGX1"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a Geode GX1 (Cyrix MediaGX) chip.
|
||||
|
||||
config MGEODE_LX
|
||||
bool "Geode GX/LX"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for AMD Geode GX and LX processors.
|
||||
|
||||
config MCYRIXIII
|
||||
bool "CyrixIII/VIA-C3"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a Cyrix III or C3 chip. Presently Linux and GCC
|
||||
treat this chip as a generic 586. Whilst the CPU is 686 class,
|
||||
it lacks the cmov extension which gcc assumes is present when
|
||||
generating 686 code.
|
||||
Note that Nehemiah (Model 9) and above will not boot with this
|
||||
kernel due to them lacking the 3DNow! instructions used in earlier
|
||||
incarnations of the CPU.
|
||||
|
||||
config MVIAC3_2
|
||||
bool "VIA C3-2 (Nehemiah)"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a VIA C3 "Nehemiah". Selecting this enables usage
|
||||
of SSE and tells gcc to treat the CPU as a 686.
|
||||
Note, this kernel will not boot on older (pre model 9) C3s.
|
||||
|
||||
config MVIAC7
|
||||
bool "VIA C7"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Select this for a VIA C7. Selecting this uses the correct cache
|
||||
shift and tells gcc to treat the CPU as a 686.
|
||||
|
||||
config MPSC
|
||||
bool "Intel P4 / older Netburst based Xeon"
|
||||
depends on X86_64
|
||||
---help---
|
||||
Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
|
||||
Xeon CPUs with Intel 64bit which is compatible with x86-64.
|
||||
Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
|
||||
Netburst core and shouldn't use this option. You can distinguish them
|
||||
using the cpu family field
|
||||
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
|
||||
|
||||
config MCORE2
|
||||
bool "Core 2/newer Xeon"
|
||||
---help---
|
||||
|
||||
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
|
||||
53xx) CPUs. You can distinguish newer from older Xeons by the CPU
|
||||
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
|
||||
(not a typo)
|
||||
|
||||
config MATOM
|
||||
bool "Intel Atom"
|
||||
---help---
|
||||
|
||||
Select this for the Intel Atom platform. Intel Atom CPUs have an
|
||||
in-order pipelining architecture and thus can benefit from
|
||||
accordingly optimized code. Use a recent GCC with specific Atom
|
||||
support in order to fully benefit from selecting this option.
|
||||
|
||||
config GENERIC_CPU
|
||||
bool "Generic-x86-64"
|
||||
depends on X86_64
|
||||
---help---
|
||||
Generic x86-64 CPU.
|
||||
Run equally well on all x86-64 CPUs.
|
||||
|
||||
endchoice
|
||||
|
||||
config X86_GENERIC
|
||||
bool "Generic x86 support"
|
||||
depends on X86_32
|
||||
---help---
|
||||
Instead of just including optimizations for the selected
|
||||
x86 variant (e.g. PII, Crusoe or Athlon), include some more
|
||||
generic optimizations as well. This will make the kernel
|
||||
perform better on x86 CPUs other than that selected.
|
||||
|
||||
This is really intended for distributors who need more
|
||||
generic optimizations.
|
||||
|
||||
#
|
||||
# Define implied options from the CPU selection here
|
||||
config X86_INTERNODE_CACHE_SHIFT
|
||||
int
|
||||
default "12" if X86_VSMP
|
||||
default X86_L1_CACHE_SHIFT
|
||||
|
||||
config X86_L1_CACHE_SHIFT
|
||||
int
|
||||
default "7" if MPENTIUM4 || MPSC
|
||||
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
|
||||
default "4" if MELAN || M486 || MGEODEGX1
|
||||
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
|
||||
|
||||
config X86_PPRO_FENCE
|
||||
bool "PentiumPro memory ordering errata workaround"
|
||||
depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
|
||||
---help---
|
||||
Old PentiumPro multiprocessor systems had errata that could cause
|
||||
memory operations to violate the x86 ordering standard in rare cases.
|
||||
Enabling this option will attempt to work around some (but not all)
|
||||
occurrences of this problem, at the cost of much heavier spinlock and
|
||||
memory barrier operations.
|
||||
|
||||
If unsure, say n here. Even distro kernels should think twice before
|
||||
enabling this: there are few systems, and an unlikely bug.
|
||||
|
||||
config X86_F00F_BUG
|
||||
def_bool y
|
||||
depends on M586MMX || M586TSC || M586 || M486
|
||||
|
||||
config X86_INVD_BUG
|
||||
def_bool y
|
||||
depends on M486
|
||||
|
||||
config X86_ALIGNMENT_16
|
||||
def_bool y
|
||||
depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
|
||||
|
||||
config X86_INTEL_USERCOPY
|
||||
def_bool y
|
||||
depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
|
||||
|
||||
config X86_USE_PPRO_CHECKSUM
|
||||
def_bool y
|
||||
depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
|
||||
|
||||
config X86_USE_3DNOW
|
||||
def_bool y
|
||||
depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
|
||||
|
||||
#
|
||||
# P6_NOPs are a relatively minor optimization that require a family >=
|
||||
# 6 processor, except that it is broken on certain VIA chips.
|
||||
# Furthermore, AMD chips prefer a totally different sequence of NOPs
|
||||
# (which work on all CPUs). In addition, it looks like Virtual PC
|
||||
# does not understand them.
|
||||
#
|
||||
# As a result, disallow these if we're not compiling for X86_64 (these
|
||||
# NOPs do work on all x86-64 capable chips); the list of processors in
|
||||
# the right-hand clause are the cores that benefit from this optimization.
|
||||
#
|
||||
config X86_P6_NOP
|
||||
def_bool y
|
||||
depends on X86_64
|
||||
depends on (MCORE2 || MPENTIUM4 || MPSC)
|
||||
|
||||
config X86_TSC
|
||||
def_bool y
|
||||
depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
|
||||
|
||||
config X86_CMPXCHG64
|
||||
def_bool y
|
||||
depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
|
||||
|
||||
# this should be set for all -march=.. options where the compiler
|
||||
# generates cmov.
|
||||
config X86_CMOV
|
||||
def_bool y
|
||||
depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
|
||||
|
||||
config X86_MINIMUM_CPU_FAMILY
|
||||
int
|
||||
default "64" if X86_64
|
||||
default "6" if X86_32 && X86_P6_NOP
|
||||
default "5" if X86_32 && X86_CMPXCHG64
|
||||
default "4"
|
||||
|
||||
config X86_DEBUGCTLMSR
|
||||
def_bool y
|
||||
depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML
|
||||
|
||||
menuconfig PROCESSOR_SELECT
|
||||
bool "Supported processor vendors" if EXPERT
|
||||
---help---
|
||||
This lets you choose what x86 vendor support code your kernel
|
||||
will include.
|
||||
|
||||
config CPU_SUP_INTEL
|
||||
default y
|
||||
bool "Support Intel processors" if PROCESSOR_SELECT
|
||||
---help---
|
||||
This enables detection, tunings and quirks for Intel processors
|
||||
|
||||
You need this enabled if you want your kernel to run on an
|
||||
Intel CPU. Disabling this option on other types of CPUs
|
||||
makes the kernel a tiny bit smaller. Disabling it on an Intel
|
||||
CPU might render the kernel unbootable.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CPU_SUP_CYRIX_32
|
||||
default y
|
||||
bool "Support Cyrix processors" if PROCESSOR_SELECT
|
||||
depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
|
||||
---help---
|
||||
This enables detection, tunings and quirks for Cyrix processors
|
||||
|
||||
You need this enabled if you want your kernel to run on a
|
||||
Cyrix CPU. Disabling this option on other types of CPUs
|
||||
makes the kernel a tiny bit smaller. Disabling it on a Cyrix
|
||||
CPU might render the kernel unbootable.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CPU_SUP_AMD
|
||||
default y
|
||||
bool "Support AMD processors" if PROCESSOR_SELECT
|
||||
---help---
|
||||
This enables detection, tunings and quirks for AMD processors
|
||||
|
||||
You need this enabled if you want your kernel to run on an
|
||||
AMD CPU. Disabling this option on other types of CPUs
|
||||
makes the kernel a tiny bit smaller. Disabling it on an AMD
|
||||
CPU might render the kernel unbootable.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CPU_SUP_CENTAUR
|
||||
default y
|
||||
bool "Support Centaur processors" if PROCESSOR_SELECT
|
||||
---help---
|
||||
This enables detection, tunings and quirks for Centaur processors
|
||||
|
||||
You need this enabled if you want your kernel to run on a
|
||||
Centaur CPU. Disabling this option on other types of CPUs
|
||||
makes the kernel a tiny bit smaller. Disabling it on a Centaur
|
||||
CPU might render the kernel unbootable.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CPU_SUP_TRANSMETA_32
|
||||
default y
|
||||
bool "Support Transmeta processors" if PROCESSOR_SELECT
|
||||
depends on !64BIT
|
||||
---help---
|
||||
This enables detection, tunings and quirks for Transmeta processors
|
||||
|
||||
You need this enabled if you want your kernel to run on a
|
||||
Transmeta CPU. Disabling this option on other types of CPUs
|
||||
makes the kernel a tiny bit smaller. Disabling it on a Transmeta
|
||||
CPU might render the kernel unbootable.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CPU_SUP_UMC_32
|
||||
default y
|
||||
bool "Support UMC processors" if PROCESSOR_SELECT
|
||||
depends on M486 || (EXPERT && !64BIT)
|
||||
---help---
|
||||
This enables detection, tunings and quirks for UMC processors
|
||||
|
||||
You need this enabled if you want your kernel to run on a
|
||||
UMC CPU. Disabling this option on other types of CPUs
|
||||
makes the kernel a tiny bit smaller. Disabling it on a UMC
|
||||
CPU might render the kernel unbootable.
|
||||
|
||||
If unsure, say N.
|
||||
326
arch/x86/Kconfig.debug
Normal file
326
arch/x86/Kconfig.debug
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
menu "Kernel hacking"
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
source "lib/Kconfig.debug"
|
||||
|
||||
config STRICT_DEVMEM
|
||||
bool "Filter access to /dev/mem"
|
||||
---help---
|
||||
If this option is disabled, you allow userspace (root) access to all
|
||||
of memory, including kernel and userspace memory. Accidental
|
||||
access to this is obviously disastrous, but specific access can
|
||||
be used by people debugging the kernel. Note that with PAT support
|
||||
enabled, even in this case there are restrictions on /dev/mem
|
||||
use due to the cache aliasing requirements.
|
||||
|
||||
If this option is switched on, the /dev/mem file only allows
|
||||
userspace access to PCI space and the BIOS code and data regions.
|
||||
This is sufficient for dosemu and X and all common users of
|
||||
/dev/mem.
|
||||
|
||||
If in doubt, say Y.
|
||||
|
||||
config X86_VERBOSE_BOOTUP
|
||||
bool "Enable verbose x86 bootup info messages"
|
||||
default y
|
||||
---help---
|
||||
Enables the informational output from the decompression stage
|
||||
(e.g. bzImage) of the boot. If you disable this you will still
|
||||
see errors. Disable this if you want silent bootup.
|
||||
|
||||
config EARLY_PRINTK
|
||||
bool "Early printk" if EXPERT
|
||||
default y
|
||||
---help---
|
||||
Write kernel log output directly into the VGA buffer or to a serial
|
||||
port.
|
||||
|
||||
This is useful for kernel debugging when your machine crashes very
|
||||
early before the console code is initialized. For normal operation
|
||||
it is not recommended because it looks ugly and doesn't cooperate
|
||||
with klogd/syslogd or the X server. You should normally say N here,
|
||||
unless you want to debug such a crash.
|
||||
|
||||
config EARLY_PRINTK_INTEL_MID
|
||||
bool "Early printk for Intel MID platform support"
|
||||
depends on EARLY_PRINTK && X86_INTEL_MID
|
||||
|
||||
config EARLY_PRINTK_DBGP
|
||||
bool "Early printk via EHCI debug port"
|
||||
depends on EARLY_PRINTK && PCI
|
||||
---help---
|
||||
Write kernel log output directly into the EHCI debug port.
|
||||
|
||||
This is useful for kernel debugging when your machine crashes very
|
||||
early before the console code is initialized. For normal operation
|
||||
it is not recommended because it looks ugly and doesn't cooperate
|
||||
with klogd/syslogd or the X server. You should normally say N here,
|
||||
unless you want to debug such a crash. You need a USB debug device.
|
||||
|
||||
config EARLY_PRINTK_EFI
|
||||
bool "Early printk via the EFI framebuffer"
|
||||
depends on EFI && EARLY_PRINTK
|
||||
select FONT_SUPPORT
|
||||
---help---
|
||||
Write kernel log output directly into the EFI framebuffer.
|
||||
|
||||
This is useful for kernel debugging when your machine crashes very
|
||||
early before the console code is initialized.
|
||||
|
||||
config X86_PTDUMP
|
||||
bool "Export kernel pagetable layout to userspace via debugfs"
|
||||
depends on DEBUG_KERNEL
|
||||
select DEBUG_FS
|
||||
---help---
|
||||
Say Y here if you want to show the kernel pagetable layout in a
|
||||
debugfs file. This information is only useful for kernel developers
|
||||
who are working in architecture specific areas of the kernel.
|
||||
It is probably not a good idea to enable this feature in a production
|
||||
kernel.
|
||||
If in doubt, say "N"
|
||||
|
||||
config EFI_PGT_DUMP
|
||||
bool "Dump the EFI pagetable"
|
||||
depends on EFI && X86_PTDUMP
|
||||
---help---
|
||||
Enable this if you want to dump the EFI page table before
|
||||
enabling virtual mode. This can be used to debug miscellaneous
|
||||
issues with the mapping of the EFI runtime regions into that
|
||||
table.
|
||||
|
||||
config DEBUG_RODATA
|
||||
bool "Write protect kernel read-only data structures"
|
||||
default y
|
||||
depends on DEBUG_KERNEL
|
||||
---help---
|
||||
Mark the kernel read-only data as write-protected in the pagetables,
|
||||
in order to catch accidental (and incorrect) writes to such const
|
||||
data. This is recommended so that we can catch kernel bugs sooner.
|
||||
If in doubt, say "Y".
|
||||
|
||||
config DEBUG_RODATA_TEST
|
||||
bool "Testcase for the DEBUG_RODATA feature"
|
||||
depends on DEBUG_RODATA
|
||||
default y
|
||||
---help---
|
||||
This option enables a testcase for the DEBUG_RODATA
|
||||
feature as well as for the change_page_attr() infrastructure.
|
||||
If in doubt, say "N"
|
||||
|
||||
config DEBUG_SET_MODULE_RONX
|
||||
bool "Set loadable kernel module data as NX and text as RO"
|
||||
depends on MODULES
|
||||
---help---
|
||||
This option helps catch unintended modifications to loadable
|
||||
kernel module's text and read-only data. It also prevents execution
|
||||
of module data. Such protection may interfere with run-time code
|
||||
patching and dynamic kernel tracing - and they might also protect
|
||||
against certain classes of kernel exploits.
|
||||
If in doubt, say "N".
|
||||
|
||||
config DEBUG_NX_TEST
|
||||
tristate "Testcase for the NX non-executable stack feature"
|
||||
depends on DEBUG_KERNEL && m
|
||||
---help---
|
||||
This option enables a testcase for the CPU NX capability
|
||||
and the software setup of this feature.
|
||||
If in doubt, say "N"
|
||||
|
||||
config DOUBLEFAULT
|
||||
default y
|
||||
bool "Enable doublefault exception handler" if EXPERT
|
||||
---help---
|
||||
This option allows trapping of rare doublefault exceptions that
|
||||
would otherwise cause a system to silently reboot. Disabling this
|
||||
option saves about 4k and might cause you much additional grey
|
||||
hair.
|
||||
|
||||
config DEBUG_TLBFLUSH
|
||||
bool "Set upper limit of TLB entries to flush one-by-one"
|
||||
depends on DEBUG_KERNEL
|
||||
---help---
|
||||
|
||||
X86-only for now.
|
||||
|
||||
This option allows the user to tune the amount of TLB entries the
|
||||
kernel flushes one-by-one instead of doing a full TLB flush. In
|
||||
certain situations, the former is cheaper. This is controlled by the
|
||||
tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
|
||||
to -1, the code flushes the whole TLB unconditionally. Otherwise,
|
||||
for positive values of it, the kernel will use single TLB entry
|
||||
invalidating instructions according to the following formula:
|
||||
|
||||
flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
|
||||
|
||||
If in doubt, say "N".
|
||||
|
||||
config IOMMU_DEBUG
|
||||
bool "Enable IOMMU debugging"
|
||||
depends on GART_IOMMU && DEBUG_KERNEL
|
||||
depends on X86_64
|
||||
---help---
|
||||
Force the IOMMU to on even when you have less than 4GB of
|
||||
memory and add debugging code. On overflow always panic. And
|
||||
allow enabling IOMMU leak tracing. Can be disabled at boot
|
||||
time with iommu=noforce. This will also enable scatter gather
|
||||
list merging. Currently not recommended for production
|
||||
code. When you use it make sure you have a big enough
|
||||
IOMMU/AGP aperture. Most of the options enabled by this can
|
||||
be set more finegrained using the iommu= command line
|
||||
options. See Documentation/x86/x86_64/boot-options.txt for more
|
||||
details.
|
||||
|
||||
config IOMMU_STRESS
|
||||
bool "Enable IOMMU stress-test mode"
|
||||
---help---
|
||||
This option disables various optimizations in IOMMU related
|
||||
code to do real stress testing of the IOMMU code. This option
|
||||
will cause a performance drop and should only be enabled for
|
||||
testing.
|
||||
|
||||
config IOMMU_LEAK
|
||||
bool "IOMMU leak tracing"
|
||||
depends on IOMMU_DEBUG && DMA_API_DEBUG
|
||||
---help---
|
||||
Add a simple leak tracer to the IOMMU code. This is useful when you
|
||||
are debugging a buggy device driver that leaks IOMMU mappings.
|
||||
|
||||
config HAVE_MMIOTRACE_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config X86_DECODER_SELFTEST
|
||||
bool "x86 instruction decoder selftest"
|
||||
depends on DEBUG_KERNEL && KPROBES
|
||||
depends on !COMPILE_TEST
|
||||
---help---
|
||||
Perform x86 instruction decoder selftests at build time.
|
||||
This option is useful for checking the sanity of x86 instruction
|
||||
decoder code.
|
||||
If unsure, say "N".
|
||||
|
||||
#
|
||||
# IO delay types:
|
||||
#
|
||||
|
||||
config IO_DELAY_TYPE_0X80
|
||||
int
|
||||
default "0"
|
||||
|
||||
config IO_DELAY_TYPE_0XED
|
||||
int
|
||||
default "1"
|
||||
|
||||
config IO_DELAY_TYPE_UDELAY
|
||||
int
|
||||
default "2"
|
||||
|
||||
config IO_DELAY_TYPE_NONE
|
||||
int
|
||||
default "3"
|
||||
|
||||
choice
|
||||
prompt "IO delay type"
|
||||
default IO_DELAY_0X80
|
||||
|
||||
config IO_DELAY_0X80
|
||||
bool "port 0x80 based port-IO delay [recommended]"
|
||||
---help---
|
||||
This is the traditional Linux IO delay used for in/out_p.
|
||||
It is the most tested hence safest selection here.
|
||||
|
||||
config IO_DELAY_0XED
|
||||
bool "port 0xed based port-IO delay"
|
||||
---help---
|
||||
Use port 0xed as the IO delay. This frees up port 0x80 which is
|
||||
often used as a hardware-debug port.
|
||||
|
||||
config IO_DELAY_UDELAY
|
||||
bool "udelay based port-IO delay"
|
||||
---help---
|
||||
Use udelay(2) as the IO delay method. This provides the delay
|
||||
while not having any side-effect on the IO port space.
|
||||
|
||||
config IO_DELAY_NONE
|
||||
bool "no port-IO delay"
|
||||
---help---
|
||||
No port-IO delay. Will break on old boxes that require port-IO
|
||||
delay for certain operations. Should work on most new machines.
|
||||
|
||||
endchoice
|
||||
|
||||
if IO_DELAY_0X80
|
||||
config DEFAULT_IO_DELAY_TYPE
|
||||
int
|
||||
default IO_DELAY_TYPE_0X80
|
||||
endif
|
||||
|
||||
if IO_DELAY_0XED
|
||||
config DEFAULT_IO_DELAY_TYPE
|
||||
int
|
||||
default IO_DELAY_TYPE_0XED
|
||||
endif
|
||||
|
||||
if IO_DELAY_UDELAY
|
||||
config DEFAULT_IO_DELAY_TYPE
|
||||
int
|
||||
default IO_DELAY_TYPE_UDELAY
|
||||
endif
|
||||
|
||||
if IO_DELAY_NONE
|
||||
config DEFAULT_IO_DELAY_TYPE
|
||||
int
|
||||
default IO_DELAY_TYPE_NONE
|
||||
endif
|
||||
|
||||
config DEBUG_BOOT_PARAMS
|
||||
bool "Debug boot parameters"
|
||||
depends on DEBUG_KERNEL
|
||||
depends on DEBUG_FS
|
||||
---help---
|
||||
This option will cause struct boot_params to be exported via debugfs.
|
||||
|
||||
config CPA_DEBUG
|
||||
bool "CPA self-test code"
|
||||
depends on DEBUG_KERNEL
|
||||
---help---
|
||||
Do change_page_attr() self-tests every 30 seconds.
|
||||
|
||||
config OPTIMIZE_INLINING
|
||||
bool "Allow gcc to uninline functions marked 'inline'"
|
||||
---help---
|
||||
This option determines if the kernel forces gcc to inline the functions
|
||||
developers have marked 'inline'. Doing so takes away freedom from gcc to
|
||||
do what it thinks is best, which is desirable for the gcc 3.x series of
|
||||
compilers. The gcc 4.x series have a rewritten inlining algorithm and
|
||||
enabling this option will generate a smaller kernel there. Hopefully
|
||||
this algorithm is so good that allowing gcc 4.x and above to make the
|
||||
decision will become the default in the future. Until then this option
|
||||
is there to test gcc for this.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config DEBUG_NMI_SELFTEST
|
||||
bool "NMI Selftest"
|
||||
depends on DEBUG_KERNEL && X86_LOCAL_APIC
|
||||
---help---
|
||||
Enabling this option turns on a quick NMI selftest to verify
|
||||
that the NMI behaves correctly.
|
||||
|
||||
This might help diagnose strange hangs that rely on NMI to
|
||||
function properly.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config X86_DEBUG_STATIC_CPU_HAS
|
||||
bool "Debug alternatives"
|
||||
depends on DEBUG_KERNEL
|
||||
---help---
|
||||
This option causes additional code to be generated which
|
||||
fails if static_cpu_has() is used before alternatives have
|
||||
run.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
endmenu
|
||||
265
arch/x86/Makefile
Normal file
265
arch/x86/Makefile
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
# Unified Makefile for i386 and x86_64
|
||||
|
||||
# select defconfig based on actual architecture
|
||||
ifeq ($(ARCH),x86)
|
||||
ifeq ($(shell uname -m),x86_64)
|
||||
KBUILD_DEFCONFIG := x86_64_defconfig
|
||||
else
|
||||
KBUILD_DEFCONFIG := i386_defconfig
|
||||
endif
|
||||
else
|
||||
KBUILD_DEFCONFIG := $(ARCH)_defconfig
|
||||
endif
|
||||
|
||||
# How to compile the 16-bit code. Note we always compile for -march=i386;
|
||||
# that way we can complain to the user if the CPU is insufficient.
|
||||
#
|
||||
# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
|
||||
# older versions of GCC, include an *assembly* header to make sure that
|
||||
# gcc doesn't play any games behind our back.
|
||||
CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
|
||||
M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
|
||||
|
||||
REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
|
||||
-DDISABLE_BRANCH_PROFILING \
|
||||
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
|
||||
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
|
||||
-mno-mmx -mno-sse \
|
||||
$(call cc-option, -ffreestanding) \
|
||||
$(call cc-option, -fno-stack-protector) \
|
||||
$(call cc-option, -mpreferred-stack-boundary=2)
|
||||
export REALMODE_CFLAGS
|
||||
|
||||
# BITS is used as extension for files which are available in a 32 bit
|
||||
# and a 64 bit version to simplify shared Makefiles.
|
||||
# e.g.: obj-y += foo_$(BITS).o
|
||||
export BITS
|
||||
|
||||
ifdef CONFIG_X86_NEED_RELOCS
|
||||
LDFLAGS_vmlinux := --emit-relocs
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_X86_32),y)
|
||||
BITS := 32
|
||||
UTS_MACHINE := i386
|
||||
CHECKFLAGS += -D__i386__
|
||||
|
||||
biarch := $(call cc-option,-m32)
|
||||
KBUILD_AFLAGS += $(biarch)
|
||||
KBUILD_CFLAGS += $(biarch)
|
||||
|
||||
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
|
||||
|
||||
# Never want PIC in a 32-bit kernel, prevent breakage with GCC built
|
||||
# with nonstandard options
|
||||
KBUILD_CFLAGS += -fno-pic
|
||||
|
||||
# prevent gcc from keeping the stack 16 byte aligned
|
||||
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
|
||||
|
||||
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
|
||||
# a lot more stack due to the lack of sharing of stack slots:
|
||||
KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
|
||||
$(call cc-option,-fno-unit-at-a-time))
|
||||
|
||||
# CPU-specific tuning. Anything which can be shared with UML should go here.
|
||||
include $(srctree)/arch/x86/Makefile_32.cpu
|
||||
KBUILD_CFLAGS += $(cflags-y)
|
||||
|
||||
# temporary until string.h is fixed
|
||||
KBUILD_CFLAGS += -ffreestanding
|
||||
else
|
||||
BITS := 64
|
||||
UTS_MACHINE := x86_64
|
||||
CHECKFLAGS += -D__x86_64__ -m64
|
||||
|
||||
biarch := -m64
|
||||
KBUILD_AFLAGS += -m64
|
||||
KBUILD_CFLAGS += -m64
|
||||
|
||||
# Don't autogenerate traditional x87 instructions
|
||||
KBUILD_CFLAGS += $(call cc-option,-mno-80387)
|
||||
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
|
||||
|
||||
# Use -mpreferred-stack-boundary=3 if supported.
|
||||
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
|
||||
|
||||
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
|
||||
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
|
||||
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
|
||||
|
||||
cflags-$(CONFIG_MCORE2) += \
|
||||
$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
|
||||
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
|
||||
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
|
||||
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
|
||||
KBUILD_CFLAGS += $(cflags-y)
|
||||
|
||||
KBUILD_CFLAGS += -mno-red-zone
|
||||
KBUILD_CFLAGS += -mcmodel=kernel
|
||||
|
||||
# -funit-at-a-time shrinks the kernel .text considerably
|
||||
# unfortunately it makes reading oopses harder.
|
||||
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
|
||||
|
||||
# this works around some issues with generating unwind tables in older gccs
|
||||
# newer gccs do it by default
|
||||
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
|
||||
endif
|
||||
|
||||
# Make sure compiler does not have buggy stack-protector support.
|
||||
ifdef CONFIG_CC_STACKPROTECTOR
|
||||
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
|
||||
ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
|
||||
$(warning stack-protector enabled but compiler support broken)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef CONFIG_X86_X32
|
||||
x32_ld_ok := $(call try-run,\
|
||||
/bin/echo -e '1: .quad 1b' | \
|
||||
$(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \
|
||||
$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
|
||||
$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
|
||||
ifeq ($(x32_ld_ok),y)
|
||||
CONFIG_X86_X32_ABI := y
|
||||
KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI
|
||||
KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI
|
||||
else
|
||||
$(warning CONFIG_X86_X32 enabled but no binutils support)
|
||||
endif
|
||||
endif
|
||||
export CONFIG_X86_X32_ABI
|
||||
|
||||
# Don't unroll struct assignments with kmemcheck enabled
|
||||
ifeq ($(CONFIG_KMEMCHECK),y)
|
||||
KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
|
||||
endif
|
||||
|
||||
# Stackpointer is addressed different for 32 bit and 64 bit x86
|
||||
sp-$(CONFIG_X86_32) := esp
|
||||
sp-$(CONFIG_X86_64) := rsp
|
||||
|
||||
# does binutils support CFI?
|
||||
cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
|
||||
# is .cfi_signal_frame supported too?
|
||||
cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
|
||||
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
|
||||
|
||||
# does binutils support specific instructions?
|
||||
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
|
||||
asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
|
||||
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
|
||||
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
|
||||
|
||||
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
|
||||
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
|
||||
|
||||
LDFLAGS := -m elf_$(UTS_MACHINE)
|
||||
|
||||
# Speed up the build
|
||||
KBUILD_CFLAGS += -pipe
|
||||
# Workaround for a gcc prerelease that unfortunately was shipped in a SUSE release
|
||||
KBUILD_CFLAGS += -Wno-sign-compare
|
||||
#
|
||||
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
|
||||
# prevent gcc from generating any FP code by mistake
|
||||
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
|
||||
|
||||
KBUILD_CFLAGS += $(mflags-y)
|
||||
KBUILD_AFLAGS += $(mflags-y)
|
||||
|
||||
archscripts: scripts_basic
|
||||
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
|
||||
|
||||
###
|
||||
# Syscall table generation
|
||||
|
||||
archheaders:
|
||||
$(Q)$(MAKE) $(build)=arch/x86/syscalls all
|
||||
|
||||
archprepare:
|
||||
ifeq ($(CONFIG_KEXEC_FILE),y)
|
||||
$(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
|
||||
endif
|
||||
|
||||
###
|
||||
# Kernel objects
|
||||
|
||||
head-y := arch/x86/kernel/head_$(BITS).o
|
||||
head-y += arch/x86/kernel/head$(BITS).o
|
||||
head-y += arch/x86/kernel/head.o
|
||||
|
||||
libs-y += arch/x86/lib/
|
||||
|
||||
# See arch/x86/Kbuild for content of core part of the kernel
|
||||
core-y += arch/x86/
|
||||
|
||||
# drivers-y are linked after core-y
|
||||
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
|
||||
drivers-$(CONFIG_PCI) += arch/x86/pci/
|
||||
|
||||
# must be linked after kernel/
|
||||
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
|
||||
|
||||
# suspend and hibernation support
|
||||
drivers-$(CONFIG_PM) += arch/x86/power/
|
||||
|
||||
drivers-$(CONFIG_FB) += arch/x86/video/
|
||||
|
||||
####
|
||||
# boot loader support. Several targets are kept for legacy purposes
|
||||
|
||||
boot := arch/x86/boot
|
||||
|
||||
BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
|
||||
|
||||
PHONY += bzImage $(BOOT_TARGETS)
|
||||
|
||||
# Default kernel to build
|
||||
all: bzImage
|
||||
|
||||
# KBUILD_IMAGE specifies the target image being built
|
||||
KBUILD_IMAGE := $(boot)/bzImage
|
||||
|
||||
bzImage: vmlinux
|
||||
ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
|
||||
$(Q)$(MAKE) $(build)=arch/x86/tools posttest
|
||||
endif
|
||||
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
|
||||
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
|
||||
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
|
||||
|
||||
$(BOOT_TARGETS): vmlinux
|
||||
$(Q)$(MAKE) $(build)=$(boot) $@
|
||||
|
||||
PHONY += install
|
||||
install:
|
||||
$(Q)$(MAKE) $(build)=$(boot) $@
|
||||
|
||||
PHONY += vdso_install
|
||||
vdso_install:
|
||||
$(Q)$(MAKE) $(build)=arch/x86/vdso $@
|
||||
|
||||
archclean:
|
||||
$(Q)rm -rf $(objtree)/arch/i386
|
||||
$(Q)rm -rf $(objtree)/arch/x86_64
|
||||
$(Q)$(MAKE) $(clean)=$(boot)
|
||||
$(Q)$(MAKE) $(clean)=arch/x86/tools
|
||||
$(Q)$(MAKE) $(clean)=arch/x86/purgatory
|
||||
|
||||
define archhelp
|
||||
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
|
||||
echo ' install - Install kernel using'
|
||||
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
|
||||
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
|
||||
echo ' install to $$(INSTALL_PATH) and run lilo'
|
||||
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
|
||||
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
|
||||
echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
|
||||
echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
|
||||
echo ' bzdisk/fdimage*/isoimage also accept:'
|
||||
echo ' FDARGS="..." arguments for the booted kernel'
|
||||
echo ' FDINITRD=file initrd for the booted kernel'
|
||||
endef
|
||||
60
arch/x86/Makefile.um
Normal file
60
arch/x86/Makefile.um
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
core-y += arch/x86/crypto/
|
||||
|
||||
ifeq ($(CONFIG_X86_32),y)
|
||||
START := 0x8048000
|
||||
|
||||
LDFLAGS += -m elf_i386
|
||||
ELF_ARCH := i386
|
||||
ELF_FORMAT := elf32-i386
|
||||
CHECKFLAGS += -D__i386__
|
||||
|
||||
KBUILD_CFLAGS += $(call cc-option,-m32)
|
||||
KBUILD_AFLAGS += $(call cc-option,-m32)
|
||||
LINK-y += $(call cc-option,-m32)
|
||||
|
||||
export LDFLAGS
|
||||
|
||||
LDS_EXTRA := -Ui386
|
||||
export LDS_EXTRA
|
||||
|
||||
# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
|
||||
include $(srctree)/arch/x86/Makefile_32.cpu
|
||||
|
||||
# prevent gcc from keeping the stack 16 byte aligned. Taken from i386.
|
||||
cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
|
||||
|
||||
# Prevent sprintf in nfsd from being converted to strcpy and resulting in
|
||||
# an unresolved reference.
|
||||
cflags-y += -ffreestanding
|
||||
|
||||
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
|
||||
# a lot more stack due to the lack of sharing of stack slots. Also, gcc
|
||||
# 4.3.0 needs -funit-at-a-time for extern inline functions.
|
||||
KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
|
||||
echo $(call cc-option,-fno-unit-at-a-time); \
|
||||
else echo $(call cc-option,-funit-at-a-time); fi ;)
|
||||
|
||||
KBUILD_CFLAGS += $(cflags-y)
|
||||
|
||||
else
|
||||
|
||||
START := 0x60000000
|
||||
|
||||
KBUILD_CFLAGS += -fno-builtin -m64
|
||||
|
||||
CHECKFLAGS += -m64 -D__x86_64__
|
||||
KBUILD_AFLAGS += -m64
|
||||
LDFLAGS += -m elf_x86_64
|
||||
KBUILD_CPPFLAGS += -m64
|
||||
|
||||
ELF_ARCH := i386:x86-64
|
||||
ELF_FORMAT := elf64-x86-64
|
||||
|
||||
# /lib is not a symlink to /lib64 on every 64-bit distro; PLD is an example.
|
||||
|
||||
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
|
||||
LINK-y += -m64
|
||||
|
||||
# Do unit-at-a-time unconditionally on x86_64, following the host
|
||||
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
|
||||
endif
|
||||
70
arch/x86/Makefile_32.cpu
Normal file
70
arch/x86/Makefile_32.cpu
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# CPU tuning section - shared with UML.
|
||||
# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
|
||||
|
||||
#-mtune exists since gcc 3.4
|
||||
HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
|
||||
ifeq ($(HAS_MTUNE),y)
|
||||
tune = $(call cc-option,-mtune=$(1),$(2))
|
||||
else
|
||||
tune = $(call cc-option,-mcpu=$(1),$(2))
|
||||
endif
|
||||
|
||||
align := $(cc-option-align)
|
||||
cflags-$(CONFIG_M486) += -march=i486
|
||||
cflags-$(CONFIG_M586) += -march=i586
|
||||
cflags-$(CONFIG_M586TSC) += -march=i586
|
||||
cflags-$(CONFIG_M586MMX) += -march=pentium-mmx
|
||||
cflags-$(CONFIG_M686) += -march=i686
|
||||
cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2)
|
||||
cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3)
|
||||
cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3)
|
||||
cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4)
|
||||
cflags-$(CONFIG_MK6) += -march=k6
|
||||
# Please note, that patches that add -march=athlon-xp and friends are pointless.
|
||||
# They make zero difference whatsoever to performance at this time.
|
||||
cflags-$(CONFIG_MK7) += -march=athlon
|
||||
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
|
||||
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
||||
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
||||
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
|
||||
cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
|
||||
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
||||
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
|
||||
cflags-$(CONFIG_MVIAC7) += -march=i686
|
||||
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
|
||||
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
|
||||
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
|
||||
|
||||
# AMD Elan support
|
||||
cflags-$(CONFIG_MELAN) += -march=i486
|
||||
|
||||
# Geode GX1 support
|
||||
cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
|
||||
cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
|
||||
# add at the end to overwrite eventual tuning options from earlier
|
||||
# cpu entries
|
||||
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
|
||||
|
||||
# Work around the pentium-mmx code generator madness of gcc4.4.x which
|
||||
# does stack alignment by generating horrible code _before_ the mcount
|
||||
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
|
||||
# tracer assumptions. For i686, generic, core2 this is set by the
|
||||
# compiler anyway
|
||||
ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
|
||||
ADD_ACCUMULATE_OUTGOING_ARGS := y
|
||||
endif
|
||||
|
||||
# Work around a bug in the first implementations of asm goto
|
||||
# in gcc causing gcc to mess up the push and pop of the stack in some
|
||||
# uses of asm goto.
|
||||
ifeq ($(CONFIG_JUMP_LABEL), y)
|
||||
ADD_ACCUMULATE_OUTGOING_ARGS := y
|
||||
endif
|
||||
|
||||
cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
|
||||
|
||||
# Bug fix for binutils: this option is required in order to keep
|
||||
# binutils from generating NOPL instructions against our will.
|
||||
ifneq ($(CONFIG_X86_P6_NOP),y)
|
||||
cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,)
|
||||
endif
|
||||
187
arch/x86/boot/Makefile
Normal file
187
arch/x86/boot/Makefile
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
#
|
||||
# arch/x86/boot/Makefile
|
||||
#
|
||||
# This file is subject to the terms and conditions of the GNU General Public
|
||||
# License. See the file "COPYING" in the main directory of this archive
|
||||
# for more details.
|
||||
#
|
||||
# Copyright (C) 1994 by Linus Torvalds
|
||||
# Changed by many, many contributors over the years.
|
||||
#
|
||||
|
||||
# If you want to preset the SVGA mode, uncomment the next line and
|
||||
# set SVGA_MODE to whatever number you want.
|
||||
# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
|
||||
# The number is the same as you would ordinarily press at bootup.
|
||||
|
||||
SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
|
||||
|
||||
targets := vmlinux.bin setup.bin setup.elf bzImage
|
||||
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
|
||||
subdir- := compressed
|
||||
|
||||
setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
|
||||
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
|
||||
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
|
||||
setup-y += video-mode.o version.o
|
||||
setup-$(CONFIG_X86_APM_BOOT) += apm.o
|
||||
|
||||
# The link order of the video-*.o modules can matter. In particular,
|
||||
# video-vga.o *must* be listed first, followed by video-vesa.o.
|
||||
# Hardware-specific drivers should follow in the order they should be
|
||||
# probed, and video-bios.o should typically be last.
|
||||
setup-y += video-vga.o
|
||||
setup-y += video-vesa.o
|
||||
setup-y += video-bios.o
|
||||
|
||||
targets += $(setup-y)
|
||||
hostprogs-y := tools/build
|
||||
hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr
|
||||
|
||||
HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
|
||||
-include include/generated/autoconf.h \
|
||||
-D__EXPORTED_HEADERS__
|
||||
|
||||
ifdef CONFIG_X86_FEATURE_NAMES
|
||||
$(obj)/cpu.o: $(obj)/cpustr.h
|
||||
|
||||
quiet_cmd_cpustr = CPUSTR $@
|
||||
cmd_cpustr = $(obj)/mkcpustr > $@
|
||||
targets += cpustr.h
|
||||
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
|
||||
$(call if_changed,cpustr)
|
||||
endif
|
||||
clean-files += cpustr.h
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
|
||||
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
|
||||
GCOV_PROFILE := n
|
||||
|
||||
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
|
||||
|
||||
quiet_cmd_image = BUILD $@
|
||||
cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
|
||||
$(obj)/zoffset.h $@
|
||||
|
||||
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
|
||||
$(call if_changed,image)
|
||||
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
|
||||
|
||||
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
|
||||
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
|
||||
|
||||
sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
|
||||
|
||||
quiet_cmd_voffset = VOFFSET $@
|
||||
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
|
||||
|
||||
targets += voffset.h
|
||||
$(obj)/voffset.h: vmlinux FORCE
|
||||
$(call if_changed,voffset)
|
||||
|
||||
sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
|
||||
|
||||
quiet_cmd_zoffset = ZOFFSET $@
|
||||
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
|
||||
|
||||
targets += zoffset.h
|
||||
$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
|
||||
$(call if_changed,zoffset)
|
||||
|
||||
|
||||
AFLAGS_header.o += -I$(obj)
|
||||
$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
|
||||
|
||||
LDFLAGS_setup.elf := -T
|
||||
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
OBJCOPYFLAGS_setup.bin := -O binary
|
||||
$(obj)/setup.bin: $(obj)/setup.elf FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
$(obj)/compressed/vmlinux: FORCE
|
||||
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
|
||||
|
||||
# Set this if you want to pass append arguments to the
|
||||
# bzdisk/fdimage/isoimage kernel
|
||||
FDARGS =
|
||||
# Set this if you want an initrd included with the
|
||||
# bzdisk/fdimage/isoimage kernel
|
||||
FDINITRD =
|
||||
|
||||
image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
|
||||
|
||||
$(obj)/mtools.conf: $(src)/mtools.conf.in
|
||||
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
|
||||
|
||||
# This requires write access to /dev/fd0
|
||||
bzdisk: $(obj)/bzImage $(obj)/mtools.conf
# Writes a bootable syslinux floppy to /dev/fd0 (mtools drive a:).
# Every mtools invocation must read the generated $(obj)/mtools.conf;
# the file does not exist under $(src) when building with a separate
# output directory.
	MTOOLSRC=$(obj)/mtools.conf mformat a:			; sync
	syslinux /dev/fd0					; sync
	echo '$(image_cmdline)' | \
		MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg
	if [ -f '$(FDINITRD)' ] ; then \
		MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
	fi
	MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux	; sync
|
||||
|
||||
# These require being root or having syslinux 2.02 or higher installed
|
||||
fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
|
||||
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
|
||||
MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
|
||||
syslinux $(obj)/fdimage ; sync
|
||||
echo '$(image_cmdline)' | \
|
||||
MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
|
||||
if [ -f '$(FDINITRD)' ] ; then \
|
||||
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
|
||||
fi
|
||||
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync
|
||||
|
||||
fdimage288: $(obj)/bzImage $(obj)/mtools.conf
|
||||
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
|
||||
MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
|
||||
syslinux $(obj)/fdimage ; sync
|
||||
echo '$(image_cmdline)' | \
|
||||
MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
|
||||
if [ -f '$(FDINITRD)' ] ; then \
|
||||
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
|
||||
fi
|
||||
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync
|
||||
|
||||
isoimage: $(obj)/bzImage
|
||||
-rm -rf $(obj)/isoimage
|
||||
mkdir $(obj)/isoimage
|
||||
for i in lib lib64 share end ; do \
|
||||
if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
|
||||
cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
|
||||
break ; \
|
||||
fi ; \
|
||||
if [ $$i = end ] ; then exit 1 ; fi ; \
|
||||
done
|
||||
cp $(obj)/bzImage $(obj)/isoimage/linux
|
||||
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
|
||||
if [ -f '$(FDINITRD)' ] ; then \
|
||||
cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
|
||||
fi
|
||||
mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
|
||||
-no-emul-boot -boot-load-size 4 -boot-info-table \
|
||||
$(obj)/isoimage
|
||||
isohybrid $(obj)/image.iso 2>/dev/null || true
|
||||
rm -rf $(obj)/isoimage
|
||||
|
||||
bzlilo: $(obj)/bzImage
|
||||
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
|
||||
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
|
||||
cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
|
||||
cp System.map $(INSTALL_PATH)/
|
||||
if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
|
||||
|
||||
install:
|
||||
sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
|
||||
System.map "$(INSTALL_PATH)"
|
||||
165
arch/x86/boot/a20.c
Normal file
165
arch/x86/boot/a20.c
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Enable A20 gate (return -1 on failure)
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
#define MAX_8042_LOOPS 100000
|
||||
#define MAX_8042_FF 32
|
||||
|
||||
static int empty_8042(void)
|
||||
{
|
||||
u8 status;
|
||||
int loops = MAX_8042_LOOPS;
|
||||
int ffs = MAX_8042_FF;
|
||||
|
||||
while (loops--) {
|
||||
io_delay();
|
||||
|
||||
status = inb(0x64);
|
||||
if (status == 0xff) {
|
||||
/* FF is a plausible, but very unlikely status */
|
||||
if (!--ffs)
|
||||
return -1; /* Assume no KBC present */
|
||||
}
|
||||
if (status & 1) {
|
||||
/* Read and discard input data */
|
||||
io_delay();
|
||||
(void)inb(0x60);
|
||||
} else if (!(status & 2)) {
|
||||
/* Buffers empty, finished! */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Returns nonzero if the A20 line is enabled. The memory address
|
||||
used as a test is the int $0x80 vector, which should be safe. */
|
||||
|
||||
#define A20_TEST_ADDR (4*0x80)
|
||||
#define A20_TEST_SHORT 32
|
||||
#define A20_TEST_LONG 2097152 /* 2^21 */
|
||||
|
||||
static int a20_test(int loops)
|
||||
{
|
||||
int ok = 0;
|
||||
int saved, ctr;
|
||||
|
||||
set_fs(0x0000);
|
||||
set_gs(0xffff);
|
||||
|
||||
saved = ctr = rdfs32(A20_TEST_ADDR);
|
||||
|
||||
while (loops--) {
|
||||
wrfs32(++ctr, A20_TEST_ADDR);
|
||||
io_delay(); /* Serialize and make delay constant */
|
||||
ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr;
|
||||
if (ok)
|
||||
break;
|
||||
}
|
||||
|
||||
wrfs32(saved, A20_TEST_ADDR);
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* Quick test to see if A20 is already enabled */
|
||||
static int a20_test_short(void)
|
||||
{
|
||||
return a20_test(A20_TEST_SHORT);
|
||||
}
|
||||
|
||||
/* Longer test that actually waits for A20 to come on line; this
|
||||
is useful when dealing with the KBC or other slow external circuitry. */
|
||||
static int a20_test_long(void)
|
||||
{
|
||||
return a20_test(A20_TEST_LONG);
|
||||
}
|
||||
|
||||
static void enable_a20_bios(void)
|
||||
{
|
||||
struct biosregs ireg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0x2401;
|
||||
intcall(0x15, &ireg, NULL);
|
||||
}
|
||||
|
||||
/*
 * Switch A20 on through the keyboard controller.  The controller is
 * drained before the first write and again after each one; the results
 * of those drains are ignored.
 */
static void enable_a20_kbc(void)
{
	empty_8042();
	outb(0xd1, 0x64);	/* "command write" */

	empty_8042();
	outb(0xdf, 0x60);	/* value that turns A20 on */

	empty_8042();
	outb(0xff, 0x64);	/* null command, but UHCI wants it */

	empty_8042();
}
|
||||
|
||||
static void enable_a20_fast(void)
|
||||
{
|
||||
u8 port_a;
|
||||
|
||||
port_a = inb(0x92); /* Configuration port A */
|
||||
port_a |= 0x02; /* Enable A20 */
|
||||
port_a &= ~0x01; /* Do not reset machine */
|
||||
outb(port_a, 0x92);
|
||||
}
|
||||
|
||||
/*
|
||||
* Actual routine to enable A20; return 0 on ok, -1 on failure
|
||||
*/
|
||||
|
||||
#define A20_ENABLE_LOOPS 255 /* Number of times to try */
|
||||
|
||||
int enable_a20(void)
|
||||
{
|
||||
int loops = A20_ENABLE_LOOPS;
|
||||
int kbc_err;
|
||||
|
||||
while (loops--) {
|
||||
/* First, check to see if A20 is already enabled
|
||||
(legacy free, etc.) */
|
||||
if (a20_test_short())
|
||||
return 0;
|
||||
|
||||
/* Next, try the BIOS (INT 0x15, AX=0x2401) */
|
||||
enable_a20_bios();
|
||||
if (a20_test_short())
|
||||
return 0;
|
||||
|
||||
/* Try enabling A20 through the keyboard controller */
|
||||
kbc_err = empty_8042();
|
||||
|
||||
if (a20_test_short())
|
||||
return 0; /* BIOS worked, but with delayed reaction */
|
||||
|
||||
if (!kbc_err) {
|
||||
enable_a20_kbc();
|
||||
if (a20_test_long())
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Finally, try enabling the "fast A20 gate" */
|
||||
enable_a20_fast();
|
||||
if (a20_test_long())
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
75
arch/x86/boot/apm.c
Normal file
75
arch/x86/boot/apm.c
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* Original APM BIOS checking by Stephen Rothwell, May 1994
|
||||
* (sfr@canb.auug.org.au)
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Get APM BIOS information
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
int query_apm_bios(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
/* APM BIOS installation check */
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x53;
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
if (oreg.flags & X86_EFLAGS_CF)
|
||||
return -1; /* No APM BIOS */
|
||||
|
||||
if (oreg.bx != 0x504d) /* "PM" signature */
|
||||
return -1;
|
||||
|
||||
if (!(oreg.cx & 0x02)) /* 32 bits supported? */
|
||||
return -1;
|
||||
|
||||
/* Disconnect first, just in case */
|
||||
ireg.al = 0x04;
|
||||
intcall(0x15, &ireg, NULL);
|
||||
|
||||
/* 32-bit connect */
|
||||
ireg.al = 0x03;
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
boot_params.apm_bios_info.cseg = oreg.ax;
|
||||
boot_params.apm_bios_info.offset = oreg.ebx;
|
||||
boot_params.apm_bios_info.cseg_16 = oreg.cx;
|
||||
boot_params.apm_bios_info.dseg = oreg.dx;
|
||||
boot_params.apm_bios_info.cseg_len = oreg.si;
|
||||
boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
|
||||
boot_params.apm_bios_info.dseg_len = oreg.di;
|
||||
|
||||
if (oreg.flags & X86_EFLAGS_CF)
|
||||
return -1;
|
||||
|
||||
/* Redo the installation check as the 32-bit connect;
|
||||
some BIOSes return different flags this way... */
|
||||
|
||||
ireg.al = 0x00;
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
|
||||
/* Failure with 32-bit connect, try to disconect and ignore */
|
||||
ireg.al = 0x04;
|
||||
intcall(0x15, &ireg, NULL);
|
||||
return -1;
|
||||
}
|
||||
|
||||
boot_params.apm_bios_info.version = oreg.ax;
|
||||
boot_params.apm_bios_info.flags = oreg.cx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
82
arch/x86/boot/bioscall.S
Normal file
82
arch/x86/boot/bioscall.S
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
*
|
||||
* Copyright 2009-2014 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2 or (at your
|
||||
* option) any later version; incorporated herein by reference.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
|
||||
* touching registers they shouldn't be.
|
||||
*/
|
||||
|
||||
.code16
|
||||
.section ".inittext","ax"
|
||||
.globl intcall
|
||||
.type intcall, @function
|
||||
intcall:
	/*
	 * The interrupt number arrives in %al and is patched into the
	 * operand byte of the INT instruction at label 3 below.  The
	 * store is skipped when the byte already holds that number.
	 */
	cmpb	%al, 3f
	je	1f
	movb	%al, 3f
	jmp	1f		/* Synchronize pipeline */
1:
	/* Preserve the caller's flags, %fs, %gs and general registers */
	pushfl
	pushw	%fs
	pushw	%gs
	pushal

	/*
	 * Reserve a 44-byte frame and fill it with 11 dwords read from
	 * the block that %dx points to (the input register state).
	 */
	subw	$44, %sp
	movw	%dx, %si
	movw	%sp, %di
	movw	$11, %cx
	rep; movsd

	/* Load every register from that frame */
	popal
	popw	%gs
	popw	%fs
	popw	%es
	popw	%ds
	popfl

	/* The interrupt itself: opcode byte followed by the patched number */
	.byte	0xcd		/* INT opcode */
3:	.byte	0

	/* Capture the resulting state in a fresh 44-byte frame */
	pushfl
	pushw	%ds
	pushw	%es
	pushw	%fs
	pushw	%gs
	pushal

	/*
	 * Put back what C code relies on: direction flag clear, upper
	 * half of %esp zero, and %ds/%es equal to %cs.
	 */
	cld
	movzwl	%sp, %esp
	movw	%cs, %ax
	movw	%ax, %ds
	movw	%ax, %es

	/*
	 * Offset 68 skips the 44-byte frame just pushed and lands on the
	 * %cx value saved on entry, i.e. the third argument (the output
	 * block pointer).  A zero pointer means no copy-out is wanted.
	 */
	movw	68(%esp), %di	/* Original %cx == 3rd argument */
	andw	%di, %di
	jz	4f
	movw	%sp, %si
	movw	$11, %cx
	rep; movsd
4:	addw	$44, %sp

	/* Undo the entry-time saves and return to the caller */
	popal
	popw	%gs
	popw	%fs
	popfl
	retl
	.size	intcall, .-intcall
|
||||
43
arch/x86/boot/bitops.h
Normal file
43
arch/x86/boot/bitops.h
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Very simple bitops for the boot code.
|
||||
*/
|
||||
|
||||
#ifndef BOOT_BITOPS_H
|
||||
#define BOOT_BITOPS_H
|
||||
#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
|
||||
|
||||
static inline int constant_test_bit(int nr, const void *addr)
|
||||
{
|
||||
const u32 *p = (const u32 *)addr;
|
||||
return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
|
||||
}
|
||||
static inline int variable_test_bit(int nr, const void *addr)
|
||||
{
|
||||
u8 v;
|
||||
const u32 *p = (const u32 *)addr;
|
||||
|
||||
asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
|
||||
return v;
|
||||
}
|
||||
|
||||
#define test_bit(nr,addr) \
|
||||
(__builtin_constant_p(nr) ? \
|
||||
constant_test_bit((nr),(addr)) : \
|
||||
variable_test_bit((nr),(addr)))
|
||||
|
||||
static inline void set_bit(int nr, void *addr)
|
||||
{
|
||||
asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
|
||||
}
|
||||
|
||||
#endif /* BOOT_BITOPS_H */
|
||||
359
arch/x86/boot/boot.h
Normal file
359
arch/x86/boot/boot.h
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Header file for the real-mode kernel code
|
||||
*/
|
||||
|
||||
#ifndef BOOT_BOOT_H
|
||||
#define BOOT_BOOT_H
|
||||
|
||||
#define STACK_SIZE 512 /* Minimum number of bytes for stack */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/edd.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/setup.h>
|
||||
#include "bitops.h"
|
||||
#include "ctype.h"
|
||||
#include "cpuflags.h"
|
||||
|
||||
/* Useful macros */
|
||||
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
|
||||
|
||||
extern struct setup_header hdr;
|
||||
extern struct boot_params boot_params;
|
||||
|
||||
#define cpu_relax() asm volatile("rep; nop")
|
||||
|
||||
/* Basic port I/O */
|
||||
static inline void outb(u8 v, u16 port)
|
||||
{
|
||||
asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
|
||||
}
|
||||
static inline u8 inb(u16 port)
|
||||
{
|
||||
u8 v;
|
||||
asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void outw(u16 v, u16 port)
|
||||
{
|
||||
asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
|
||||
}
|
||||
static inline u16 inw(u16 port)
|
||||
{
|
||||
u16 v;
|
||||
asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void outl(u32 v, u16 port)
|
||||
{
|
||||
asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
|
||||
}
|
||||
static inline u32 inl(u16 port)
|
||||
{
|
||||
u32 v;
|
||||
asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void io_delay(void)
|
||||
{
|
||||
const u16 DELAY_PORT = 0x80;
|
||||
asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
|
||||
}
|
||||
|
||||
/* These functions are used to reference data in other segments. */
|
||||
|
||||
static inline u16 ds(void)
|
||||
{
|
||||
u16 seg;
|
||||
asm("movw %%ds,%0" : "=rm" (seg));
|
||||
return seg;
|
||||
}
|
||||
|
||||
static inline void set_fs(u16 seg)
|
||||
{
|
||||
asm volatile("movw %0,%%fs" : : "rm" (seg));
|
||||
}
|
||||
static inline u16 fs(void)
|
||||
{
|
||||
u16 seg;
|
||||
asm volatile("movw %%fs,%0" : "=rm" (seg));
|
||||
return seg;
|
||||
}
|
||||
|
||||
static inline void set_gs(u16 seg)
|
||||
{
|
||||
asm volatile("movw %0,%%gs" : : "rm" (seg));
|
||||
}
|
||||
static inline u16 gs(void)
|
||||
{
|
||||
u16 seg;
|
||||
asm volatile("movw %%gs,%0" : "=rm" (seg));
|
||||
return seg;
|
||||
}
|
||||
|
||||
typedef unsigned int addr_t;
|
||||
|
||||
static inline u8 rdfs8(addr_t addr)
|
||||
{
|
||||
u8 v;
|
||||
asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
|
||||
return v;
|
||||
}
|
||||
static inline u16 rdfs16(addr_t addr)
|
||||
{
|
||||
u16 v;
|
||||
asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
|
||||
return v;
|
||||
}
|
||||
static inline u32 rdfs32(addr_t addr)
|
||||
{
|
||||
u32 v;
|
||||
asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void wrfs8(u8 v, addr_t addr)
|
||||
{
|
||||
asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
|
||||
}
|
||||
static inline void wrfs16(u16 v, addr_t addr)
|
||||
{
|
||||
asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
|
||||
}
|
||||
static inline void wrfs32(u32 v, addr_t addr)
|
||||
{
|
||||
asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
|
||||
}
|
||||
|
||||
static inline u8 rdgs8(addr_t addr)
|
||||
{
|
||||
u8 v;
|
||||
asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
|
||||
return v;
|
||||
}
|
||||
static inline u16 rdgs16(addr_t addr)
|
||||
{
|
||||
u16 v;
|
||||
asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
|
||||
return v;
|
||||
}
|
||||
static inline u32 rdgs32(addr_t addr)
|
||||
{
|
||||
u32 v;
|
||||
asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void wrgs8(u8 v, addr_t addr)
|
||||
{
|
||||
asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
|
||||
}
|
||||
static inline void wrgs16(u16 v, addr_t addr)
|
||||
{
|
||||
asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
|
||||
}
|
||||
static inline void wrgs32(u32 v, addr_t addr)
|
||||
{
|
||||
asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
|
||||
}
|
||||
|
||||
/* Note: these only return true/false, not a signed return value! */
|
||||
static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
|
||||
{
|
||||
u8 diff;
|
||||
asm volatile("fs; repe; cmpsb; setnz %0"
|
||||
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
|
||||
return diff;
|
||||
}
|
||||
static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
|
||||
{
|
||||
u8 diff;
|
||||
asm volatile("gs; repe; cmpsb; setnz %0"
|
||||
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
|
||||
return diff;
|
||||
}
|
||||
|
||||
/* Heap -- available for dynamic lists. */
|
||||
extern char _end[];
|
||||
extern char *HEAP;
|
||||
extern char *heap_end;
|
||||
#define RESET_HEAP() ((void *)( HEAP = _end ))
|
||||
static inline char *__get_heap(size_t s, size_t a, size_t n)
|
||||
{
|
||||
char *tmp;
|
||||
|
||||
HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1));
|
||||
tmp = HEAP;
|
||||
HEAP += s*n;
|
||||
return tmp;
|
||||
}
|
||||
#define GET_HEAP(type, n) \
|
||||
((type *)__get_heap(sizeof(type),__alignof__(type),(n)))
|
||||
|
||||
static inline bool heap_free(size_t n)
|
||||
{
|
||||
return (int)(heap_end-HEAP) >= (int)n;
|
||||
}
|
||||
|
||||
/* copy.S */
|
||||
|
||||
void copy_to_fs(addr_t dst, void *src, size_t len);
|
||||
void *copy_from_fs(void *dst, addr_t src, size_t len);
|
||||
void copy_to_gs(addr_t dst, void *src, size_t len);
|
||||
void *copy_from_gs(void *dst, addr_t src, size_t len);
|
||||
|
||||
/* a20.c */
|
||||
int enable_a20(void);
|
||||
|
||||
/* apm.c */
|
||||
int query_apm_bios(void);
|
||||
|
||||
/* bioscall.c */
|
||||
struct biosregs {
|
||||
union {
|
||||
struct {
|
||||
u32 edi;
|
||||
u32 esi;
|
||||
u32 ebp;
|
||||
u32 _esp;
|
||||
u32 ebx;
|
||||
u32 edx;
|
||||
u32 ecx;
|
||||
u32 eax;
|
||||
u32 _fsgs;
|
||||
u32 _dses;
|
||||
u32 eflags;
|
||||
};
|
||||
struct {
|
||||
u16 di, hdi;
|
||||
u16 si, hsi;
|
||||
u16 bp, hbp;
|
||||
u16 _sp, _hsp;
|
||||
u16 bx, hbx;
|
||||
u16 dx, hdx;
|
||||
u16 cx, hcx;
|
||||
u16 ax, hax;
|
||||
u16 gs, fs;
|
||||
u16 es, ds;
|
||||
u16 flags, hflags;
|
||||
};
|
||||
struct {
|
||||
u8 dil, dih, edi2, edi3;
|
||||
u8 sil, sih, esi2, esi3;
|
||||
u8 bpl, bph, ebp2, ebp3;
|
||||
u8 _spl, _sph, _esp2, _esp3;
|
||||
u8 bl, bh, ebx2, ebx3;
|
||||
u8 dl, dh, edx2, edx3;
|
||||
u8 cl, ch, ecx2, ecx3;
|
||||
u8 al, ah, eax2, eax3;
|
||||
};
|
||||
};
|
||||
};
|
||||
void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
|
||||
|
||||
/* cmdline.c */
|
||||
int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
|
||||
int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
|
||||
static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
|
||||
{
|
||||
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
|
||||
|
||||
if (cmd_line_ptr >= 0x100000)
|
||||
return -1; /* inaccessible */
|
||||
|
||||
return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
|
||||
}
|
||||
|
||||
static inline int cmdline_find_option_bool(const char *option)
|
||||
{
|
||||
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
|
||||
|
||||
if (cmd_line_ptr >= 0x100000)
|
||||
return -1; /* inaccessible */
|
||||
|
||||
return __cmdline_find_option_bool(cmd_line_ptr, option);
|
||||
}
|
||||
|
||||
/* cpu.c, cpucheck.c */
|
||||
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
|
||||
int validate_cpu(void);
|
||||
|
||||
/* early_serial_console.c */
|
||||
extern int early_serial_base;
|
||||
void console_init(void);
|
||||
|
||||
/* edd.c */
|
||||
void query_edd(void);
|
||||
|
||||
/* header.S */
|
||||
void __attribute__((noreturn)) die(void);
|
||||
|
||||
/* mca.c */
|
||||
int query_mca(void);
|
||||
|
||||
/* memory.c */
|
||||
int detect_memory(void);
|
||||
|
||||
/* pm.c */
|
||||
void __attribute__((noreturn)) go_to_protected_mode(void);
|
||||
|
||||
/* pmjump.S */
|
||||
void __attribute__((noreturn))
|
||||
protected_mode_jump(u32 entrypoint, u32 bootparams);
|
||||
|
||||
/* printf.c */
|
||||
int sprintf(char *buf, const char *fmt, ...);
|
||||
int vsprintf(char *buf, const char *fmt, va_list args);
|
||||
int printf(const char *fmt, ...);
|
||||
|
||||
/* regs.c */
|
||||
void initregs(struct biosregs *regs);
|
||||
|
||||
/* string.c */
|
||||
int strcmp(const char *str1, const char *str2);
|
||||
int strncmp(const char *cs, const char *ct, size_t count);
|
||||
size_t strnlen(const char *s, size_t maxlen);
|
||||
unsigned int atou(const char *s);
|
||||
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
|
||||
size_t strlen(const char *s);
|
||||
|
||||
/* tty.c */
|
||||
void puts(const char *);
|
||||
void putchar(int);
|
||||
int getchar(void);
|
||||
void kbd_flush(void);
|
||||
int getchar_timeout(void);
|
||||
|
||||
/* video.c */
|
||||
void set_video(void);
|
||||
|
||||
/* video-mode.c */
|
||||
int set_mode(u16 mode);
|
||||
int mode_defined(u16 mode);
|
||||
void probe_cards(int unsafe);
|
||||
|
||||
/* video-vesa.c */
|
||||
void vesa_store_edid(void);
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* BOOT_BOOT_H */
|
||||
158
arch/x86/boot/cmdline.c
Normal file
158
arch/x86/boot/cmdline.c
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Simple command-line parser for early boot.
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
/*
 * Cheap whitespace test: every byte value up to and including ' '
 * counts as a separator.  Close enough for command-line parsing.
 */
static inline int myisspace(u8 c)
{
	return !(c > ' ');
}
|
||||
|
||||
/*
|
||||
* Find a non-boolean option, that is, "option=argument". In accordance
|
||||
* with standard Linux practice, if this option is repeated, this returns
|
||||
* the last instance on the command line.
|
||||
*
|
||||
* Returns the length of the argument (regardless of if it was
|
||||
* truncated to fit in the buffer), or -1 on not found.
|
||||
*/
|
||||
int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
|
||||
{
|
||||
addr_t cptr;
|
||||
char c;
|
||||
int len = -1;
|
||||
const char *opptr = NULL;
|
||||
char *bufptr = buffer;
|
||||
enum {
|
||||
st_wordstart, /* Start of word/after whitespace */
|
||||
st_wordcmp, /* Comparing this word */
|
||||
st_wordskip, /* Miscompare, skip */
|
||||
st_bufcpy /* Copying this to buffer */
|
||||
} state = st_wordstart;
|
||||
|
||||
if (!cmdline_ptr)
|
||||
return -1; /* No command line */
|
||||
|
||||
cptr = cmdline_ptr & 0xf;
|
||||
set_fs(cmdline_ptr >> 4);
|
||||
|
||||
while (cptr < 0x10000 && (c = rdfs8(cptr++))) {
|
||||
switch (state) {
|
||||
case st_wordstart:
|
||||
if (myisspace(c))
|
||||
break;
|
||||
|
||||
/* else */
|
||||
state = st_wordcmp;
|
||||
opptr = option;
|
||||
/* fall through */
|
||||
|
||||
case st_wordcmp:
|
||||
if (c == '=' && !*opptr) {
|
||||
len = 0;
|
||||
bufptr = buffer;
|
||||
state = st_bufcpy;
|
||||
} else if (myisspace(c)) {
|
||||
state = st_wordstart;
|
||||
} else if (c != *opptr++) {
|
||||
state = st_wordskip;
|
||||
}
|
||||
break;
|
||||
|
||||
case st_wordskip:
|
||||
if (myisspace(c))
|
||||
state = st_wordstart;
|
||||
break;
|
||||
|
||||
case st_bufcpy:
|
||||
if (myisspace(c)) {
|
||||
state = st_wordstart;
|
||||
} else {
|
||||
if (len < bufsize-1)
|
||||
*bufptr++ = c;
|
||||
len++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bufsize)
|
||||
*bufptr = '\0';
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a boolean option (like quiet,noapic,nosmp....)
|
||||
*
|
||||
* Returns the position of that option (starts counting with 1)
|
||||
* or 0 on not found
|
||||
*/
|
||||
int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
|
||||
{
|
||||
addr_t cptr;
|
||||
char c;
|
||||
int pos = 0, wstart = 0;
|
||||
const char *opptr = NULL;
|
||||
enum {
|
||||
st_wordstart, /* Start of word/after whitespace */
|
||||
st_wordcmp, /* Comparing this word */
|
||||
st_wordskip, /* Miscompare, skip */
|
||||
} state = st_wordstart;
|
||||
|
||||
if (!cmdline_ptr)
|
||||
return -1; /* No command line */
|
||||
|
||||
cptr = cmdline_ptr & 0xf;
|
||||
set_fs(cmdline_ptr >> 4);
|
||||
|
||||
while (cptr < 0x10000) {
|
||||
c = rdfs8(cptr++);
|
||||
pos++;
|
||||
|
||||
switch (state) {
|
||||
case st_wordstart:
|
||||
if (!c)
|
||||
return 0;
|
||||
else if (myisspace(c))
|
||||
break;
|
||||
|
||||
state = st_wordcmp;
|
||||
opptr = option;
|
||||
wstart = pos;
|
||||
/* fall through */
|
||||
|
||||
case st_wordcmp:
|
||||
if (!*opptr)
|
||||
if (!c || myisspace(c))
|
||||
return wstart;
|
||||
else
|
||||
state = st_wordskip;
|
||||
else if (!c)
|
||||
return 0;
|
||||
else if (c != *opptr++)
|
||||
state = st_wordskip;
|
||||
break;
|
||||
|
||||
case st_wordskip:
|
||||
if (!c)
|
||||
return 0;
|
||||
else if (myisspace(c))
|
||||
state = st_wordstart;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0; /* Buffer overrun */
|
||||
}
|
||||
11
arch/x86/boot/code16gcc.h
Normal file
11
arch/x86/boot/code16gcc.h
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
#
|
||||
# code16gcc.h
|
||||
#
|
||||
# This file is added to the assembler via -Wa when compiling 16-bit C code.
|
||||
# This is done this way instead via asm() to make sure gcc does not reorder
|
||||
# things around us.
|
||||
#
|
||||
# gcc 4.9+ has a real -m16 option so we can drop this hack long term.
|
||||
#
|
||||
|
||||
.code16gcc
|
||||
87
arch/x86/boot/compressed/Makefile
Normal file
87
arch/x86/boot/compressed/Makefile
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
#
|
||||
# linux/arch/x86/boot/compressed/Makefile
|
||||
#
|
||||
# create a compressed vmlinux image from the original vmlinux
|
||||
#
|
||||
|
||||
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
|
||||
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
|
||||
|
||||
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
|
||||
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
|
||||
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
|
||||
cflags-$(CONFIG_X86_32) := -march=i386
|
||||
cflags-$(CONFIG_X86_64) := -mcmodel=small
|
||||
KBUILD_CFLAGS += $(cflags-y)
|
||||
KBUILD_CFLAGS += -mno-mmx -mno-sse
|
||||
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
|
||||
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
|
||||
|
||||
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
|
||||
GCOV_PROFILE := n
|
||||
|
||||
LDFLAGS := -m elf_$(UTS_MACHINE)
|
||||
LDFLAGS_vmlinux := -T
|
||||
|
||||
hostprogs-y := mkpiggy
|
||||
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
|
||||
|
||||
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
|
||||
$(obj)/string.o $(obj)/cmdline.o \
|
||||
$(obj)/piggy.o $(obj)/cpuflags.o
|
||||
|
||||
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
|
||||
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
|
||||
|
||||
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
|
||||
|
||||
vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
|
||||
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
|
||||
|
||||
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
|
||||
$(call if_changed,ld)
|
||||
@:
|
||||
|
||||
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
|
||||
$(obj)/vmlinux.bin: vmlinux FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
|
||||
|
||||
CMD_RELOCS = arch/x86/tools/relocs
|
||||
quiet_cmd_relocs = RELOCS $@
|
||||
cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
|
||||
$(obj)/vmlinux.relocs: vmlinux FORCE
|
||||
$(call if_changed,relocs)
|
||||
|
||||
vmlinux.bin.all-y := $(obj)/vmlinux.bin
|
||||
vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
|
||||
|
||||
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
|
||||
$(call if_changed,gzip)
|
||||
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
|
||||
$(call if_changed,bzip2)
|
||||
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
|
||||
$(call if_changed,lzma)
|
||||
$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
|
||||
$(call if_changed,xzkern)
|
||||
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
|
||||
$(call if_changed,lzo)
|
||||
$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
|
||||
$(call if_changed,lz4)
|
||||
|
||||
suffix-$(CONFIG_KERNEL_GZIP) := gz
|
||||
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
|
||||
suffix-$(CONFIG_KERNEL_LZMA) := lzma
|
||||
suffix-$(CONFIG_KERNEL_XZ) := xz
|
||||
suffix-$(CONFIG_KERNEL_LZO) := lzo
|
||||
suffix-$(CONFIG_KERNEL_LZ4) := lz4
|
||||
|
||||
RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
|
||||
$(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh)
|
||||
quiet_cmd_mkpiggy = MKPIGGY $@
|
||||
cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
|
||||
|
||||
targets += piggy.S
|
||||
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
|
||||
$(call if_changed,mkpiggy)
|
||||
336
arch/x86/boot/compressed/aslr.c
Normal file
336
arch/x86/boot/compressed/aslr.c
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
#include "misc.h"
|
||||
|
||||
#include <asm/msr.h>
|
||||
#include <asm/archrandom.h>
|
||||
#include <asm/e820.h>
|
||||
|
||||
#include <generated/compile.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/uts.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <generated/utsrelease.h>
|
||||
|
||||
/* Simplified build-specific string for starting entropy. */
|
||||
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
|
||||
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
|
||||
|
||||
#define I8254_PORT_CONTROL 0x43
|
||||
#define I8254_PORT_COUNTER0 0x40
|
||||
#define I8254_CMD_READBACK 0xC0
|
||||
#define I8254_SELECT_COUNTER0 0x02
|
||||
#define I8254_STATUS_NOTREADY 0x40
|
||||
static inline u16 i8254(void)
|
||||
{
|
||||
u16 status, timer;
|
||||
|
||||
do {
|
||||
outb(I8254_PORT_CONTROL,
|
||||
I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
|
||||
status = inb(I8254_PORT_COUNTER0);
|
||||
timer = inb(I8254_PORT_COUNTER0);
|
||||
timer |= inb(I8254_PORT_COUNTER0) << 8;
|
||||
} while (status & I8254_STATUS_NOTREADY);
|
||||
|
||||
return timer;
|
||||
}
|
||||
|
||||
/*
 * Fold a memory area into a running hash.
 *
 * For every complete unsigned long in [area, area + size) the hash is
 * rotated right by 7 bits and XORed with that word.  Trailing bytes
 * that do not fill a whole word are ignored.
 *
 * Returns the updated hash (the input hash when size is smaller than
 * one word).
 */
static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	/* Keep the const qualifier: the area is only read */
	const unsigned long *words = area;
	const size_t nwords = size / sizeof(hash);
	size_t i;

	for (i = 0; i < nwords; i++) {
		/* Rotate by odd number of bits and XOR. */
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= words[i];
	}

	return hash;
}
|
||||
|
||||
/* Attempt to create a simple but unpredictable starting entropy. */
|
||||
static unsigned long get_random_boot(void)
|
||||
{
|
||||
unsigned long hash = 0;
|
||||
|
||||
hash = rotate_xor(hash, build_str, sizeof(build_str));
|
||||
hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static unsigned long get_random_long(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
|
||||
#else
|
||||
const unsigned long mix_const = 0x3f39e593UL;
|
||||
#endif
|
||||
unsigned long raw, random = get_random_boot();
|
||||
bool use_i8254 = true;
|
||||
|
||||
debug_putstr("KASLR using");
|
||||
|
||||
if (has_cpuflag(X86_FEATURE_RDRAND)) {
|
||||
debug_putstr(" RDRAND");
|
||||
if (rdrand_long(&raw)) {
|
||||
random ^= raw;
|
||||
use_i8254 = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_cpuflag(X86_FEATURE_TSC)) {
|
||||
debug_putstr(" RDTSC");
|
||||
rdtscll(raw);
|
||||
|
||||
random ^= raw;
|
||||
use_i8254 = false;
|
||||
}
|
||||
|
||||
if (use_i8254) {
|
||||
debug_putstr(" i8254");
|
||||
random ^= i8254();
|
||||
}
|
||||
|
||||
/* Circular multiply for better bit diffusion */
|
||||
asm("mul %3"
|
||||
: "=a" (random), "=d" (raw)
|
||||
: "a" (random), "rm" (mix_const));
|
||||
random += raw;
|
||||
|
||||
debug_putstr("...\n");
|
||||
|
||||
return random;
|
||||
}
|
||||
|
||||
/* A memory range described by its first address and its length in bytes */
struct mem_vector {
	unsigned long start;
	unsigned long size;
};

/* Fixed table of ranges that a candidate kernel image must not touch */
#define MEM_AVOID_MAX 5
static struct mem_vector mem_avoid[MEM_AVOID_MAX];

/*
 * Return true when item lies completely inside region (sharing either
 * boundary is allowed).  Neither argument is modified.
 */
static bool mem_contains(const struct mem_vector *region,
			 const struct mem_vector *item)
{
	/* Item at least partially before region. */
	if (item->start < region->start)
		return false;
	/* Item at least partially after region. */
	if (item->start + item->size > region->start + region->size)
		return false;
	return true;
}

/*
 * Return true when the two ranges share at least one byte.  Ranges that
 * merely touch end-to-start do not overlap.  Neither argument is
 * modified.
 */
static bool mem_overlaps(const struct mem_vector *one,
			 const struct mem_vector *two)
{
	/* Item one is entirely before item two. */
	if (one->start + one->size <= two->start)
		return false;
	/* Item one is entirely after item two. */
	if (one->start >= two->start + two->size)
		return false;
	return true;
}
|
||||
|
||||
static void mem_avoid_init(unsigned long input, unsigned long input_size,
|
||||
unsigned long output, unsigned long output_size)
|
||||
{
|
||||
u64 initrd_start, initrd_size;
|
||||
u64 cmd_line, cmd_line_size;
|
||||
unsigned long unsafe, unsafe_len;
|
||||
char *ptr;
|
||||
|
||||
/*
|
||||
* Avoid the region that is unsafe to overlap during
|
||||
* decompression (see calculations at top of misc.c).
|
||||
*/
|
||||
unsafe_len = (output_size >> 12) + 32768 + 18;
|
||||
unsafe = (unsigned long)input + input_size - unsafe_len;
|
||||
mem_avoid[0].start = unsafe;
|
||||
mem_avoid[0].size = unsafe_len;
|
||||
|
||||
/* Avoid initrd. */
|
||||
initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
|
||||
initrd_start |= real_mode->hdr.ramdisk_image;
|
||||
initrd_size = (u64)real_mode->ext_ramdisk_size << 32;
|
||||
initrd_size |= real_mode->hdr.ramdisk_size;
|
||||
mem_avoid[1].start = initrd_start;
|
||||
mem_avoid[1].size = initrd_size;
|
||||
|
||||
/* Avoid kernel command line. */
|
||||
cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
|
||||
cmd_line |= real_mode->hdr.cmd_line_ptr;
|
||||
/* Calculate size of cmd_line. */
|
||||
ptr = (char *)(unsigned long)cmd_line;
|
||||
for (cmd_line_size = 0; ptr[cmd_line_size++]; )
|
||||
;
|
||||
mem_avoid[2].start = cmd_line;
|
||||
mem_avoid[2].size = cmd_line_size;
|
||||
|
||||
/* Avoid heap memory. */
|
||||
mem_avoid[3].start = (unsigned long)free_mem_ptr;
|
||||
mem_avoid[3].size = BOOT_HEAP_SIZE;
|
||||
|
||||
/* Avoid stack memory. */
|
||||
mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
|
||||
mem_avoid[4].size = BOOT_STACK_SIZE;
|
||||
}
|
||||
|
||||
/* Does this memory vector overlap a known avoided area? */
|
||||
static bool mem_avoid_overlap(struct mem_vector *img)
|
||||
{
|
||||
int i;
|
||||
struct setup_data *ptr;
|
||||
|
||||
for (i = 0; i < MEM_AVOID_MAX; i++) {
|
||||
if (mem_overlaps(img, &mem_avoid[i]))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Avoid all entries in the setup_data linked list. */
|
||||
ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
|
||||
while (ptr) {
|
||||
struct mem_vector avoid;
|
||||
|
||||
avoid.start = (unsigned long)ptr;
|
||||
avoid.size = sizeof(*ptr) + ptr->len;
|
||||
|
||||
if (mem_overlaps(img, &avoid))
|
||||
return true;
|
||||
|
||||
ptr = (struct setup_data *)(unsigned long)ptr->next;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
|
||||
CONFIG_PHYSICAL_ALIGN];
|
||||
static unsigned long slot_max;
|
||||
|
||||
static void slots_append(unsigned long addr)
|
||||
{
|
||||
/* Overflowing the slots list should be impossible. */
|
||||
if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
|
||||
CONFIG_PHYSICAL_ALIGN)
|
||||
return;
|
||||
|
||||
slots[slot_max++] = addr;
|
||||
}
|
||||
|
||||
static unsigned long slots_fetch_random(void)
|
||||
{
|
||||
/* Handle case of no slots stored. */
|
||||
if (slot_max == 0)
|
||||
return 0;
|
||||
|
||||
return slots[get_random_long() % slot_max];
|
||||
}
|
||||
|
||||
static void process_e820_entry(struct e820entry *entry,
|
||||
unsigned long minimum,
|
||||
unsigned long image_size)
|
||||
{
|
||||
struct mem_vector region, img;
|
||||
|
||||
/* Skip non-RAM entries. */
|
||||
if (entry->type != E820_RAM)
|
||||
return;
|
||||
|
||||
/* Ignore entries entirely above our maximum. */
|
||||
if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
|
||||
return;
|
||||
|
||||
/* Ignore entries entirely below our minimum. */
|
||||
if (entry->addr + entry->size < minimum)
|
||||
return;
|
||||
|
||||
region.start = entry->addr;
|
||||
region.size = entry->size;
|
||||
|
||||
/* Potentially raise address to minimum location. */
|
||||
if (region.start < minimum)
|
||||
region.start = minimum;
|
||||
|
||||
/* Potentially raise address to meet alignment requirements. */
|
||||
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
|
||||
|
||||
/* Did we raise the address above the bounds of this e820 region? */
|
||||
if (region.start > entry->addr + entry->size)
|
||||
return;
|
||||
|
||||
/* Reduce size by any delta from the original address. */
|
||||
region.size -= region.start - entry->addr;
|
||||
|
||||
/* Reduce maximum size to fit end of image within maximum limit. */
|
||||
if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
|
||||
region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
|
||||
|
||||
/* Walk each aligned slot and check for avoided areas. */
|
||||
for (img.start = region.start, img.size = image_size ;
|
||||
mem_contains(®ion, &img) ;
|
||||
img.start += CONFIG_PHYSICAL_ALIGN) {
|
||||
if (mem_avoid_overlap(&img))
|
||||
continue;
|
||||
slots_append(img.start);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long find_random_addr(unsigned long minimum,
|
||||
unsigned long size)
|
||||
{
|
||||
int i;
|
||||
unsigned long addr;
|
||||
|
||||
/* Make sure minimum is aligned. */
|
||||
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
|
||||
|
||||
/* Verify potential e820 positions, appending to slots list. */
|
||||
for (i = 0; i < real_mode->e820_entries; i++) {
|
||||
process_e820_entry(&real_mode->e820_map[i], minimum, size);
|
||||
}
|
||||
|
||||
return slots_fetch_random();
|
||||
}
|
||||
|
||||
/*
 * Choose where the kernel will be decompressed to.
 *
 * Returns output unchanged when KASLR is disabled (no "kaslr" on the
 * command line with CONFIG_HIBERNATION, "nokaslr" otherwise), when no
 * suitable slot was found, or when the random pick falls below output.
 * Otherwise returns the randomly selected address.
 */
unsigned char *choose_kernel_location(unsigned char *input,
				      unsigned long input_size,
				      unsigned char *output,
				      unsigned long output_size)
{
	const unsigned long minimum = (unsigned long)output;
	unsigned long random;

#ifdef CONFIG_HIBERNATION
	if (!cmdline_find_option_bool("kaslr")) {
		debug_putstr("KASLR disabled by default...\n");
		return output;
	}
#else
	if (cmdline_find_option_bool("nokaslr")) {
		debug_putstr("KASLR disabled by cmdline...\n");
		return output;
	}
#endif

	/* Record the various known unsafe memory ranges. */
	mem_avoid_init((unsigned long)input, input_size,
		       (unsigned long)output, output_size);

	/* Walk e820 and find a random address. */
	random = find_random_addr(minimum, output_size);
	if (!random) {
		debug_putstr("KASLR could not find suitable E820 region...\n");
		return output;
	}

	/* Always enforce the minimum. */
	if (random < minimum)
		return output;

	return (unsigned char *)random;
}
|
||||
33
arch/x86/boot/compressed/cmdline.c
Normal file
33
arch/x86/boot/compressed/cmdline.c
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
#include "misc.h"
|
||||
|
||||
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
|
||||
|
||||
/*
 * Flat-memory stand-ins for the real-mode %fs helpers, so that
 * ../cmdline.c can be included unchanged: "fs" holds a linear base
 * address instead of a segment selector.
 */
static unsigned long fs;

/* Turn the segment value back into a linear base (segment * 16) */
static inline void set_fs(unsigned long seg)
{
	fs = seg << 4;  /* shift it back */
}

typedef unsigned long addr_t;

/* Read the byte at linear address fs + addr */
static inline char rdfs8(addr_t addr)
{
	char *byte = (char *)(fs + addr);

	return *byte;
}
|
||||
#include "../cmdline.c"
|
||||
static unsigned long get_cmd_line_ptr(void)
|
||||
{
|
||||
unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
|
||||
|
||||
cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
|
||||
|
||||
return cmd_line_ptr;
|
||||
}
|
||||
/* Look up "option=value" on the command line found by get_cmd_line_ptr() */
int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
	const unsigned long cmdline = get_cmd_line_ptr();

	return __cmdline_find_option(cmdline, option, buffer, bufsize);
}

/* Look up a boolean option on the command line found by get_cmd_line_ptr() */
int cmdline_find_option_bool(const char *option)
{
	const unsigned long cmdline = get_cmd_line_ptr();

	return __cmdline_find_option_bool(cmdline, option);
}
|
||||
|
||||
#endif
|
||||
12
arch/x86/boot/compressed/cpuflags.c
Normal file
12
arch/x86/boot/compressed/cpuflags.c
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
|
||||
#include "../cpuflags.c"
|
||||
|
||||
bool has_cpuflag(int flag)
|
||||
{
|
||||
get_cpuflags();
|
||||
|
||||
return test_bit(flag, cpu.flags);
|
||||
}
|
||||
|
||||
#endif
|
||||
5
arch/x86/boot/compressed/early_serial_console.c
Normal file
5
arch/x86/boot/compressed/early_serial_console.c
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
#include "misc.h"
|
||||
|
||||
int early_serial_base;
|
||||
|
||||
#include "../early_serial_console.c"
|
||||
1514
arch/x86/boot/compressed/eboot.c
Normal file
1514
arch/x86/boot/compressed/eboot.c
Normal file
File diff suppressed because it is too large
Load diff
122
arch/x86/boot/compressed/eboot.h
Normal file
122
arch/x86/boot/compressed/eboot.h
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
#ifndef BOOT_COMPRESSED_EBOOT_H
|
||||
#define BOOT_COMPRESSED_EBOOT_H
|
||||
|
||||
#define SEG_TYPE_DATA (0 << 3)
|
||||
#define SEG_TYPE_READ_WRITE (1 << 1)
|
||||
#define SEG_TYPE_CODE (1 << 3)
|
||||
#define SEG_TYPE_EXEC_READ (1 << 1)
|
||||
#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
|
||||
#define SEG_OP_SIZE_32BIT (1 << 0)
|
||||
#define SEG_GRANULARITY_4KB (1 << 0)
|
||||
|
||||
#define DESC_TYPE_CODE_DATA (1 << 0)
|
||||
|
||||
#define EFI_CONSOLE_OUT_DEVICE_GUID \
|
||||
EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
|
||||
0x3f, 0xc1, 0x4d)
|
||||
|
||||
#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
|
||||
#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
|
||||
#define PIXEL_BIT_MASK 2
|
||||
#define PIXEL_BLT_ONLY 3
|
||||
#define PIXEL_FORMAT_MAX 4
|
||||
|
||||
struct efi_pixel_bitmask {
|
||||
u32 red_mask;
|
||||
u32 green_mask;
|
||||
u32 blue_mask;
|
||||
u32 reserved_mask;
|
||||
};
|
||||
|
||||
struct efi_graphics_output_mode_info {
|
||||
u32 version;
|
||||
u32 horizontal_resolution;
|
||||
u32 vertical_resolution;
|
||||
int pixel_format;
|
||||
struct efi_pixel_bitmask pixel_information;
|
||||
u32 pixels_per_scan_line;
|
||||
} __packed;
|
||||
|
||||
struct efi_graphics_output_protocol_mode_32 {
|
||||
u32 max_mode;
|
||||
u32 mode;
|
||||
u32 info;
|
||||
u32 size_of_info;
|
||||
u64 frame_buffer_base;
|
||||
u32 frame_buffer_size;
|
||||
} __packed;
|
||||
|
||||
struct efi_graphics_output_protocol_mode_64 {
|
||||
u32 max_mode;
|
||||
u32 mode;
|
||||
u64 info;
|
||||
u64 size_of_info;
|
||||
u64 frame_buffer_base;
|
||||
u64 frame_buffer_size;
|
||||
} __packed;
|
||||
|
||||
struct efi_graphics_output_protocol_mode {
|
||||
u32 max_mode;
|
||||
u32 mode;
|
||||
unsigned long info;
|
||||
unsigned long size_of_info;
|
||||
u64 frame_buffer_base;
|
||||
unsigned long frame_buffer_size;
|
||||
} __packed;
|
||||
|
||||
struct efi_graphics_output_protocol_32 {
|
||||
u32 query_mode;
|
||||
u32 set_mode;
|
||||
u32 blt;
|
||||
u32 mode;
|
||||
};
|
||||
|
||||
struct efi_graphics_output_protocol_64 {
|
||||
u64 query_mode;
|
||||
u64 set_mode;
|
||||
u64 blt;
|
||||
u64 mode;
|
||||
};
|
||||
|
||||
struct efi_graphics_output_protocol {
|
||||
void *query_mode;
|
||||
unsigned long set_mode;
|
||||
unsigned long blt;
|
||||
struct efi_graphics_output_protocol_mode *mode;
|
||||
};
|
||||
|
||||
struct efi_uga_draw_protocol_32 {
|
||||
u32 get_mode;
|
||||
u32 set_mode;
|
||||
u32 blt;
|
||||
};
|
||||
|
||||
struct efi_uga_draw_protocol_64 {
|
||||
u64 get_mode;
|
||||
u64 set_mode;
|
||||
u64 blt;
|
||||
};
|
||||
|
||||
struct efi_uga_draw_protocol {
|
||||
void *get_mode;
|
||||
void *set_mode;
|
||||
void *blt;
|
||||
};
|
||||
|
||||
/*
 * Firmware handles and service entry points passed to efi_main().
 *
 * The boot assembly in head_32.S and head_64.S depends on this exact layout:
 * it reserves eleven 8-byte slots (".fill 11,8,0"), stores the image handle
 * at offset 0 and the system table pointer at offset 8, and relocates ->call
 * at offset 88.  Keep the field order, the field sizes and __packed in sync
 * with those files.
 */
struct efi_config {
|
||||
u64 image_handle;
|
||||
u64 table;
|
||||
u64 allocate_pool;
|
||||
u64 allocate_pages;
|
||||
u64 get_memory_map;
|
||||
u64 free_pool;
|
||||
u64 free_pages;
|
||||
u64 locate_handle;
|
||||
u64 handle_protocol;
|
||||
u64 exit_boot_services;
|
||||
u64 text_output;
|
||||
/* offset 88: efi_call_phys, efi64_thunk or efi_call, per the asm tables */
efi_status_t (*call)(unsigned long, ...);
|
||||
/* initialised to 1 in efi64_config and to 0 in efi32_config */
bool is64;
|
||||
} __packed;
|
||||
|
||||
#endif /* BOOT_COMPRESSED_EBOOT_H */
|
||||
86
arch/x86/boot/compressed/efi_stub_32.S
Normal file
86
arch/x86/boot/compressed/efi_stub_32.S
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* EFI call stub for IA32.
|
||||
*
|
||||
* This stub allows us to make EFI calls in physical mode with interrupts
|
||||
* turned off. Note that this implementation is different from the one in
|
||||
* arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
|
||||
* mode at this point.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/page_types.h>
|
||||
|
||||
/*
|
||||
* efi_call_phys(void *, ...) is a function with variable parameters.
|
||||
* All the callers of this function assure that all the parameters are 4-bytes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
|
||||
* So we'd better save all of them at the beginning of this function and restore
|
||||
* at the end no matter how many we use, because we can not assure EFI runtime
|
||||
* service functions will comply with gcc calling convention, too.
|
||||
*/
|
||||
|
||||
.text
|
||||
ENTRY(efi_call_phys)
|
||||
/*
|
||||
* 0. The function can only be called in Linux kernel. So CS has been
|
||||
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
|
||||
* the values of these registers are the same. And, the corresponding
|
||||
* GDT entries are identical. So I will do nothing about segment reg
|
||||
* and GDT, but change GDT base register in prolog and epilog.
|
||||
*/
|
||||
|
||||
/*
|
||||
* 1. Because we haven't been relocated by this point we need to
|
||||
* use relative addressing.
|
||||
*/
|
||||
call 1f
|
||||
1: popl %edx
|
||||
subl $1b, %edx
|
||||
|
||||
/*
|
||||
* 2. Now on the top of stack is the return
|
||||
* address in the caller of efi_call_phys(), then parameter 1,
|
||||
* parameter 2, ..., param n. To make things easy, we save the return
|
||||
* address of efi_call_phys in a global variable.
|
||||
*/
|
||||
popl %ecx
|
||||
movl %ecx, saved_return_addr(%edx)
|
||||
/* get the function pointer into ECX*/
|
||||
popl %ecx
|
||||
movl %ecx, efi_rt_function_ptr(%edx)
|
||||
|
||||
/*
|
||||
* 3. Call the physical function.
|
||||
*/
|
||||
call *%ecx
|
||||
|
||||
/*
|
||||
* 4. Balance the stack. And because EAX contain the return value,
|
||||
* we'd better not clobber it. We need to calculate our address
|
||||
* again because %ecx and %edx are not preserved across EFI function
|
||||
* calls.
|
||||
*/
|
||||
call 1f
|
||||
1: popl %edx
|
||||
subl $1b, %edx
|
||||
|
||||
movl efi_rt_function_ptr(%edx), %ecx
|
||||
pushl %ecx
|
||||
|
||||
/*
|
||||
* 5. Push the saved return address onto the stack and return.
|
||||
*/
|
||||
movl saved_return_addr(%edx), %ecx
|
||||
pushl %ecx
|
||||
ret
|
||||
ENDPROC(efi_call_phys)
|
||||
.previous
|
||||
|
||||
.data
|
||||
saved_return_addr:
|
||||
.long 0
|
||||
efi_rt_function_ptr:
|
||||
.long 0
|
||||
5
arch/x86/boot/compressed/efi_stub_64.S
Normal file
5
arch/x86/boot/compressed/efi_stub_64.S
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
#include <asm/segment.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor-flags.h>
|
||||
|
||||
#include "../../platform/efi/efi_stub_64.S"
|
||||
196
arch/x86/boot/compressed/efi_thunk_64.S
Normal file
196
arch/x86/boot/compressed/efi_thunk_64.S
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
/*
|
||||
* Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
|
||||
*
|
||||
* Early support for invoking 32-bit EFI services from a 64-bit kernel.
|
||||
*
|
||||
* Because this thunking occurs before ExitBootServices() we have to
|
||||
* restore the firmware's 32-bit GDT before we make EFI service calls,
|
||||
* since the firmware's 32-bit IDT is still currently installed and it
|
||||
* needs to be able to service interrupts.
|
||||
*
|
||||
* On the plus side, we don't have to worry about mangling 64-bit
|
||||
* addresses into 32-bits because we're executing with an identity
|
||||
* mapped pagetable and haven't transitioned to 64-bit virtual addresses
|
||||
* yet.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
|
||||
.code64
|
||||
.text
|
||||
ENTRY(efi64_thunk)
|
||||
push %rbp
|
||||
push %rbx
|
||||
|
||||
subq $8, %rsp
|
||||
leaq efi_exit32(%rip), %rax
|
||||
movl %eax, 4(%rsp)
|
||||
leaq efi_gdt64(%rip), %rax
|
||||
movl %eax, (%rsp)
|
||||
movl %eax, 2(%rax) /* Fixup the gdt base address */
|
||||
|
||||
movl %ds, %eax
|
||||
push %rax
|
||||
movl %es, %eax
|
||||
push %rax
|
||||
movl %ss, %eax
|
||||
push %rax
|
||||
|
||||
/*
|
||||
* Convert x86-64 ABI params to i386 ABI
|
||||
*/
|
||||
subq $32, %rsp
|
||||
movl %esi, 0x0(%rsp)
|
||||
movl %edx, 0x4(%rsp)
|
||||
movl %ecx, 0x8(%rsp)
|
||||
movq %r8, %rsi
|
||||
movl %esi, 0xc(%rsp)
|
||||
movq %r9, %rsi
|
||||
movl %esi, 0x10(%rsp)
|
||||
|
||||
sgdt save_gdt(%rip)
|
||||
|
||||
leaq 1f(%rip), %rbx
|
||||
movq %rbx, func_rt_ptr(%rip)
|
||||
|
||||
/*
|
||||
* Switch to gdt with 32-bit segments. This is the firmware GDT
|
||||
* that was installed when the kernel started executing. This
|
||||
* pointer was saved at the EFI stub entry point in head_64.S.
|
||||
*/
|
||||
leaq efi32_boot_gdt(%rip), %rax
|
||||
lgdt (%rax)
|
||||
|
||||
pushq $__KERNEL_CS
|
||||
leaq efi_enter32(%rip), %rax
|
||||
pushq %rax
|
||||
lretq
|
||||
|
||||
1: addq $32, %rsp
|
||||
|
||||
lgdt save_gdt(%rip)
|
||||
|
||||
pop %rbx
|
||||
movl %ebx, %ss
|
||||
pop %rbx
|
||||
movl %ebx, %es
|
||||
pop %rbx
|
||||
movl %ebx, %ds
|
||||
|
||||
/*
|
||||
* Convert 32-bit status code into 64-bit.
|
||||
*/
|
||||
test %rax, %rax
|
||||
jz 1f
|
||||
movl %eax, %ecx
|
||||
andl $0x0fffffff, %ecx
|
||||
andl $0xf0000000, %eax
|
||||
shl $32, %rax
|
||||
or %rcx, %rax
|
||||
1:
|
||||
addq $8, %rsp
|
||||
pop %rbx
|
||||
pop %rbp
|
||||
ret
|
||||
ENDPROC(efi64_thunk)
|
||||
|
||||
/*
 * 64-bit landing pad reached from efi_enter32 once long mode is back on.
 * Resumes efi64_thunk at the continuation address it stored in func_rt_ptr
 * and returns, in %rax, the value efi_enter32 kept in %edi.
 */
ENTRY(efi_exit32)
|
||||
movq func_rt_ptr(%rip), %rax
|
||||
push %rax /* becomes the target of the ret below */
|
||||
mov %rdi, %rax /* EFI return value preserved in %edi by efi_enter32 */
|
||||
ret
|
||||
ENDPROC(efi_exit32)
|
||||
|
||||
.code32
|
||||
/*
|
||||
* EFI service pointer must be in %edi.
|
||||
*
|
||||
* The stack should represent the 32-bit calling convention.
|
||||
*/
|
||||
ENTRY(efi_enter32)
|
||||
movl $__KERNEL_DS, %eax
|
||||
movl %eax, %ds
|
||||
movl %eax, %es
|
||||
movl %eax, %ss
|
||||
|
||||
/* Reload pgtables */
|
||||
movl %cr3, %eax
|
||||
movl %eax, %cr3
|
||||
|
||||
/* Disable paging */
|
||||
movl %cr0, %eax
|
||||
btrl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Disable long mode via EFER */
|
||||
movl $MSR_EFER, %ecx
|
||||
rdmsr
|
||||
btrl $_EFER_LME, %eax
|
||||
wrmsr
|
||||
|
||||
call *%edi
|
||||
|
||||
/* We must preserve return value */
|
||||
movl %eax, %edi
|
||||
|
||||
/*
|
||||
* Some firmware will return with interrupts enabled. Be sure to
|
||||
* disable them before we switch GDTs.
|
||||
*/
|
||||
cli
|
||||
|
||||
movl 56(%esp), %eax
|
||||
movl %eax, 2(%eax)
|
||||
lgdtl (%eax)
|
||||
|
||||
movl %cr4, %eax
|
||||
btsl $(X86_CR4_PAE_BIT), %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
movl %cr3, %eax
|
||||
movl %eax, %cr3
|
||||
|
||||
movl $MSR_EFER, %ecx
|
||||
rdmsr
|
||||
btsl $_EFER_LME, %eax
|
||||
wrmsr
|
||||
|
||||
xorl %eax, %eax
|
||||
lldt %ax
|
||||
|
||||
movl 60(%esp), %eax
|
||||
pushl $__KERNEL_CS
|
||||
pushl %eax
|
||||
|
||||
/* Enable paging */
|
||||
movl %cr0, %eax
|
||||
btsl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
lret
|
||||
ENDPROC(efi_enter32)
|
||||
|
||||
.data
|
||||
.balign 8
|
||||
.global efi32_boot_gdt
|
||||
efi32_boot_gdt: .word 0
|
||||
.quad 0
|
||||
|
||||
save_gdt: .word 0
|
||||
.quad 0
|
||||
func_rt_ptr: .quad 0
|
||||
|
||||
.global efi_gdt64
|
||||
efi_gdt64:
|
||||
.word efi_gdt64_end - efi_gdt64
|
||||
.long 0 /* Filled out by user */
|
||||
.word 0
|
||||
.quad 0x0000000000000000 /* NULL descriptor */
|
||||
.quad 0x00af9a000000ffff /* __KERNEL_CS */
|
||||
.quad 0x00cf92000000ffff /* __KERNEL_DS */
|
||||
.quad 0x0080890000000000 /* TS descriptor */
|
||||
.quad 0x0000000000000000 /* TS continued */
|
||||
efi_gdt64_end:
|
||||
247
arch/x86/boot/compressed/head_32.S
Normal file
247
arch/x86/boot/compressed/head_32.S
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* linux/boot/head.S
|
||||
*
|
||||
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
|
||||
*/
|
||||
|
||||
/*
|
||||
* head.S contains the 32-bit startup code.
|
||||
*
|
||||
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
|
||||
* the page directory will exist. The startup code will be overwritten by
|
||||
* the page directory. [According to comments etc elsewhere on a compressed
|
||||
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
|
||||
*
|
||||
* Page 0 is deliberately kept safe, since System Management Mode code in
|
||||
* laptops may need to access the BIOS data stored there. This is also
|
||||
* useful for future device drivers that access the BIOS via VM86
|
||||
* mode.
|
||||
*/
|
||||
|
||||
/*
|
||||
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
|
||||
*/
|
||||
.text
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
__HEAD
|
||||
ENTRY(startup_32)
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
jmp preferred_addr
|
||||
|
||||
/*
|
||||
* We don't need the return address, so set up the stack so
|
||||
* efi_main() can find its arguments.
|
||||
*/
|
||||
ENTRY(efi_pe_entry)
|
||||
add $0x4, %esp
|
||||
|
||||
call 1f
|
||||
1: popl %esi
|
||||
subl $1b, %esi
|
||||
|
||||
popl %ecx
|
||||
movl %ecx, efi32_config(%esi) /* Handle */
|
||||
popl %ecx
|
||||
movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
|
||||
|
||||
/* Relocate efi_config->call() */
|
||||
leal efi32_config(%esi), %eax
|
||||
add %esi, 88(%eax)
|
||||
pushl %eax
|
||||
|
||||
call make_boot_params
|
||||
cmpl $0, %eax
|
||||
je fail
|
||||
movl %esi, BP_code32_start(%eax)
|
||||
popl %ecx
|
||||
pushl %eax
|
||||
pushl %ecx
|
||||
jmp 2f /* Skip efi_config initialization */
|
||||
|
||||
ENTRY(efi32_stub_entry)
|
||||
add $0x4, %esp
|
||||
popl %ecx
|
||||
popl %edx
|
||||
|
||||
call 1f
|
||||
1: popl %esi
|
||||
subl $1b, %esi
|
||||
|
||||
movl %ecx, efi32_config(%esi) /* Handle */
|
||||
movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
|
||||
|
||||
/* Relocate efi_config->call() */
|
||||
leal efi32_config(%esi), %eax
|
||||
add %esi, 88(%eax)
|
||||
pushl %eax
|
||||
2:
|
||||
call efi_main
|
||||
cmpl $0, %eax
|
||||
movl %eax, %esi
|
||||
jne 2f
|
||||
fail:
|
||||
/* EFI init failed, so hang. */
|
||||
hlt
|
||||
jmp fail
|
||||
2:
|
||||
movl BP_code32_start(%esi), %eax
|
||||
leal preferred_addr(%eax), %eax
|
||||
jmp *%eax
|
||||
|
||||
preferred_addr:
|
||||
#endif
|
||||
cld
|
||||
/*
|
||||
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
|
||||
* us to not reload segments
|
||||
*/
|
||||
testb $(1<<6), BP_loadflags(%esi)
|
||||
jnz 1f
|
||||
|
||||
cli
|
||||
movl $__BOOT_DS, %eax
|
||||
movl %eax, %ds
|
||||
movl %eax, %es
|
||||
movl %eax, %fs
|
||||
movl %eax, %gs
|
||||
movl %eax, %ss
|
||||
1:
|
||||
|
||||
/*
|
||||
* Calculate the delta between where we were compiled to run
|
||||
* at and where we were actually loaded at. This can only be done
|
||||
* with a short local call on x86. Nothing else will tell us what
|
||||
* address we are running at. The reserved chunk of the real-mode
|
||||
* data at 0x1e4 (defined as a scratch field) are used as the stack
|
||||
* for this calculation. Only 4 bytes are needed.
|
||||
*/
|
||||
leal (BP_scratch+4)(%esi), %esp
|
||||
call 1f
|
||||
1: popl %ebp
|
||||
subl $1b, %ebp
|
||||
|
||||
/*
|
||||
* %ebp contains the address we are loaded at by the boot loader and %ebx
|
||||
* contains the address where we should move the kernel image temporarily
|
||||
* for safe in-place decompression.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
	/*
	 * Round the load address in %ebp up to the boundary requested by
	 * kernel_alignment: add (alignment - 1), then mask those bits off.
	 */
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	/*
	 * Physical addresses are unsigned, so compare with jae rather than
	 * jge.  A signed compare treats a load address at or above 2 GiB as
	 * negative and would wrongly fall back to LOAD_PHYSICAL_ADDR.
	 */
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	/* Not relocatable, or loaded below the minimum: use the default. */
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:
|
||||
|
||||
/* Target address to relocate to for decompression */
|
||||
addl $z_extract_offset, %ebx
|
||||
|
||||
/* Set up the stack */
|
||||
leal boot_stack_end(%ebx), %esp
|
||||
|
||||
/* Zero EFLAGS */
|
||||
pushl $0
|
||||
popfl
|
||||
|
||||
/*
|
||||
* Copy the compressed kernel to the end of our buffer
|
||||
* where decompression in place becomes safe.
|
||||
*/
|
||||
pushl %esi
|
||||
leal (_bss-4)(%ebp), %esi
|
||||
leal (_bss-4)(%ebx), %edi
|
||||
movl $(_bss - startup_32), %ecx
|
||||
shrl $2, %ecx
|
||||
std
|
||||
rep movsl
|
||||
cld
|
||||
popl %esi
|
||||
|
||||
/*
|
||||
* Jump to the relocated address.
|
||||
*/
|
||||
leal relocated(%ebx), %eax
|
||||
jmp *%eax
|
||||
ENDPROC(startup_32)
|
||||
|
||||
.text
|
||||
relocated:
|
||||
|
||||
/*
|
||||
* Clear BSS (stack is currently empty)
|
||||
*/
|
||||
xorl %eax, %eax
|
||||
leal _bss(%ebx), %edi
|
||||
leal _ebss(%ebx), %ecx
|
||||
subl %edi, %ecx
|
||||
shrl $2, %ecx
|
||||
rep stosl
|
||||
|
||||
/*
|
||||
* Adjust our own GOT
|
||||
*/
|
||||
leal _got(%ebx), %edx
|
||||
leal _egot(%ebx), %ecx
|
||||
1:
|
||||
cmpl %ecx, %edx
|
||||
jae 2f
|
||||
addl %ebx, (%edx)
|
||||
addl $4, %edx
|
||||
jmp 1b
|
||||
2:
|
||||
|
||||
/*
|
||||
* Do the decompression, and jump to the new kernel..
|
||||
*/
|
||||
/* push arguments for decompress_kernel: */
|
||||
pushl $z_run_size /* size of kernel with .bss and .brk */
|
||||
pushl $z_output_len /* decompressed length, end of relocs */
|
||||
leal z_extract_offset_negative(%ebx), %ebp
|
||||
pushl %ebp /* output address */
|
||||
pushl $z_input_len /* input_len */
|
||||
leal input_data(%ebx), %eax
|
||||
pushl %eax /* input_data */
|
||||
leal boot_heap(%ebx), %eax
|
||||
pushl %eax /* heap area */
|
||||
pushl %esi /* real mode pointer */
|
||||
call decompress_kernel /* returns kernel location in %eax */
|
||||
addl $28, %esp
|
||||
|
||||
/*
|
||||
* Jump to the decompressed kernel.
|
||||
*/
|
||||
xorl %ebx, %ebx
|
||||
jmp *%eax
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
.data
|
||||
efi32_config:
|
||||
.fill 11,8,0
|
||||
.long efi_call_phys
|
||||
.long 0
|
||||
.byte 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Stack and heap for uncompression
|
||||
*/
|
||||
.bss
|
||||
.balign 4
|
||||
boot_heap:
|
||||
.fill BOOT_HEAP_SIZE, 1, 0
|
||||
boot_stack:
|
||||
.fill BOOT_STACK_SIZE, 1, 0
|
||||
boot_stack_end:
|
||||
479
arch/x86/boot/compressed/head_64.S
Normal file
479
arch/x86/boot/compressed/head_64.S
Normal file
|
|
@ -0,0 +1,479 @@
|
|||
/*
|
||||
* linux/boot/head.S
|
||||
*
|
||||
* Copyright (C) 1991, 1992, 1993 Linus Torvalds
|
||||
*/
|
||||
|
||||
/*
|
||||
* head.S contains the 32-bit startup code.
|
||||
*
|
||||
* NOTE!!! Startup happens at absolute address 0x00001000, which is also where
|
||||
* the page directory will exist. The startup code will be overwritten by
|
||||
* the page directory. [According to comments etc elsewhere on a compressed
|
||||
* kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
|
||||
*
|
||||
* Page 0 is deliberately kept safe, since System Management Mode code in
|
||||
* laptops may need to access the BIOS data stored there. This is also
|
||||
* useful for future device drivers that access the BIOS via VM86
|
||||
* mode.
|
||||
*/
|
||||
|
||||
/*
|
||||
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
|
||||
*/
|
||||
.code32
|
||||
.text
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
__HEAD
|
||||
.code32
|
||||
ENTRY(startup_32)
|
||||
/*
|
||||
* 32bit entry is 0 and it is ABI so immutable!
|
||||
* If we come here directly from a bootloader,
|
||||
* kernel(text+data+bss+brk) ramdisk, zero_page, command line
|
||||
* all need to be under the 4G limit.
|
||||
*/
|
||||
cld
|
||||
/*
|
||||
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
|
||||
* us to not reload segments
|
||||
*/
|
||||
testb $(1<<6), BP_loadflags(%esi)
|
||||
jnz 1f
|
||||
|
||||
cli
|
||||
movl $(__BOOT_DS), %eax
|
||||
movl %eax, %ds
|
||||
movl %eax, %es
|
||||
movl %eax, %ss
|
||||
1:
|
||||
|
||||
/*
|
||||
* Calculate the delta between where we were compiled to run
|
||||
* at and where we were actually loaded at. This can only be done
|
||||
* with a short local call on x86. Nothing else will tell us what
|
||||
* address we are running at. The reserved chunk of the real-mode
|
||||
* data at 0x1e4 (defined as a scratch field) are used as the stack
|
||||
* for this calculation. Only 4 bytes are needed.
|
||||
*/
|
||||
leal (BP_scratch+4)(%esi), %esp
|
||||
call 1f
|
||||
1: popl %ebp
|
||||
subl $1b, %ebp
|
||||
|
||||
/* setup a stack and make sure cpu supports long mode. */
|
||||
movl $boot_stack_end, %eax
|
||||
addl %ebp, %eax
|
||||
movl %eax, %esp
|
||||
|
||||
call verify_cpu
|
||||
testl %eax, %eax
|
||||
jnz no_longmode
|
||||
|
||||
/*
|
||||
* Compute the delta between where we were compiled to run at
|
||||
* and where the code will actually run at.
|
||||
*
|
||||
* %ebp contains the address we are loaded at by the boot loader and %ebx
|
||||
* contains the address where we should move the kernel image temporarily
|
||||
* for safe in-place decompression.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
	/*
	 * Round the load address in %ebp up to the boundary requested by
	 * kernel_alignment: add (alignment - 1), then mask those bits off.
	 */
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	/*
	 * Physical addresses are unsigned, so compare with jae rather than
	 * jge.  A signed compare treats a load address at or above 2 GiB as
	 * negative and would wrongly fall back to LOAD_PHYSICAL_ADDR.
	 */
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	/* Not relocatable, or loaded below the minimum: use the default. */
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:
|
||||
|
||||
/* Target address to relocate to for decompression */
|
||||
addl $z_extract_offset, %ebx
|
||||
|
||||
/*
|
||||
* Prepare for entering 64 bit mode
|
||||
*/
|
||||
|
||||
/* Load new GDT with the 64bit segments using 32bit descriptor */
|
||||
leal gdt(%ebp), %eax
|
||||
movl %eax, gdt+2(%ebp)
|
||||
lgdt gdt(%ebp)
|
||||
|
||||
/* Enable PAE mode */
|
||||
movl %cr4, %eax
|
||||
orl $X86_CR4_PAE, %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
/*
|
||||
* Build early 4G boot pagetable
|
||||
*/
|
||||
/* Initialize Page tables to 0 */
|
||||
leal pgtable(%ebx), %edi
|
||||
xorl %eax, %eax
|
||||
movl $((4096*6)/4), %ecx
|
||||
rep stosl
|
||||
|
||||
/* Build Level 4 */
|
||||
leal pgtable + 0(%ebx), %edi
|
||||
leal 0x1007 (%edi), %eax
|
||||
movl %eax, 0(%edi)
|
||||
|
||||
/* Build Level 3 */
|
||||
leal pgtable + 0x1000(%ebx), %edi
|
||||
leal 0x1007(%edi), %eax
|
||||
movl $4, %ecx
|
||||
1: movl %eax, 0x00(%edi)
|
||||
addl $0x00001000, %eax
|
||||
addl $8, %edi
|
||||
decl %ecx
|
||||
jnz 1b
|
||||
|
||||
/* Build Level 2 */
|
||||
leal pgtable + 0x2000(%ebx), %edi
|
||||
movl $0x00000183, %eax
|
||||
movl $2048, %ecx
|
||||
1: movl %eax, 0(%edi)
|
||||
addl $0x00200000, %eax
|
||||
addl $8, %edi
|
||||
decl %ecx
|
||||
jnz 1b
|
||||
|
||||
/* Enable the boot page tables */
|
||||
leal pgtable(%ebx), %eax
|
||||
movl %eax, %cr3
|
||||
|
||||
/* Enable Long mode in EFER (Extended Feature Enable Register) */
|
||||
movl $MSR_EFER, %ecx
|
||||
rdmsr
|
||||
btsl $_EFER_LME, %eax
|
||||
wrmsr
|
||||
|
||||
/* After gdt is loaded */
|
||||
xorl %eax, %eax
|
||||
lldt %ax
|
||||
movl $0x20, %eax
|
||||
ltr %ax
|
||||
|
||||
/*
|
||||
* Setup for the jump to 64bit mode
|
||||
*
|
||||
* When the jump is performed we will be in long mode but
|
||||
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
|
||||
* (and in turn EFER.LMA = 1). To jump into 64bit mode we use
|
||||
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
|
||||
* We place all of the values on our mini stack so lret can be
|
||||
* used to perform that far jump.
|
||||
*/
|
||||
pushl $__KERNEL_CS
|
||||
leal startup_64(%ebp), %eax
|
||||
#ifdef CONFIG_EFI_MIXED
|
||||
movl efi32_config(%ebp), %ebx
|
||||
cmp $0, %ebx
|
||||
jz 1f
|
||||
leal handover_entry(%ebp), %eax
|
||||
1:
|
||||
#endif
|
||||
pushl %eax
|
||||
|
||||
/* Enter paged protected Mode, activating Long Mode */
|
||||
movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Jump from 32bit compatibility mode into 64bit mode. */
|
||||
lret
|
||||
ENDPROC(startup_32)
|
||||
|
||||
#ifdef CONFIG_EFI_MIXED
|
||||
.org 0x190
|
||||
ENTRY(efi32_stub_entry)
|
||||
add $0x4, %esp /* Discard return address */
|
||||
popl %ecx
|
||||
popl %edx
|
||||
popl %esi
|
||||
|
||||
leal (BP_scratch+4)(%esi), %esp
|
||||
call 1f
|
||||
1: pop %ebp
|
||||
subl $1b, %ebp
|
||||
|
||||
movl %ecx, efi32_config(%ebp)
|
||||
movl %edx, efi32_config+8(%ebp)
|
||||
sgdtl efi32_boot_gdt(%ebp)
|
||||
|
||||
leal efi32_config(%ebp), %eax
|
||||
movl %eax, efi_config(%ebp)
|
||||
|
||||
jmp startup_32
|
||||
ENDPROC(efi32_stub_entry)
|
||||
#endif
|
||||
|
||||
.code64
|
||||
.org 0x200
|
||||
ENTRY(startup_64)
|
||||
/*
|
||||
* 64bit entry is 0x200 and it is ABI so immutable!
|
||||
* We come here either from startup_32 or directly from a
|
||||
* 64bit bootloader.
|
||||
* If we come here from a bootloader, kernel(text+data+bss+brk),
|
||||
* ramdisk, zero_page, command line could be above 4G.
|
||||
* We depend on an identity mapped page table being provided
|
||||
* that maps our entire kernel(text+data+bss+brk), zero page
|
||||
* and command line.
|
||||
*/
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
/*
|
||||
* The entry point for the PE/COFF executable is efi_pe_entry, so
|
||||
* only legacy boot loaders will execute this jmp.
|
||||
*/
|
||||
jmp preferred_addr
|
||||
|
||||
ENTRY(efi_pe_entry)
|
||||
movq %rcx, efi64_config(%rip) /* Handle */
|
||||
movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
|
||||
|
||||
leaq efi64_config(%rip), %rax
|
||||
movq %rax, efi_config(%rip)
|
||||
|
||||
call 1f
|
||||
1: popq %rbp
|
||||
subq $1b, %rbp
|
||||
|
||||
/*
|
||||
* Relocate efi_config->call().
|
||||
*/
|
||||
addq %rbp, efi64_config+88(%rip)
|
||||
|
||||
movq %rax, %rdi
|
||||
call make_boot_params
|
||||
cmpq $0,%rax
|
||||
je fail
|
||||
mov %rax, %rsi
|
||||
leaq startup_32(%rip), %rax
|
||||
movl %eax, BP_code32_start(%rsi)
|
||||
jmp 2f /* Skip the relocation */
|
||||
|
||||
handover_entry:
|
||||
call 1f
|
||||
1: popq %rbp
|
||||
subq $1b, %rbp
|
||||
|
||||
/*
|
||||
* Relocate efi_config->call().
|
||||
*/
|
||||
movq efi_config(%rip), %rax
|
||||
addq %rbp, 88(%rax)
|
||||
2:
|
||||
movq efi_config(%rip), %rdi
|
||||
call efi_main
|
||||
movq %rax,%rsi
|
||||
cmpq $0,%rax
|
||||
jne 2f
|
||||
fail:
|
||||
/* EFI init failed, so hang. */
|
||||
hlt
|
||||
jmp fail
|
||||
2:
|
||||
movl BP_code32_start(%esi), %eax
|
||||
leaq preferred_addr(%rax), %rax
|
||||
jmp *%rax
|
||||
|
||||
preferred_addr:
|
||||
#endif
|
||||
|
||||
/* Setup data segments. */
|
||||
xorl %eax, %eax
|
||||
movl %eax, %ds
|
||||
movl %eax, %es
|
||||
movl %eax, %ss
|
||||
movl %eax, %fs
|
||||
movl %eax, %gs
|
||||
|
||||
/*
|
||||
* Compute the decompressed kernel start address. It is where
|
||||
* we were loaded at aligned to a 2M boundary. %rbp contains the
|
||||
* decompressed kernel start address.
|
||||
*
|
||||
* If it is a relocatable kernel then decompress and run the kernel
|
||||
* from load address aligned to 2MB addr, otherwise decompress and
|
||||
* run the kernel from LOAD_PHYSICAL_ADDR
|
||||
*
|
||||
* We cannot rely on the calculation done in 32-bit mode, since we
|
||||
* may have been invoked via the 64-bit entry point.
|
||||
*/
|
||||
|
||||
/* Start with the delta to where the kernel will run at. */
|
||||
#ifdef CONFIG_RELOCATABLE
	/*
	 * Take the run-time address of startup_32 as the load address and
	 * round it up to the boundary requested by kernel_alignment: add
	 * (alignment - 1), then mask those bits off.  The 32-bit load of
	 * kernel_alignment zero-extends into %rax.
	 */
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
	movl	BP_kernel_alignment(%rsi), %eax
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	/*
	 * Physical addresses are unsigned, so compare with jae rather than
	 * jge.  A signed compare would treat an address with the top bit set
	 * as negative and wrongly fall back to LOAD_PHYSICAL_ADDR.
	 */
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	/* Not relocatable, or loaded below the minimum: use the default. */
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:
|
||||
|
||||
/* Target address to relocate to for decompression */
|
||||
leaq z_extract_offset(%rbp), %rbx
|
||||
|
||||
/* Set up the stack */
|
||||
leaq boot_stack_end(%rbx), %rsp
|
||||
|
||||
/* Zero EFLAGS */
|
||||
pushq $0
|
||||
popfq
|
||||
|
||||
/*
|
||||
* Copy the compressed kernel to the end of our buffer
|
||||
* where decompression in place becomes safe.
|
||||
*/
|
||||
pushq %rsi
|
||||
leaq (_bss-8)(%rip), %rsi
|
||||
leaq (_bss-8)(%rbx), %rdi
|
||||
movq $_bss /* - $startup_32 */, %rcx
|
||||
shrq $3, %rcx
|
||||
std
|
||||
rep movsq
|
||||
cld
|
||||
popq %rsi
|
||||
|
||||
/*
|
||||
* Jump to the relocated address.
|
||||
*/
|
||||
leaq relocated(%rbx), %rax
|
||||
jmp *%rax
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
.org 0x390
|
||||
ENTRY(efi64_stub_entry)
|
||||
movq %rdi, efi64_config(%rip) /* Handle */
|
||||
movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
|
||||
|
||||
leaq efi64_config(%rip), %rax
|
||||
movq %rax, efi_config(%rip)
|
||||
|
||||
movq %rdx, %rsi
|
||||
jmp handover_entry
|
||||
ENDPROC(efi64_stub_entry)
|
||||
#endif
|
||||
|
||||
.text
|
||||
relocated:
|
||||
|
||||
/*
|
||||
* Clear BSS (stack is currently empty)
|
||||
*/
|
||||
xorl %eax, %eax
|
||||
leaq _bss(%rip), %rdi
|
||||
leaq _ebss(%rip), %rcx
|
||||
subq %rdi, %rcx
|
||||
shrq $3, %rcx
|
||||
rep stosq
|
||||
|
||||
/*
|
||||
* Adjust our own GOT
|
||||
*/
|
||||
leaq _got(%rip), %rdx
|
||||
leaq _egot(%rip), %rcx
|
||||
1:
|
||||
cmpq %rcx, %rdx
|
||||
jae 2f
|
||||
addq %rbx, (%rdx)
|
||||
addq $8, %rdx
|
||||
jmp 1b
|
||||
2:
|
||||
|
||||
/*
|
||||
* Do the decompression, and jump to the new kernel..
|
||||
*/
|
||||
pushq %rsi /* Save the real mode argument */
|
||||
movq $z_run_size, %r9 /* size of kernel with .bss and .brk */
|
||||
pushq %r9
|
||||
movq %rsi, %rdi /* real mode address */
|
||||
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
|
||||
leaq input_data(%rip), %rdx /* input_data */
|
||||
movl $z_input_len, %ecx /* input_len */
|
||||
movq %rbp, %r8 /* output target address */
|
||||
movq $z_output_len, %r9 /* decompressed length, end of relocs */
|
||||
call decompress_kernel /* returns kernel location in %rax */
|
||||
popq %r9
|
||||
popq %rsi
|
||||
|
||||
/*
|
||||
* Jump to the decompressed kernel.
|
||||
*/
|
||||
jmp *%rax
|
||||
|
||||
.code32
|
||||
no_longmode:
|
||||
/* This isn't an x86-64 CPU so hang */
|
||||
1:
|
||||
hlt
|
||||
jmp 1b
|
||||
|
||||
#include "../../kernel/verify_cpu.S"
|
||||
|
||||
.data
|
||||
gdt:
|
||||
.word gdt_end - gdt
|
||||
.long gdt
|
||||
.word 0
|
||||
.quad 0x0000000000000000 /* NULL descriptor */
|
||||
.quad 0x00af9a000000ffff /* __KERNEL_CS */
|
||||
.quad 0x00cf92000000ffff /* __KERNEL_DS */
|
||||
.quad 0x0080890000000000 /* TS descriptor */
|
||||
.quad 0x0000000000000000 /* TS continued */
|
||||
gdt_end:
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
efi_config:
|
||||
.quad 0
|
||||
|
||||
#ifdef CONFIG_EFI_MIXED
|
||||
.global efi32_config
|
||||
efi32_config:
|
||||
.fill 11,8,0
|
||||
.quad efi64_thunk
|
||||
.byte 0
|
||||
#endif
|
||||
|
||||
.global efi64_config
|
||||
efi64_config:
|
||||
.fill 11,8,0
|
||||
.quad efi_call
|
||||
.byte 1
|
||||
#endif /* CONFIG_EFI_STUB */
|
||||
|
||||
/*
|
||||
* Stack and heap for uncompression
|
||||
*/
|
||||
.bss
|
||||
.balign 4
|
||||
boot_heap:
|
||||
.fill BOOT_HEAP_SIZE, 1, 0
|
||||
boot_stack:
|
||||
.fill BOOT_STACK_SIZE, 1, 0
|
||||
boot_stack_end:
|
||||
|
||||
/*
|
||||
* Space for page tables (not in .bss so not zeroed)
|
||||
*/
|
||||
.section ".pgtable","a",@nobits
|
||||
.balign 4096
|
||||
pgtable:
|
||||
.fill 6*4096, 1, 0
|
||||
422
arch/x86/boot/compressed/misc.c
Normal file
422
arch/x86/boot/compressed/misc.c
Normal file
|
|
@ -0,0 +1,422 @@
|
|||
/*
|
||||
* misc.c
|
||||
*
|
||||
* This is a collection of several routines from gzip-1.0.3
|
||||
* adapted for Linux.
|
||||
*
|
||||
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
|
||||
* puts by Nick Holloway 1993, better puts by Martin Mares 1995
|
||||
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
|
||||
*/
|
||||
|
||||
#include "misc.h"
|
||||
#include "../string.h"
|
||||
|
||||
/* WARNING!!
|
||||
* This code is compiled with -fPIC and it is relocated dynamically
|
||||
* at run time, but no relocation processing is performed.
|
||||
* This means that it is not safe to place pointers in static structures.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Getting to provable safe in place decompression is hard.
|
||||
* Worst case behaviours need to be analyzed.
|
||||
* Background information:
|
||||
*
|
||||
* The file layout is:
|
||||
* magic[2]
|
||||
* method[1]
|
||||
* flags[1]
|
||||
* timestamp[4]
|
||||
* extraflags[1]
|
||||
* os[1]
|
||||
* compressed data blocks[N]
|
||||
* crc[4] orig_len[4]
|
||||
*
|
||||
* resulting in 18 bytes of non compressed data overhead.
|
||||
*
|
||||
* Files divided into blocks
|
||||
* 1 bit (last block flag)
|
||||
* 2 bits (block type)
|
||||
*
|
||||
* 1 block occurs every 32K -1 bytes or when 50% compression
|
||||
* has been achieved. The smallest block type encoding is always used.
|
||||
*
|
||||
* stored:
|
||||
* 32 bits length in bytes.
|
||||
*
|
||||
* fixed:
|
||||
* magic fixed tree.
|
||||
* symbols.
|
||||
*
|
||||
* dynamic:
|
||||
* dynamic tree encoding.
|
||||
* symbols.
|
||||
*
|
||||
*
|
||||
* The buffer for decompression in place is the length of the
|
||||
* uncompressed data, plus a small amount extra to keep the algorithm safe.
|
||||
* The compressed data is placed at the end of the buffer. The output
|
||||
* pointer is placed at the start of the buffer and the input pointer
|
||||
* is placed where the compressed data starts. Problems will occur
|
||||
* when the output pointer overruns the input pointer.
|
||||
*
|
||||
* The output pointer can only overrun the input pointer if the input
|
||||
* pointer is moving faster than the output pointer. A condition only
|
||||
* triggered by data whose compressed form is larger than the uncompressed
|
||||
* form.
|
||||
*
|
||||
* The worst case at the block level is a growth of the compressed data
|
||||
* of 5 bytes per 32767 bytes.
|
||||
*
|
||||
* The worst case internal to a compressed block is very hard to figure.
|
||||
* The worst case can at least be bounded by having one bit that represents
|
||||
* 32764 bytes and then all of the rest of the bytes representing the very
|
||||
* very last byte.
|
||||
*
|
||||
* All of which is enough to compute an amount of extra data that is required
|
||||
* to be safe. To avoid problems at the block level allocating 5 extra bytes
|
||||
* per 32767 bytes of data is sufficient. To avoid problems internal to a
|
||||
* block adding an extra 32767 bytes (the worst case uncompressed block size)
|
||||
* is sufficient, to ensure that in the worst case the decompressed data for
|
||||
* block will stop the byte before the compressed data for a block begins.
|
||||
* To avoid problems with the compressed data's meta information an extra 18
|
||||
* bytes are needed. Leading to the formula:
|
||||
*
|
||||
* extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
|
||||
*
|
||||
* Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
|
||||
* Adding 32768 instead of 32767 just makes for round numbers.
|
||||
* Adding the decompressor_size is necessary as it must live after all
|
||||
* of the data as well. Last I measured the decompressor is about 14K.
|
||||
* 10K of actual data and 4K of bss.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* gzip declarations
|
||||
*/
|
||||
#define STATIC static
|
||||
|
||||
#undef memcpy
|
||||
|
||||
/*
|
||||
* Use a normal definition of memset() from string.c. There are already
|
||||
* included header files which expect a definition of memset() and by
|
||||
* the time we define memset macro, it is too late.
|
||||
*/
|
||||
#undef memset
|
||||
#define memzero(s, n) memset((s), 0, (n))
|
||||
|
||||
|
||||
static void error(char *m);
|
||||
|
||||
/*
|
||||
* This is set up by the setup-routine at boot-time
|
||||
*/
|
||||
struct boot_params *real_mode; /* Pointer to real-mode data */
|
||||
|
||||
memptr free_mem_ptr;
|
||||
memptr free_mem_end_ptr;
|
||||
|
||||
static char *vidmem;
|
||||
static int vidport;
|
||||
static int lines, cols;
|
||||
|
||||
#ifdef CONFIG_KERNEL_GZIP
|
||||
#include "../../../../lib/decompress_inflate.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_BZIP2
|
||||
#include "../../../../lib/decompress_bunzip2.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_LZMA
|
||||
#include "../../../../lib/decompress_unlzma.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_XZ
|
||||
#include "../../../../lib/decompress_unxz.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_LZO
|
||||
#include "../../../../lib/decompress_unlzo.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_LZ4
|
||||
#include "../../../../lib/decompress_unlz4.c"
|
||||
#endif
|
||||
|
||||
static void scroll(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
|
||||
for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
|
||||
vidmem[i] = ' ';
|
||||
}
|
||||
|
||||
#define XMTRDY 0x20
|
||||
|
||||
#define TXR 0 /* Transmit register (WRITE) */
|
||||
#define LSR 5 /* Line Status */
|
||||
static void serial_putchar(int ch)
|
||||
{
|
||||
unsigned timeout = 0xffff;
|
||||
|
||||
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
|
||||
cpu_relax();
|
||||
|
||||
outb(ch, early_serial_base + TXR);
|
||||
}
|
||||
|
||||
void __putstr(const char *s)
|
||||
{
|
||||
int x, y, pos;
|
||||
char c;
|
||||
|
||||
if (early_serial_base) {
|
||||
const char *str = s;
|
||||
while (*str) {
|
||||
if (*str == '\n')
|
||||
serial_putchar('\r');
|
||||
serial_putchar(*str++);
|
||||
}
|
||||
}
|
||||
|
||||
if (real_mode->screen_info.orig_video_mode == 0 &&
|
||||
lines == 0 && cols == 0)
|
||||
return;
|
||||
|
||||
x = real_mode->screen_info.orig_x;
|
||||
y = real_mode->screen_info.orig_y;
|
||||
|
||||
while ((c = *s++) != '\0') {
|
||||
if (c == '\n') {
|
||||
x = 0;
|
||||
if (++y >= lines) {
|
||||
scroll();
|
||||
y--;
|
||||
}
|
||||
} else {
|
||||
vidmem[(x + cols * y) * 2] = c;
|
||||
if (++x >= cols) {
|
||||
x = 0;
|
||||
if (++y >= lines) {
|
||||
scroll();
|
||||
y--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
real_mode->screen_info.orig_x = x;
|
||||
real_mode->screen_info.orig_y = y;
|
||||
|
||||
pos = (x + cols * y) * 2; /* Update cursor position */
|
||||
outb(14, vidport);
|
||||
outb(0xff & (pos >> 9), vidport+1);
|
||||
outb(15, vidport);
|
||||
outb(0xff & (pos >> 1), vidport+1);
|
||||
}
|
||||
|
||||
/*
 * Report a fatal error and never return.
 *
 * Prints @x between blank lines followed by a "System halted" notice,
 * then executes hlt in an endless loop.
 */
static void error(char *x)
{
	error_putstr("\n\n");
	error_putstr(x);
	error_putstr("\n\n -- System halted");

	for (;;)
		asm("hlt");
}
|
||||
|
||||
#if CONFIG_X86_NEED_RELOCS
/*
 * Apply the relocation tables stored at the end of the decompressed image.
 *
 * @output:     address the image was decompressed to
 * @output_len: length of the decompressed image including the tables
 *
 * Does nothing (apart from a debug message) when the image sits exactly
 * at LOAD_PHYSICAL_ADDR.  Calls error() if a relocation target falls
 * outside [output, output + output_len].
 */
static void handle_relocations(void *output, unsigned long output_len)
{
	int *reloc;
	unsigned long delta, map, ptr;
	unsigned long min_addr = (unsigned long)output;
	unsigned long max_addr = min_addr + output_len;

	/*
	 * delta is the distance between where vmlinux was linked to load
	 * and where it actually ended up.
	 */
	delta = min_addr - LOAD_PHYSICAL_ADDR;
	if (delta == 0) {
		debug_putstr("No relocation needed... ");
		return;
	}
	debug_putstr("Performing relocations... ");

	/*
	 * The table entries are final kernel virtual addresses, but we are
	 * still running in the self map.  Adding map to an entry removes
	 * the kernel base address and yields the address to patch here.
	 */
	map = delta - __START_KERNEL_map;

	/*
	 * Two tables are appended to the kernel before compression.  Each
	 * entry is the kernel address of a location to patch, stored as a
	 * 32-bit value that is sign extended to 64 bits.  Layout:
	 *
	 * kernel bits...
	 * 0 - zero terminator for 64 bit relocations
	 * 64 bit relocation repeated
	 * 0 - zero terminator for 32 bit relocations
	 * 32 bit relocation repeated
	 *
	 * The tables are therefore walked backwards from the end of the
	 * image: 32-bit entries first, then the 64-bit ones.
	 */
	reloc = output + output_len - sizeof(*reloc);
	while (*reloc) {
		int extended = *reloc;
		extended += map;

		ptr = (unsigned long)extended;
		if (ptr < min_addr || ptr > max_addr)
			error("32-bit relocation outside of kernel!\n");

		*(uint32_t *)ptr += delta;
		reloc--;
	}
#ifdef CONFIG_X86_64
	/* Step over the 32-bit terminator onto the last 64-bit entry */
	reloc--;
	while (*reloc) {
		long extended = *reloc;
		extended += map;

		ptr = (unsigned long)extended;
		if (ptr < min_addr || ptr > max_addr)
			error("64-bit relocation outside of kernel!\n");

		*(uint64_t *)ptr += delta;
		reloc--;
	}
#endif
}
#else
/* No relocation support configured: nothing to do. */
static inline void handle_relocations(void *output, unsigned long output_len)
{ }
#endif
|
||||
|
||||
static void parse_elf(void *output)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
Elf64_Ehdr ehdr;
|
||||
Elf64_Phdr *phdrs, *phdr;
|
||||
#else
|
||||
Elf32_Ehdr ehdr;
|
||||
Elf32_Phdr *phdrs, *phdr;
|
||||
#endif
|
||||
void *dest;
|
||||
int i;
|
||||
|
||||
memcpy(&ehdr, output, sizeof(ehdr));
|
||||
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
|
||||
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
|
||||
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
|
||||
ehdr.e_ident[EI_MAG3] != ELFMAG3) {
|
||||
error("Kernel is not a valid ELF file");
|
||||
return;
|
||||
}
|
||||
|
||||
debug_putstr("Parsing ELF... ");
|
||||
|
||||
phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
|
||||
if (!phdrs)
|
||||
error("Failed to allocate space for phdrs");
|
||||
|
||||
memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
|
||||
|
||||
for (i = 0; i < ehdr.e_phnum; i++) {
|
||||
phdr = &phdrs[i];
|
||||
|
||||
switch (phdr->p_type) {
|
||||
case PT_LOAD:
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
dest = output;
|
||||
dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
|
||||
#else
|
||||
dest = (void *)(phdr->p_paddr);
|
||||
#endif
|
||||
memcpy(dest,
|
||||
output + phdr->p_offset,
|
||||
phdr->p_filesz);
|
||||
break;
|
||||
default: /* Ignore other PT_* */ break;
|
||||
}
|
||||
}
|
||||
|
||||
free(phdrs);
|
||||
}
|
||||
|
||||
/*
 * Decompress the kernel and prepare it for execution.
 *
 * @rmode:      boot parameters, stored in real_mode
 * @heap:       start of the decompressor heap (BOOT_HEAP_SIZE bytes)
 * @input_data: compressed kernel image
 * @input_len:  length of @input_data
 * @output:     proposed destination for the decompressed kernel
 * @output_len: length of the decompressed image
 * @run_size:   size passed in by the caller; the larger of this and
 *              @output_len is handed to choose_kernel_location()
 *
 * Returns the address the kernel was finally placed at.  Calls error()
 * if the chosen location fails validation.
 */
asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
				  unsigned char *input_data,
				  unsigned long input_len,
				  unsigned char *output,
				  unsigned long output_len,
				  unsigned long run_size)
{
	unsigned char *output_orig = output;
	unsigned long hole_size;

	real_mode = rmode;

	sanitize_boot_params(real_mode);

	/* Video mode 7 uses its own buffer address and port */
	if (real_mode->screen_info.orig_video_mode != 7) {
		vidmem = (char *) 0xb8000;
		vidport = 0x3d4;
	} else {
		vidmem = (char *) 0xb0000;
		vidport = 0x3b4;
	}

	lines = real_mode->screen_info.orig_video_lines;
	cols = real_mode->screen_info.orig_video_cols;

	console_init();
	debug_putstr("early console in decompress_kernel\n");

	free_mem_ptr     = heap;	/* Heap */
	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

	/*
	 * The memory hole needed for the kernel is the larger of either
	 * the entire decompressed kernel plus relocation table, or the
	 * entire decompressed kernel plus .bss and .brk sections.
	 */
	hole_size = run_size;
	if (output_len > run_size)
		hole_size = output_len;
	output = choose_kernel_location(input_data, input_len, output,
					hole_size);

	/* Validate memory location choices. */
	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
		error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64
	if (heap > 0x3fffffffffffUL)
		error("Destination address too large");
#else
	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
		error("Destination address too large");
#endif
#ifndef CONFIG_RELOCATABLE
	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
		error("Wrong destination address");
#endif

	debug_putstr("\nDecompressing Linux... ");
	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
	parse_elf(output);

	/*
	 * 32-bit always performs relocations. 64-bit relocations are only
	 * needed if kASLR has chosen a different load address.
	 */
	if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
		handle_relocations(output, output_len);

	debug_putstr("done.\nBooting the kernel.\n");
	return output;
}
|
||||
86
arch/x86/boot/compressed/misc.h
Normal file
86
arch/x86/boot/compressed/misc.h
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
#ifndef BOOT_COMPRESSED_MISC_H
|
||||
#define BOOT_COMPRESSED_MISC_H
|
||||
|
||||
/*
|
||||
* we have to be careful, because no indirections are allowed here, and
|
||||
* paravirt_ops is a kind of one. As it will only run in baremetal anyway,
|
||||
* we just keep it from happening
|
||||
*/
|
||||
#undef CONFIG_PARAVIRT
|
||||
#ifdef CONFIG_X86_32
|
||||
#define _ASM_X86_DESC_H 1
|
||||
#endif
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/screen_info.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/io.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/bootparam.h>
|
||||
#include <asm/bootparam_utils.h>
|
||||
|
||||
#define BOOT_BOOT_H
|
||||
#include "../ctype.h"
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define memptr long
|
||||
#else
|
||||
#define memptr unsigned
|
||||
#endif
|
||||
|
||||
/* misc.c */
|
||||
extern memptr free_mem_ptr;
|
||||
extern memptr free_mem_end_ptr;
|
||||
extern struct boot_params *real_mode; /* Pointer to real-mode data */
|
||||
void __putstr(const char *s);
|
||||
#define error_putstr(__x) __putstr(__x)
|
||||
|
||||
#ifdef CONFIG_X86_VERBOSE_BOOTUP
|
||||
|
||||
#define debug_putstr(__x) __putstr(__x)
|
||||
|
||||
#else
|
||||
|
||||
static inline void debug_putstr(const char *s)
|
||||
{ }
|
||||
|
||||
#endif
|
||||
|
||||
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
|
||||
/* cmdline.c */
|
||||
int cmdline_find_option(const char *option, char *buffer, int bufsize);
|
||||
int cmdline_find_option_bool(const char *option);
|
||||
#endif
|
||||
|
||||
|
||||
#if CONFIG_RANDOMIZE_BASE
|
||||
/* aslr.c */
|
||||
unsigned char *choose_kernel_location(unsigned char *input,
|
||||
unsigned long input_size,
|
||||
unsigned char *output,
|
||||
unsigned long output_size);
|
||||
/* cpuflags.c */
|
||||
bool has_cpuflag(int flag);
|
||||
#else
|
||||
static inline
|
||||
unsigned char *choose_kernel_location(unsigned char *input,
|
||||
unsigned long input_size,
|
||||
unsigned char *output,
|
||||
unsigned long output_size)
|
||||
{
|
||||
return output;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EARLY_PRINTK
|
||||
/* early_serial_console.c */
|
||||
extern int early_serial_base;
|
||||
void console_init(void);
|
||||
#else
|
||||
static const int early_serial_base;
|
||||
static inline void console_init(void)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
104
arch/x86/boot/compressed/mkpiggy.c
Normal file
104
arch/x86/boot/compressed/mkpiggy.c
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 2009 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*
|
||||
* H. Peter Anvin <hpa@linux.intel.com>
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Compute the desired load offset from a compressed program; outputs
|
||||
* a small assembly wrapper with the appropriate symbols defined.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include <tools/le_byteshift.h>
|
||||
|
||||
/*
 * Usage: mkpiggy compressed_file run_size
 *
 * Reads the last four bytes of compressed_file as a little-endian 32-bit
 * uncompressed length, derives the decompression offset from it and from
 * the file size, and prints an assembly wrapper on stdout that defines
 * z_input_len, z_output_len, z_extract_offset, z_extract_offset_negative
 * and z_run_size and pulls the file in with .incbin.
 *
 * Returns 0 on success and 1 on a usage, I/O or argument error; nothing
 * is written to stdout in the error case.
 */
int main(int argc, char *argv[])
{
	uint32_t olen;
	long ilen;
	unsigned long offs;
	unsigned long run_size;
	char *end;
	FILE *f = NULL;
	int retval = 1;

	if (argc < 3) {
		fprintf(stderr, "Usage: %s compressed_file run_size\n",
			argv[0]);
		goto bail;
	}

	/* Get the information for the compressed kernel image first */

	f = fopen(argv[1], "rb");
	if (!f) {
		perror(argv[1]);
		goto bail;
	}

	/*
	 * The uncompressed length is taken from the last four bytes of the
	 * file.  Without a successful seek the read below would return
	 * unrelated data, so give up instead of carrying on.
	 */
	if (fseek(f, -4L, SEEK_END)) {
		perror(argv[1]);
		goto bail;
	}

	if (fread(&olen, sizeof(olen), 1, f) != 1) {
		perror(argv[1]);
		goto bail;
	}

	/* We just read up to end of file, so the position is the file size */
	ilen = ftell(f);
	if (ilen < 0) {
		perror(argv[1]);
		goto bail;
	}
	olen = get_unaligned_le32(&olen);

	/*
	 * Now we have the input (compressed) and output (uncompressed)
	 * sizes, compute the necessary decompression offset...
	 */

	offs = (olen > ilen) ? olen - ilen : 0;
	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
	offs += 64*1024 + 128;	/* Add 64K + 128 bytes slack */
	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */

	/* Reject a run_size argument that does not start with a number */
	run_size = strtoul(argv[2], &end, 10);
	if (end == argv[2]) {
		fprintf(stderr, "%s: invalid run_size '%s'\n",
			argv[0], argv[2]);
		goto bail;
	}

	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
	printf(".globl z_input_len\n");
	printf("z_input_len = %ld\n", ilen);
	printf(".globl z_output_len\n");
	printf("z_output_len = %lu\n", (unsigned long)olen);
	printf(".globl z_extract_offset\n");
	printf("z_extract_offset = 0x%lx\n", offs);
	/* z_extract_offset_negative allows simplification of head_32.S */
	printf(".globl z_extract_offset_negative\n");
	printf("z_extract_offset_negative = -0x%lx\n", offs);
	printf(".globl z_run_size\n");
	printf("z_run_size = %lu\n", run_size);

	printf(".globl input_data, input_data_end\n");
	printf("input_data:\n");
	printf(".incbin \"%s\"\n", argv[1]);
	printf("input_data_end:\n");

	retval = 0;
bail:
	if (f)
		fclose(f);
	return retval;
}
|
||||
41
arch/x86/boot/compressed/string.c
Normal file
41
arch/x86/boot/compressed/string.c
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#include "../string.c"
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
void *memcpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
int d0, d1, d2;
|
||||
asm volatile(
|
||||
"rep ; movsl\n\t"
|
||||
"movl %4,%%ecx\n\t"
|
||||
"rep ; movsb\n\t"
|
||||
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
||||
: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
|
||||
: "memory");
|
||||
|
||||
return dest;
|
||||
}
|
||||
#else
|
||||
void *memcpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
long d0, d1, d2;
|
||||
asm volatile(
|
||||
"rep ; movsq\n\t"
|
||||
"movq %4,%%rcx\n\t"
|
||||
"rep ; movsb\n\t"
|
||||
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
||||
: "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
|
||||
: "memory");
|
||||
|
||||
return dest;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Fill the first @n bytes at @s with the byte value @c and return @s.
 *
 * The index is a size_t so that it has the same type as @n; a plain int
 * index would overflow before reaching counts above INT_MAX.
 */
void *memset(void *s, int c, size_t n)
{
	size_t i;
	char *ss = s;

	for (i = 0; i < n; i++)
		ss[i] = c;
	return s;
}
|
||||
74
arch/x86/boot/compressed/vmlinux.lds.S
Normal file
74
arch/x86/boot/compressed/vmlinux.lds.S
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
#include <asm-generic/vmlinux.lds.h>
|
||||
|
||||
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
|
||||
|
||||
#undef i386
|
||||
|
||||
#include <asm/cache.h>
|
||||
#include <asm/page_types.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
OUTPUT_ARCH(i386:x86-64)
|
||||
ENTRY(startup_64)
|
||||
#else
|
||||
OUTPUT_ARCH(i386)
|
||||
ENTRY(startup_32)
|
||||
#endif
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
/* Be careful parts of head_64.S assume startup_32 is at
|
||||
* address 0.
|
||||
*/
|
||||
. = 0;
|
||||
.head.text : {
|
||||
_head = . ;
|
||||
HEAD_TEXT
|
||||
_ehead = . ;
|
||||
}
|
||||
.rodata..compressed : {
|
||||
*(.rodata..compressed)
|
||||
}
|
||||
.text : {
|
||||
_text = .; /* Text */
|
||||
*(.text)
|
||||
*(.text.*)
|
||||
_etext = . ;
|
||||
}
|
||||
.rodata : {
|
||||
_rodata = . ;
|
||||
*(.rodata) /* read-only data */
|
||||
*(.rodata.*)
|
||||
_erodata = . ;
|
||||
}
|
||||
.got : {
|
||||
_got = .;
|
||||
KEEP(*(.got.plt))
|
||||
KEEP(*(.got))
|
||||
_egot = .;
|
||||
}
|
||||
.data : {
|
||||
_data = . ;
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
_edata = . ;
|
||||
}
|
||||
. = ALIGN(L1_CACHE_BYTES);
|
||||
.bss : {
|
||||
_bss = . ;
|
||||
*(.bss)
|
||||
*(.bss.*)
|
||||
*(COMMON)
|
||||
. = ALIGN(8); /* For convenience during zeroing */
|
||||
_ebss = .;
|
||||
}
|
||||
#ifdef CONFIG_X86_64
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
.pgtable : {
|
||||
_pgtable = . ;
|
||||
*(.pgtable)
|
||||
_epgtable = . ;
|
||||
}
|
||||
#endif
|
||||
_end = .;
|
||||
}
|
||||
87
arch/x86/boot/copy.S
Normal file
87
arch/x86/boot/copy.S
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
/*
|
||||
* Memory copy routines
|
||||
*/
|
||||
|
||||
.code16
|
||||
.text
|
||||
|
||||
GLOBAL(memcpy)
|
||||
pushw %si
|
||||
pushw %di
|
||||
movw %ax, %di
|
||||
movw %dx, %si
|
||||
pushw %cx
|
||||
shrw $2, %cx
|
||||
rep; movsl
|
||||
popw %cx
|
||||
andw $3, %cx
|
||||
rep; movsb
|
||||
popw %di
|
||||
popw %si
|
||||
retl
|
||||
ENDPROC(memcpy)
|
||||
|
||||
GLOBAL(memset)
|
||||
pushw %di
|
||||
movw %ax, %di
|
||||
movzbl %dl, %eax
|
||||
imull $0x01010101,%eax
|
||||
pushw %cx
|
||||
shrw $2, %cx
|
||||
rep; stosl
|
||||
popw %cx
|
||||
andw $3, %cx
|
||||
rep; stosb
|
||||
popw %di
|
||||
retl
|
||||
ENDPROC(memset)
|
||||
|
||||
GLOBAL(copy_from_fs)
|
||||
pushw %ds
|
||||
pushw %fs
|
||||
popw %ds
|
||||
calll memcpy
|
||||
popw %ds
|
||||
retl
|
||||
ENDPROC(copy_from_fs)
|
||||
|
||||
GLOBAL(copy_to_fs)
|
||||
pushw %es
|
||||
pushw %fs
|
||||
popw %es
|
||||
calll memcpy
|
||||
popw %es
|
||||
retl
|
||||
ENDPROC(copy_to_fs)
|
||||
|
||||
#if 0 /* Not currently used, but can be enabled as needed */
|
||||
GLOBAL(copy_from_gs)
|
||||
pushw %ds
|
||||
pushw %gs
|
||||
popw %ds
|
||||
calll memcpy
|
||||
popw %ds
|
||||
retl
|
||||
ENDPROC(copy_from_gs)
|
||||
|
||||
GLOBAL(copy_to_gs)
|
||||
pushw %es
|
||||
pushw %gs
|
||||
popw %es
|
||||
calll memcpy
|
||||
popw %es
|
||||
retl
|
||||
ENDPROC(copy_to_gs)
|
||||
#endif
|
||||
99
arch/x86/boot/cpu.c
Normal file
99
arch/x86/boot/cpu.c
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* arch/x86/boot/cpu.c
|
||||
*
|
||||
* Check for obligatory CPU features and abort if the features are not
|
||||
* present.
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#ifdef CONFIG_X86_FEATURE_NAMES
|
||||
#include "cpustr.h"
|
||||
#endif
|
||||
|
||||
/*
 * Return a printable name for a CPU level.
 *
 * Level 64 yields "x86-64".  Any other level is formatted as "i<level>86",
 * with level 15 reported as 6 ("i686").  For those levels the result lives
 * in a static buffer that the next call overwrites.
 */
static char *cpu_name(int level)
{
	/*
	 * Large enough for "i%d86" with any int value, so an unexpected
	 * level cannot overrun the buffer through the unbounded sprintf().
	 */
	static char buf[16];

	if (level == 64)
		return "x86-64";

	if (level == 15)
		level = 6;
	sprintf(buf, "i%d86", level);
	return buf;
}
|
||||
|
||||
/*
 * Print every feature bit that is set in @err_flags (NCAPINTS words).
 *
 * With CONFIG_X86_FEATURE_NAMES the x86_cap_strs table is walked in step
 * with the bits.  Each table entry is read as a word-index byte, a
 * bit-index byte and a NUL-terminated name; a bit with a matching,
 * non-empty name is printed by name, any other bit as "word:bit".
 * Without the option every bit is printed as "word:bit".
 */
static void show_cap_strs(u32 *err_flags)
{
	int word, bit;
#ifdef CONFIG_X86_FEATURE_NAMES
	const unsigned char *entry = (const unsigned char *)x86_cap_strs;

	for (word = 0; word < NCAPINTS; word++) {
		u32 pending = err_flags[word];

		for (bit = 0; bit < 32; bit++, pending >>= 1) {
			if (entry[0] < word ||
			    (entry[0] == word && entry[1] < bit)) {
				/* Skip to the next string */
				entry += 2;
				while (*entry++)
					;
			}

			if (!(pending & 1))
				continue;

			if (entry[0] == word && entry[1] == bit && entry[2])
				printf("%s ", entry+2);
			else
				printf("%d:%d ", word, bit);
		}
	}
#else
	for (word = 0; word < NCAPINTS; word++) {
		u32 pending = err_flags[word];

		for (bit = 0; bit < 32; bit++, pending >>= 1) {
			if (pending & 1)
				printf("%d:%d ", word, bit);
		}
	}
#endif
}
|
||||
|
||||
int validate_cpu(void)
|
||||
{
|
||||
u32 *err_flags;
|
||||
int cpu_level, req_level;
|
||||
|
||||
check_cpu(&cpu_level, &req_level, &err_flags);
|
||||
|
||||
if (cpu_level < req_level) {
|
||||
printf("This kernel requires an %s CPU, ",
|
||||
cpu_name(req_level));
|
||||
printf("but only detected an %s CPU.\n",
|
||||
cpu_name(cpu_level));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (err_flags) {
|
||||
puts("This kernel requires the following features "
|
||||
"not present on the CPU:\n");
|
||||
show_cap_strs(err_flags);
|
||||
putchar('\n');
|
||||
return -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
187
arch/x86/boot/cpucheck.c
Normal file
187
arch/x86/boot/cpucheck.c
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Check for obligatory CPU features and abort if the features are not
|
||||
* present. This code should be compilable as 16-, 32- or 64-bit
|
||||
* code, so be very careful with types and inline assembly.
|
||||
*
|
||||
* This code should not contain any messages; that requires an
|
||||
* additional wrapper.
|
||||
*
|
||||
* As written, this code is not safe for inclusion into the kernel
|
||||
* proper (after FPU initialization, in particular).
|
||||
*/
|
||||
|
||||
#ifdef _SETUP
|
||||
# include "boot.h"
|
||||
#endif
|
||||
#include <linux/types.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/required-features.h>
|
||||
#include <asm/msr-index.h>
|
||||
#include "string.h"
|
||||
|
||||
static u32 err_flags[NCAPINTS];
|
||||
|
||||
static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
|
||||
|
||||
static const u32 req_flags[NCAPINTS] =
|
||||
{
|
||||
REQUIRED_MASK0,
|
||||
REQUIRED_MASK1,
|
||||
0, /* REQUIRED_MASK2 not implemented in this file */
|
||||
0, /* REQUIRED_MASK3 not implemented in this file */
|
||||
REQUIRED_MASK4,
|
||||
0, /* REQUIRED_MASK5 not implemented in this file */
|
||||
REQUIRED_MASK6,
|
||||
0, /* REQUIRED_MASK7 not implemented in this file */
|
||||
};
|
||||
|
||||
#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
|
||||
|
||||
static int is_amd(void)
|
||||
{
|
||||
return cpu_vendor[0] == A32('A', 'u', 't', 'h') &&
|
||||
cpu_vendor[1] == A32('e', 'n', 't', 'i') &&
|
||||
cpu_vendor[2] == A32('c', 'A', 'M', 'D');
|
||||
}
|
||||
|
||||
static int is_centaur(void)
|
||||
{
|
||||
return cpu_vendor[0] == A32('C', 'e', 'n', 't') &&
|
||||
cpu_vendor[1] == A32('a', 'u', 'r', 'H') &&
|
||||
cpu_vendor[2] == A32('a', 'u', 'l', 's');
|
||||
}
|
||||
|
||||
static int is_transmeta(void)
|
||||
{
|
||||
return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
|
||||
cpu_vendor[1] == A32('i', 'n', 'e', 'T') &&
|
||||
cpu_vendor[2] == A32('M', 'x', '8', '6');
|
||||
}
|
||||
|
||||
static int is_intel(void)
|
||||
{
|
||||
return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
|
||||
cpu_vendor[1] == A32('i', 'n', 'e', 'I') &&
|
||||
cpu_vendor[2] == A32('n', 't', 'e', 'l');
|
||||
}
|
||||
|
||||
/* Returns a bitmask of which words we have error bits in */
|
||||
static int check_cpuflags(void)
|
||||
{
|
||||
u32 err;
|
||||
int i;
|
||||
|
||||
err = 0;
|
||||
for (i = 0; i < NCAPINTS; i++) {
|
||||
err_flags[i] = req_flags[i] & ~cpu.flags[i];
|
||||
if (err_flags[i])
|
||||
err |= 1 << i;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns -1 on error.
|
||||
*
|
||||
* *cpu_level is set to the current CPU level; *req_level to the required
|
||||
* level. x86-64 is considered level 64 for this purpose.
|
||||
*
|
||||
* *err_flags_ptr is set to the flags error array if there are flags missing.
|
||||
*/
|
||||
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
|
||||
{
|
||||
int err;
|
||||
|
||||
memset(&cpu.flags, 0, sizeof cpu.flags);
|
||||
cpu.level = 3;
|
||||
|
||||
if (has_eflag(X86_EFLAGS_AC))
|
||||
cpu.level = 4;
|
||||
|
||||
get_cpuflags();
|
||||
err = check_cpuflags();
|
||||
|
||||
if (test_bit(X86_FEATURE_LM, cpu.flags))
|
||||
cpu.level = 64;
|
||||
|
||||
if (err == 0x01 &&
|
||||
!(err_flags[0] &
|
||||
~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
|
||||
is_amd()) {
|
||||
/* If this is an AMD and we're only missing SSE+SSE2, try to
|
||||
turn them on */
|
||||
|
||||
u32 ecx = MSR_K7_HWCR;
|
||||
u32 eax, edx;
|
||||
|
||||
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
|
||||
eax &= ~(1 << 15);
|
||||
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
|
||||
|
||||
get_cpuflags(); /* Make sure it really did something */
|
||||
err = check_cpuflags();
|
||||
} else if (err == 0x01 &&
|
||||
!(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
|
||||
is_centaur() && cpu.model >= 6) {
|
||||
/* If this is a VIA C3, we might have to enable CX8
|
||||
explicitly */
|
||||
|
||||
u32 ecx = MSR_VIA_FCR;
|
||||
u32 eax, edx;
|
||||
|
||||
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
|
||||
eax |= (1<<1)|(1<<7);
|
||||
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
|
||||
|
||||
set_bit(X86_FEATURE_CX8, cpu.flags);
|
||||
err = check_cpuflags();
|
||||
} else if (err == 0x01 && is_transmeta()) {
|
||||
/* Transmeta might have masked feature bits in word 0 */
|
||||
|
||||
u32 ecx = 0x80860004;
|
||||
u32 eax, edx;
|
||||
u32 level = 1;
|
||||
|
||||
asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
|
||||
asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
|
||||
asm("cpuid"
|
||||
: "+a" (level), "=d" (cpu.flags[0])
|
||||
: : "ecx", "ebx");
|
||||
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
|
||||
|
||||
err = check_cpuflags();
|
||||
} else if (err == 0x01 &&
|
||||
!(err_flags[0] & ~(1 << X86_FEATURE_PAE)) &&
|
||||
is_intel() && cpu.level == 6 &&
|
||||
(cpu.model == 9 || cpu.model == 13)) {
|
||||
/* PAE is disabled on this Pentium M but can be forced */
|
||||
if (cmdline_find_option_bool("forcepae")) {
|
||||
puts("WARNING: Forcing PAE in CPU flags\n");
|
||||
set_bit(X86_FEATURE_PAE, cpu.flags);
|
||||
err = check_cpuflags();
|
||||
}
|
||||
else {
|
||||
puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (err_flags_ptr)
|
||||
*err_flags_ptr = err ? err_flags : NULL;
|
||||
if (cpu_level_ptr)
|
||||
*cpu_level_ptr = cpu.level;
|
||||
if (req_level_ptr)
|
||||
*req_level_ptr = req_level;
|
||||
|
||||
return (cpu.level < req_level || err) ? -1 : 0;
|
||||
}
|
||||
119
arch/x86/boot/cpuflags.c
Normal file
119
arch/x86/boot/cpuflags.c
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
#include <linux/types.h>
|
||||
#include "bitops.h"
|
||||
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/required-features.h>
|
||||
#include <asm/msr-index.h>
|
||||
#include "cpuflags.h"
|
||||
|
||||
struct cpu_features cpu;
|
||||
u32 cpu_vendor[3];
|
||||
|
||||
static bool loaded_flags;
|
||||
|
||||
static int has_fpu(void)
|
||||
{
|
||||
u16 fcw = -1, fsw = -1;
|
||||
unsigned long cr0;
|
||||
|
||||
asm volatile("mov %%cr0,%0" : "=r" (cr0));
|
||||
if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
|
||||
cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
|
||||
asm volatile("mov %0,%%cr0" : : "r" (cr0));
|
||||
}
|
||||
|
||||
asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
|
||||
: "+m" (fsw), "+m" (fcw));
|
||||
|
||||
return fsw == 0 && (fcw & 0x103f) == 0x003f;
|
||||
}
|
||||
|
||||
/*
|
||||
* For building the 16-bit code we want to explicitly specify 32-bit
|
||||
* push/pop operations, rather than just saying 'pushf' or 'popf' and
|
||||
* letting the compiler choose. But this is also included from the
|
||||
* compressed/ directory where it may be 64-bit code, and thus needs
|
||||
* to be 'pushfq' or 'popfq' in that case.
|
||||
*/
|
||||
#ifdef __x86_64__
|
||||
#define PUSHF "pushfq"
|
||||
#define POPF "popfq"
|
||||
#else
|
||||
#define PUSHF "pushfl"
|
||||
#define POPF "popfl"
|
||||
#endif
|
||||
|
||||
int has_eflag(unsigned long mask)
|
||||
{
|
||||
unsigned long f0, f1;
|
||||
|
||||
asm volatile(PUSHF " \n\t"
|
||||
PUSHF " \n\t"
|
||||
"pop %0 \n\t"
|
||||
"mov %0,%1 \n\t"
|
||||
"xor %2,%1 \n\t"
|
||||
"push %1 \n\t"
|
||||
POPF " \n\t"
|
||||
PUSHF " \n\t"
|
||||
"pop %1 \n\t"
|
||||
POPF
|
||||
: "=&r" (f0), "=&r" (f1)
|
||||
: "ri" (mask));
|
||||
|
||||
return !!((f0^f1) & mask);
|
||||
}
|
||||
|
||||
/* Handle x86_32 PIC using ebx. */
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
# define EBX_REG "=r"
|
||||
#else
|
||||
# define EBX_REG "=b"
|
||||
#endif
|
||||
|
||||
/*
 * Execute CPUID with %eax = @id and hand the four result registers back
 * through @a (%eax), @b (%ebx), @c (%ecx) and @d (%edx).
 *
 * The %ebx result goes through the EBX_REG constraint.  When the compiler
 * did not pick %ebx itself for that operand, the .ifnc sequences copy %ebx
 * into the operand before CPUID and exchange the two afterwards, leaving
 * %ebx with its old value and the operand with the CPUID result.
 */
static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
{
	u32 eax, ebx, ecx, edx;

	asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
		     "cpuid \n\t"
		     ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
		     : "=a" (eax), "=c" (ecx), "=d" (edx), EBX_REG (ebx)
		     : "a" (id)
	);

	*a = eax;
	*b = ebx;
	*c = ecx;
	*d = edx;
}
|
||||
|
||||
void get_cpuflags(void)
|
||||
{
|
||||
u32 max_intel_level, max_amd_level;
|
||||
u32 tfms;
|
||||
u32 ignored;
|
||||
|
||||
if (loaded_flags)
|
||||
return;
|
||||
loaded_flags = true;
|
||||
|
||||
if (has_fpu())
|
||||
set_bit(X86_FEATURE_FPU, cpu.flags);
|
||||
|
||||
if (has_eflag(X86_EFLAGS_ID)) {
|
||||
cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
|
||||
&cpu_vendor[1]);
|
||||
|
||||
if (max_intel_level >= 0x00000001 &&
|
||||
max_intel_level <= 0x0000ffff) {
|
||||
cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
|
||||
&cpu.flags[0]);
|
||||
cpu.level = (tfms >> 8) & 15;
|
||||
cpu.model = (tfms >> 4) & 15;
|
||||
if (cpu.level >= 6)
|
||||
cpu.model += ((tfms >> 16) & 0xf) << 4;
|
||||
}
|
||||
|
||||
cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
|
||||
&ignored);
|
||||
|
||||
if (max_amd_level >= 0x80000001 &&
|
||||
max_amd_level <= 0x8000ffff) {
|
||||
cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
|
||||
&cpu.flags[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
19
arch/x86/boot/cpuflags.h
Normal file
19
arch/x86/boot/cpuflags.h
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef BOOT_CPUFLAGS_H
|
||||
#define BOOT_CPUFLAGS_H
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor-flags.h>
|
||||
|
||||
/*
 * CPU identification collected by the boot code.  get_cpuflags() fills in
 * level, model and the flags[] words it reads via CPUID.
 */
struct cpu_features {
|
||||
int level; /* Family, or 64 for x86-64 */
|
||||
/* Model number; get_cpuflags() adds the extended model bits when level >= 6 */
int model;
|
||||
/* NCAPINTS feature words; get_cpuflags() loads words 0, 1, 4 and 6 from CPUID */
u32 flags[NCAPINTS];
|
||||
};
|
||||
|
||||
extern struct cpu_features cpu;
|
||||
extern u32 cpu_vendor[3];
|
||||
|
||||
int has_eflag(unsigned long mask);
|
||||
void get_cpuflags(void);
|
||||
|
||||
#endif
|
||||
21
arch/x86/boot/ctype.h
Normal file
21
arch/x86/boot/ctype.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef BOOT_ISDIGIT_H
|
||||
|
||||
#define BOOT_ISDIGIT_H
|
||||
|
||||
/*
 * Return 1 if @ch is an ASCII decimal digit ('0'..'9'), 0 otherwise.
 *
 * The subtraction is done in unsigned arithmetic so that every value
 * below '0' wraps to a large number and fails the single range check.
 */
static inline int isdigit(int ch)
{
	return ((unsigned int)ch - '0') < 10u;
}
|
||||
|
||||
/*
 * Return 1 if @ch is an ASCII hexadecimal digit ('0'..'9', 'a'..'f' or
 * 'A'..'F'), 0 otherwise.
 */
static inline int isxdigit(int ch)
{
	return isdigit(ch) ||
	       (ch >= 'a' && ch <= 'f') ||
	       (ch >= 'A' && ch <= 'F');
}
|
||||
|
||||
#endif
|
||||
151
arch/x86/boot/early_serial_console.c
Normal file
151
arch/x86/boot/early_serial_console.c
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
#include "boot.h"
|
||||
|
||||
#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
|
||||
|
||||
#define XMTRDY 0x20
|
||||
|
||||
#define DLAB 0x80
|
||||
|
||||
#define TXR 0 /* Transmit register (WRITE) */
|
||||
#define RXR 0 /* Receive register (READ) */
|
||||
#define IER 1 /* Interrupt Enable */
|
||||
#define IIR 2 /* Interrupt ID */
|
||||
#define FCR 2 /* FIFO control */
|
||||
#define LCR 3 /* Line control */
|
||||
#define MCR 4 /* Modem control */
|
||||
#define LSR 5 /* Line Status */
|
||||
#define MSR 6 /* Modem Status */
|
||||
#define DLL 0 /* Divisor Latch Low */
|
||||
#define DLH 1 /* Divisor latch High */
|
||||
|
||||
#define DEFAULT_BAUD 9600
|
||||
|
||||
static void early_serial_init(int port, int baud)
|
||||
{
|
||||
unsigned char c;
|
||||
unsigned divisor;
|
||||
|
||||
outb(0x3, port + LCR); /* 8n1 */
|
||||
outb(0, port + IER); /* no interrupt */
|
||||
outb(0, port + FCR); /* no fifo */
|
||||
outb(0x3, port + MCR); /* DTR + RTS */
|
||||
|
||||
divisor = 115200 / baud;
|
||||
c = inb(port + LCR);
|
||||
outb(c | DLAB, port + LCR);
|
||||
outb(divisor & 0xff, port + DLL);
|
||||
outb((divisor >> 8) & 0xff, port + DLH);
|
||||
outb(c & ~DLAB, port + LCR);
|
||||
|
||||
early_serial_base = port;
|
||||
}
|
||||
|
||||
static void parse_earlyprintk(void)
|
||||
{
|
||||
int baud = DEFAULT_BAUD;
|
||||
char arg[32];
|
||||
int pos = 0;
|
||||
int port = 0;
|
||||
|
||||
if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) {
|
||||
char *e;
|
||||
|
||||
if (!strncmp(arg, "serial", 6)) {
|
||||
port = DEFAULT_SERIAL_PORT;
|
||||
pos += 6;
|
||||
}
|
||||
|
||||
if (arg[pos] == ',')
|
||||
pos++;
|
||||
|
||||
/*
|
||||
* make sure we have
|
||||
* "serial,0x3f8,115200"
|
||||
* "serial,ttyS0,115200"
|
||||
* "ttyS0,115200"
|
||||
*/
|
||||
if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
|
||||
port = simple_strtoull(arg + pos, &e, 16);
|
||||
if (port == 0 || arg + pos == e)
|
||||
port = DEFAULT_SERIAL_PORT;
|
||||
else
|
||||
pos = e - arg;
|
||||
} else if (!strncmp(arg + pos, "ttyS", 4)) {
|
||||
static const int bases[] = { 0x3f8, 0x2f8 };
|
||||
int idx = 0;
|
||||
|
||||
if (!strncmp(arg + pos, "ttyS", 4))
|
||||
pos += 4;
|
||||
|
||||
if (arg[pos++] == '1')
|
||||
idx = 1;
|
||||
|
||||
port = bases[idx];
|
||||
}
|
||||
|
||||
if (arg[pos] == ',')
|
||||
pos++;
|
||||
|
||||
baud = simple_strtoull(arg + pos, &e, 0);
|
||||
if (baud == 0 || arg + pos == e)
|
||||
baud = DEFAULT_BAUD;
|
||||
}
|
||||
|
||||
if (port)
|
||||
early_serial_init(port, baud);
|
||||
}
|
||||
|
||||
#define BASE_BAUD (1843200/16)
|
||||
static unsigned int probe_baud(int port)
|
||||
{
|
||||
unsigned char lcr, dll, dlh;
|
||||
unsigned int quot;
|
||||
|
||||
lcr = inb(port + LCR);
|
||||
outb(lcr | DLAB, port + LCR);
|
||||
dll = inb(port + DLL);
|
||||
dlh = inb(port + DLH);
|
||||
outb(lcr, port + LCR);
|
||||
quot = (dlh << 8) | dll;
|
||||
|
||||
return BASE_BAUD / quot;
|
||||
}
|
||||
|
||||
static void parse_console_uart8250(void)
|
||||
{
|
||||
char optstr[64], *options;
|
||||
int baud = DEFAULT_BAUD;
|
||||
int port = 0;
|
||||
|
||||
/*
|
||||
* console=uart8250,io,0x3f8,115200n8
|
||||
* need to make sure it is last one console !
|
||||
*/
|
||||
if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
|
||||
return;
|
||||
|
||||
options = optstr;
|
||||
|
||||
if (!strncmp(options, "uart8250,io,", 12))
|
||||
port = simple_strtoull(options + 12, &options, 0);
|
||||
else if (!strncmp(options, "uart,io,", 8))
|
||||
port = simple_strtoull(options + 8, &options, 0);
|
||||
else
|
||||
return;
|
||||
|
||||
if (options && (options[0] == ','))
|
||||
baud = simple_strtoull(options + 1, &options, 0);
|
||||
else
|
||||
baud = probe_baud(port);
|
||||
|
||||
if (port)
|
||||
early_serial_init(port, baud);
|
||||
}
|
||||
|
||||
void console_init(void)
|
||||
{
|
||||
parse_earlyprintk();
|
||||
|
||||
if (!early_serial_base)
|
||||
parse_console_uart8250();
|
||||
}
|
||||
182
arch/x86/boot/edd.c
Normal file
182
arch/x86/boot/edd.c
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Get EDD BIOS disk information
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include <linux/edd.h>
|
||||
#include "string.h"
|
||||
|
||||
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
||||
|
||||
/*
|
||||
* Read the MBR (first sector) from a specific device.
|
||||
*/
|
||||
/*
 * Read the first sector of BIOS device @devno into @buf using the legacy
 * INT 13h read call.
 *
 * Returns 0 on success, -1 when the BIOS reports failure via the carry
 * flag.
 */
static int read_mbr(u8 devno, void *buf)
{
	struct biosregs in, out;

	initregs(&in);
	in.dl = devno;
	in.bx = (size_t)buf;
	in.ax = 0x0201;		/* Legacy Read, one sector */
	in.cx = 0x0001;		/* Sector 0-0-1 */

	intcall(0x13, &in, &out);

	if (out.eflags & X86_EFLAGS_CF)
		return -1;

	return 0;
}
|
||||
|
||||
/*
 * Fetch the MBR signature of BIOS device @devno.
 *
 * @devno:  BIOS device number
 * @ei:     EDD info for the device; only params.bytes_per_sector is read,
 *          and a value of 0 is treated as 512
 * @mbrsig: receives the 32-bit value at EDD_MBR_SIG_OFFSET of the sector
 *
 * The sector is read into a buffer placed on the heap just past _end,
 * aligned to the sector size in linear-address terms.
 *
 * Returns 0 when the sector was read and ends in the 0xAA55 magic.
 * Returns -1 (as a u32) when the heap is unavailable or too small, the
 * read fails, or the magic is missing.  *mbrsig is written whenever the
 * read itself succeeded, even if the magic check then fails.
 */
static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
{
	extern char _end[];
	int bytes = ei->params.bytes_per_sector;
	u32 heap_lin, aligned_lin;
	char *sector, *sector_end;

	if (!bytes)
		bytes = 512;	/* Best available guess */

	/* Make sure we actually have space on the heap... */
	if (!(boot_params.hdr.loadflags & CAN_USE_HEAP))
		return -1;

	/* Produce a naturally aligned buffer on the heap */
	heap_lin = (ds() << 4) + (u32)&_end;
	aligned_lin = (heap_lin + bytes - 1) & ~(bytes - 1);
	sector = _end + (aligned_lin - heap_lin);
	sector_end = sector + bytes;

	if (sector_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
		return -1;

	memset(sector, 0, bytes);
	if (read_mbr(devno, sector))
		return -1;

	*mbrsig = *(u32 *)&sector[EDD_MBR_SIG_OFFSET];

	/* check for valid MBR magic */
	if (*(u16 *)&sector[510] != 0xAA55)
		return -1;

	return 0;
}
|
||||
|
||||
/*
 * Query the BIOS for EDD information about device @devno.
 *
 * @ei is zeroed first.  Three INT 13h calls follow, all reusing the same
 * input register block:
 *   AH=41h  check that the extensions are present
 *   AH=48h  extended get device parameters, into ei->params
 *   AH=08h  legacy CHS parameters
 *
 * Returns -1 when the extensions check fails (carry set or wrong magic in
 * BX), leaving @ei zeroed.  Returns 0 otherwise; the legacy CHS fields are
 * only filled in when the last call succeeded.
 */
static int get_edd_info(u8 devno, struct edd_info *ei)
{
	struct biosregs in, out;

	memset(ei, 0, sizeof *ei);

	/* Check Extensions Present */
	initregs(&in);
	in.ah = 0x41;
	in.bx = EDDMAGIC1;
	in.dl = devno;
	intcall(0x13, &in, &out);

	/* Carry set means no extended information; BX must hold the magic */
	if ((out.eflags & X86_EFLAGS_CF) || out.bx != EDDMAGIC2)
		return -1;

	ei->device = devno;
	ei->version = out.ah;		 /* EDD version number */
	ei->interface_support = out.cx;	 /* EDD functionality subsets */

	/* Extended Get Device Parameters */
	ei->params.length = sizeof(ei->params);
	in.ah = 0x48;
	in.si = (size_t)&ei->params;
	intcall(0x13, &in, &out);

	/*
	 * Get legacy CHS parameters.
	 * Ralf Brown recommends setting ES:DI to 0:0
	 */
	in.ah = 0x08;
	in.es = 0;
	intcall(0x13, &in, &out);

	if (out.eflags & X86_EFLAGS_CF)
		return 0;

	ei->legacy_max_cylinder = out.ch + ((out.cl & 0xc0) << 2);
	ei->legacy_max_head = out.dh;
	ei->legacy_sectors_per_track = out.cl & 0x3f;

	return 0;
}
|
||||
|
||||
void query_edd(void)
|
||||
{
|
||||
char eddarg[8];
|
||||
int do_mbr = 1;
|
||||
#ifdef CONFIG_EDD_OFF
|
||||
int do_edd = 0;
|
||||
#else
|
||||
int do_edd = 1;
|
||||
#endif
|
||||
int be_quiet;
|
||||
int devno;
|
||||
struct edd_info ei, *edp;
|
||||
u32 *mbrptr;
|
||||
|
||||
if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
|
||||
if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
|
||||
do_edd = 1;
|
||||
do_mbr = 0;
|
||||
}
|
||||
else if (!strcmp(eddarg, "off"))
|
||||
do_edd = 0;
|
||||
else if (!strcmp(eddarg, "on"))
|
||||
do_edd = 1;
|
||||
}
|
||||
|
||||
be_quiet = cmdline_find_option_bool("quiet");
|
||||
|
||||
edp = boot_params.eddbuf;
|
||||
mbrptr = boot_params.edd_mbr_sig_buffer;
|
||||
|
||||
if (!do_edd)
|
||||
return;
|
||||
|
||||
/* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe,
|
||||
* so give a hint if this happens.
|
||||
*/
|
||||
|
||||
if (!be_quiet)
|
||||
printf("Probing EDD (edd=off to disable)... ");
|
||||
|
||||
for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) {
|
||||
/*
|
||||
* Scan the BIOS-supported hard disks and query EDD
|
||||
* information...
|
||||
*/
|
||||
if (!get_edd_info(devno, &ei)
|
||||
&& boot_params.eddbuf_entries < EDDMAXNR) {
|
||||
memcpy(edp, &ei, sizeof ei);
|
||||
edp++;
|
||||
boot_params.eddbuf_entries++;
|
||||
}
|
||||
|
||||
if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++))
|
||||
boot_params.edd_mbr_sig_buf_entries = devno-0x80+1;
|
||||
}
|
||||
|
||||
if (!be_quiet)
|
||||
printf("ok\n");
|
||||
}
|
||||
|
||||
#endif
|
||||
528
arch/x86/boot/header.S
Normal file
528
arch/x86/boot/header.S
Normal file
|
|
@ -0,0 +1,528 @@
|
|||
/*
|
||||
* header.S
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* Based on bootsect.S and setup.S
|
||||
* modified by more people than can be counted
|
||||
*
|
||||
* Rewritten as a common file by H. Peter Anvin (Apr 2007)
|
||||
*
|
||||
* BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
|
||||
* addresses must be multiplied by 16 to obtain their respective linear
|
||||
* addresses. To avoid confusion, linear addresses are written using leading
|
||||
* hex while segment addresses are written as segment:offset.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/segment.h>
|
||||
#include <generated/utsrelease.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/bootparam.h>
|
||||
#include "boot.h"
|
||||
#include "voffset.h"
|
||||
#include "zoffset.h"
|
||||
|
||||
BOOTSEG = 0x07C0 /* original address of boot-sector */
|
||||
SYSSEG = 0x1000 /* historical load address >> 4 */
|
||||
|
||||
#ifndef SVGA_MODE
|
||||
#define SVGA_MODE ASK_VGA
|
||||
#endif
|
||||
|
||||
#ifndef ROOT_RDONLY
|
||||
#define ROOT_RDONLY 1
|
||||
#endif
|
||||
|
||||
.code16
|
||||
.section ".bstext", "ax"
|
||||
|
||||
.global bootsect_start
|
||||
/*
 * bootsect_start: code that runs if this image is booted directly as a
 * boot sector.  It points %ds, %es and %ss at the code segment, prints
 * bugger_off_msg one character at a time, waits for a key press, asks the
 * BIOS to reboot via int 0x19 and, should that return, jumps to the BIOS
 * reset entry.
 */
bootsect_start:
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
# "MZ", MS-DOS header
|
||||
.byte 0x4d
|
||||
.byte 0x5a
|
||||
#endif
|
||||
|
||||
# Normalize the start address
|
||||
ljmp $BOOTSEG, $start2
|
||||
|
||||
start2:
|
||||
movw %cs, %ax
|
||||
movw %ax, %ds
|
||||
movw %ax, %es
|
||||
movw %ax, %ss
|
||||
xorw %sp, %sp
|
||||
sti
|
||||
cld
|
||||
|
||||
movw $bugger_off_msg, %si
|
||||
|
||||
/* Load the next message byte with lodsb; a zero byte ends the loop, anything else goes to int 0x10 with %ah = 0xe */
msg_loop:
|
||||
lodsb
|
||||
andb %al, %al
|
||||
jz bs_die
|
||||
movb $0xe, %ah
|
||||
movw $7, %bx
|
||||
int $0x10
|
||||
jmp msg_loop
|
||||
|
||||
bs_die:
|
||||
# Allow the user to press a key, then reboot
|
||||
xorw %ax, %ax
|
||||
int $0x16
|
||||
int $0x19
|
||||
|
||||
# int 0x19 should never return. In case it does anyway,
|
||||
# invoke the BIOS reset code...
|
||||
ljmp $0xf000,$0xfff0
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
.org 0x3c
|
||||
#
|
||||
# Offset to the PE header.
|
||||
#
|
||||
.long pe_header
|
||||
#endif /* CONFIG_EFI_STUB */
|
||||
|
||||
.section ".bsdata", "a"
|
||||
bugger_off_msg:
|
||||
.ascii "Use a boot loader.\r\n"
|
||||
.ascii "\n"
|
||||
.ascii "Remove disk and press any key to reboot...\r\n"
|
||||
.byte 0
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
pe_header:
|
||||
.ascii "PE"
|
||||
.word 0
|
||||
|
||||
coff_header:
|
||||
#ifdef CONFIG_X86_32
|
||||
.word 0x14c # i386
|
||||
#else
|
||||
.word 0x8664 # x86-64
|
||||
#endif
|
||||
.word 4 # nr_sections
|
||||
.long 0 # TimeDateStamp
|
||||
.long 0 # PointerToSymbolTable
|
||||
.long 1 # NumberOfSymbols
|
||||
.word section_table - optional_header # SizeOfOptionalHeader
|
||||
#ifdef CONFIG_X86_32
|
||||
.word 0x306 # Characteristics.
|
||||
# IMAGE_FILE_32BIT_MACHINE |
|
||||
# IMAGE_FILE_DEBUG_STRIPPED |
|
||||
# IMAGE_FILE_EXECUTABLE_IMAGE |
|
||||
# IMAGE_FILE_LINE_NUMS_STRIPPED
|
||||
#else
|
||||
.word 0x206 # Characteristics
|
||||
# IMAGE_FILE_DEBUG_STRIPPED |
|
||||
# IMAGE_FILE_EXECUTABLE_IMAGE |
|
||||
# IMAGE_FILE_LINE_NUMS_STRIPPED
|
||||
#endif
|
||||
|
||||
optional_header:
|
||||
#ifdef CONFIG_X86_32
|
||||
.word 0x10b # PE32 format
|
||||
#else
|
||||
.word 0x20b # PE32+ format
|
||||
#endif
|
||||
.byte 0x02 # MajorLinkerVersion
|
||||
.byte 0x14 # MinorLinkerVersion
|
||||
|
||||
# Filled in by build.c
|
||||
.long 0 # SizeOfCode
|
||||
|
||||
.long 0 # SizeOfInitializedData
|
||||
.long 0 # SizeOfUninitializedData
|
||||
|
||||
# Filled in by build.c
|
||||
.long 0x0000 # AddressOfEntryPoint
|
||||
|
||||
.long 0x0200 # BaseOfCode
|
||||
#ifdef CONFIG_X86_32
|
||||
.long 0 # data
|
||||
#endif
|
||||
|
||||
extra_header_fields:
|
||||
#ifdef CONFIG_X86_32
|
||||
.long 0 # ImageBase
|
||||
#else
|
||||
.quad 0 # ImageBase
|
||||
#endif
|
||||
.long CONFIG_PHYSICAL_ALIGN # SectionAlignment
|
||||
.long 0x20 # FileAlignment
|
||||
.word 0 # MajorOperatingSystemVersion
|
||||
.word 0 # MinorOperatingSystemVersion
|
||||
.word 0 # MajorImageVersion
|
||||
.word 0 # MinorImageVersion
|
||||
.word 0 # MajorSubsystemVersion
|
||||
.word 0 # MinorSubsystemVersion
|
||||
.long 0 # Win32VersionValue
|
||||
|
||||
#
|
||||
# The size of the bzImage is written in tools/build.c
|
||||
#
|
||||
.long 0 # SizeOfImage
|
||||
|
||||
.long 0x200 # SizeOfHeaders
|
||||
.long 0 # CheckSum
|
||||
.word 0xa # Subsystem (EFI application)
|
||||
.word 0 # DllCharacteristics
|
||||
#ifdef CONFIG_X86_32
|
||||
.long 0 # SizeOfStackReserve
|
||||
.long 0 # SizeOfStackCommit
|
||||
.long 0 # SizeOfHeapReserve
|
||||
.long 0 # SizeOfHeapCommit
|
||||
#else
|
||||
.quad 0 # SizeOfStackReserve
|
||||
.quad 0 # SizeOfStackCommit
|
||||
.quad 0 # SizeOfHeapReserve
|
||||
.quad 0 # SizeOfHeapCommit
|
||||
#endif
|
||||
.long 0 # LoaderFlags
|
||||
.long 0x6 # NumberOfRvaAndSizes
|
||||
|
||||
.quad 0 # ExportTable
|
||||
.quad 0 # ImportTable
|
||||
.quad 0 # ResourceTable
|
||||
.quad 0 # ExceptionTable
|
||||
.quad 0 # CertificationTable
|
||||
.quad 0 # BaseRelocationTable
|
||||
|
||||
# Section table
|
||||
section_table:
|
||||
#
|
||||
# The offset & size fields are filled in by build.c.
|
||||
#
|
||||
.ascii ".setup"
|
||||
.byte 0
|
||||
.byte 0
|
||||
.long 0
|
||||
.long 0x0 # startup_{32,64}
|
||||
.long 0 # Size of initialized data
|
||||
# on disk
|
||||
.long 0x0 # startup_{32,64}
|
||||
.long 0 # PointerToRelocations
|
||||
.long 0 # PointerToLineNumbers
|
||||
.word 0 # NumberOfRelocations
|
||||
.word 0 # NumberOfLineNumbers
|
||||
.long 0x60500020 # Characteristics (section flags)
|
||||
|
||||
#
|
||||
# The EFI application loader requires a relocation section
|
||||
# because EFI applications must be relocatable. The .reloc
|
||||
# offset & size fields are filled in by build.c.
|
||||
#
|
||||
.ascii ".reloc"
|
||||
.byte 0
|
||||
.byte 0
|
||||
.long 0
|
||||
.long 0
|
||||
.long 0 # SizeOfRawData
|
||||
.long 0 # PointerToRawData
|
||||
.long 0 # PointerToRelocations
|
||||
.long 0 # PointerToLineNumbers
|
||||
.word 0 # NumberOfRelocations
|
||||
.word 0 # NumberOfLineNumbers
|
||||
.long 0x42100040 # Characteristics (section flags)
|
||||
|
||||
#
|
||||
# The offset & size fields are filled in by build.c.
|
||||
#
|
||||
.ascii ".text"
|
||||
.byte 0
|
||||
.byte 0
|
||||
.byte 0
|
||||
.long 0
|
||||
.long 0x0 # startup_{32,64}
|
||||
.long 0 # Size of initialized data
|
||||
# on disk
|
||||
.long 0x0 # startup_{32,64}
|
||||
.long 0 # PointerToRelocations
|
||||
.long 0 # PointerToLineNumbers
|
||||
.word 0 # NumberOfRelocations
|
||||
.word 0 # NumberOfLineNumbers
|
||||
.long 0x60500020 # Characteristics (section flags)
|
||||
|
||||
#
|
||||
# The offset & size fields are filled in by build.c.
|
||||
#
|
||||
.ascii ".bss"
|
||||
.byte 0
|
||||
.byte 0
|
||||
.byte 0
|
||||
.byte 0
|
||||
.long 0
|
||||
.long 0x0
|
||||
.long 0 # Size of initialized data
|
||||
# on disk
|
||||
.long 0x0
|
||||
.long 0 # PointerToRelocations
|
||||
.long 0 # PointerToLineNumbers
|
||||
.word 0 # NumberOfRelocations
|
||||
.word 0 # NumberOfLineNumbers
|
||||
.long 0xc8000080 # Characteristics (section flags)
|
||||
|
||||
#endif /* CONFIG_EFI_STUB */
|
||||
|
||||
# Kernel attributes; used by setup. This is part 1 of the
|
||||
# header, from the old boot sector.
|
||||
|
||||
.section ".header", "a"
|
||||
.globl sentinel
|
||||
sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
|
||||
|
||||
.globl hdr
|
||||
hdr:
|
||||
setup_sects: .byte 0 /* Filled in by build.c */
|
||||
root_flags: .word ROOT_RDONLY
|
||||
syssize: .long 0 /* Filled in by build.c */
|
||||
ram_size: .word 0 /* Obsolete */
|
||||
vid_mode: .word SVGA_MODE
|
||||
root_dev: .word 0 /* Filled in by build.c */
|
||||
boot_flag: .word 0xAA55
|
||||
|
||||
# offset 512, entry point
|
||||
|
||||
.globl _start
|
||||
_start:
|
||||
# Explicitly enter this as bytes, or the assembler
|
||||
# tries to generate a 3-byte jump here, which causes
|
||||
# everything else to push off to the wrong offset.
|
||||
.byte 0xeb # short (2-byte) jump
|
||||
.byte start_of_setup-1f
|
||||
1:
|
||||
|
||||
# Part 2 of the header, from the old setup.S
|
||||
|
||||
.ascii "HdrS" # header signature
|
||||
.word 0x020d # header version number (>= 0x0105)
|
||||
# or else old loadlin-1.5 will fail)
|
||||
.globl realmode_swtch
|
||||
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
|
||||
start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
|
||||
# in case something decided to "use" it
|
||||
.word kernel_version-512 # pointing to kernel version string
|
||||
# above section of header is compatible
|
||||
# with loadlin-1.5 (header v1.5). Don't
|
||||
# change it.
|
||||
|
||||
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
|
||||
# bootloaders know to change this.
|
||||
# See Documentation/x86/boot.txt for
|
||||
# assigned ids
|
||||
|
||||
# flags, unused bits must be zero (RFU) bit within loadflags
|
||||
loadflags:
|
||||
.byte LOADED_HIGH # The kernel is to be loaded high
|
||||
|
||||
setup_move_size: .word 0x8000 # size to move, when setup is not
|
||||
# loaded at 0x90000. We will move setup
|
||||
# to 0x90000 then just before jumping
|
||||
# into the kernel. However, only the
|
||||
# loader knows how much data behind
|
||||
# us also needs to be loaded.
|
||||
|
||||
code32_start: # here loaders can put a different
|
||||
# start address for 32-bit code.
|
||||
.long 0x100000 # 0x100000 = default for big kernel
|
||||
|
||||
ramdisk_image: .long 0 # address of loaded ramdisk image
|
||||
# Here the loader puts the 32-bit
|
||||
# address where it loaded the image.
|
||||
# This only will be read by the kernel.
|
||||
|
||||
ramdisk_size: .long 0 # its size in bytes
|
||||
|
||||
bootsect_kludge:
|
||||
.long 0 # obsolete
|
||||
|
||||
heap_end_ptr: .word _end+STACK_SIZE-512
|
||||
# (Header version 0x0201 or later)
|
||||
# space from here (exclusive) down to
|
||||
# end of setup code can be used by setup
|
||||
# for local heap purposes.
|
||||
|
||||
ext_loader_ver:
|
||||
.byte 0 # Extended boot loader version
|
||||
ext_loader_type:
|
||||
.byte 0 # Extended boot loader type
|
||||
|
||||
cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
|
||||
# If nonzero, a 32-bit pointer
|
||||
# to the kernel command line.
|
||||
# The command line should be
|
||||
# located between the start of
|
||||
# setup and the end of low
|
||||
# memory (0xa0000), or it may
|
||||
# get overwritten before it
|
||||
# gets read. If this field is
|
||||
# used, there is no longer
|
||||
# anything magical about the
|
||||
# 0x90000 segment; the setup
|
||||
# can be located anywhere in
|
||||
# low memory 0x10000 or higher.
|
||||
|
||||
initrd_addr_max: .long 0x7fffffff
|
||||
# (Header version 0x0203 or later)
|
||||
# The highest safe address for
|
||||
# the contents of an initrd
|
||||
# The current kernel allows up to 4 GB,
|
||||
# but leave it at 2 GB to avoid
|
||||
# possible bootloader bugs.
|
||||
|
||||
kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment
|
||||
#required for protected mode
|
||||
#kernel
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
relocatable_kernel: .byte 1
|
||||
#else
|
||||
relocatable_kernel: .byte 0
|
||||
#endif
|
||||
min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
|
||||
|
||||
xloadflags:
|
||||
#ifdef CONFIG_X86_64
|
||||
# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
|
||||
#else
|
||||
# define XLF0 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
|
||||
/* kernel/boot_param/ramdisk could be loaded above 4g */
|
||||
# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
|
||||
#else
|
||||
# define XLF1 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
# ifdef CONFIG_EFI_MIXED
|
||||
# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
|
||||
# else
|
||||
# ifdef CONFIG_X86_64
|
||||
# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
|
||||
# else
|
||||
# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
|
||||
# endif
|
||||
# endif
|
||||
#else
|
||||
# define XLF23 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
|
||||
# define XLF4 XLF_EFI_KEXEC
|
||||
#else
|
||||
# define XLF4 0
|
||||
#endif
|
||||
|
||||
.word XLF0 | XLF1 | XLF23 | XLF4
|
||||
|
||||
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
|
||||
#added with boot protocol
|
||||
#version 2.06
|
||||
|
||||
hardware_subarch: .long 0 # subarchitecture, added with 2.07
|
||||
# default to 0 for normal x86 PC
|
||||
|
||||
hardware_subarch_data: .quad 0
|
||||
|
||||
payload_offset: .long ZO_input_data
|
||||
payload_length: .long ZO_z_input_len
|
||||
|
||||
setup_data: .quad 0 # 64-bit physical pointer to
|
||||
# single linked list of
|
||||
# struct setup_data
|
||||
|
||||
pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
|
||||
|
||||
#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
|
||||
#define VO_INIT_SIZE (VO__end - VO__text)
|
||||
#if ZO_INIT_SIZE > VO_INIT_SIZE
|
||||
#define INIT_SIZE ZO_INIT_SIZE
|
||||
#else
|
||||
#define INIT_SIZE VO_INIT_SIZE
|
||||
#endif
|
||||
init_size: .long INIT_SIZE # kernel initialization size
|
||||
handover_offset: .long 0 # Filled in by build.c
|
||||
|
||||
# End of setup header #####################################################
|
||||
|
||||
.section ".entrytext", "ax"
|
||||
/*
 * start_of_setup: real-mode entry code reached from the short jump at
 * _start.  In order it: makes %es equal to %ds, picks a stack (keeping
 * the caller's %sp when %ss already equals %ds), reloads %cs with the
 * value of %ds, checks the setup_sig signature, zeroes the bss and calls
 * main.
 */
start_of_setup:
|
||||
# Force %es = %ds
|
||||
movw %ds, %ax
|
||||
movw %ax, %es
|
||||
cld
|
||||
|
||||
# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds,
|
||||
# which happened to work by accident for the old code. Recalculate the stack
|
||||
# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the
|
||||
# stack behind its own code, so we can't blindly put it directly past the heap.
|
||||
|
||||
movw %ss, %dx
|
||||
cmpw %ax, %dx # %ds == %ss?
|
||||
movw %sp, %dx
|
||||
je 2f # -> assume %sp is reasonably set
|
||||
|
||||
# Invalid %ss, make up a new stack
|
||||
/* Base is _end, or heap_end_ptr when CAN_USE_HEAP is set in loadflags; STACK_SIZE is added on top */
movw $_end, %dx
|
||||
testb $CAN_USE_HEAP, loadflags
|
||||
jz 1f
|
||||
movw heap_end_ptr, %dx
|
||||
1: addw $STACK_SIZE, %dx
|
||||
jnc 2f
|
||||
xorw %dx, %dx # Prevent wraparound
|
||||
|
||||
2: # Now %dx should point to the end of our stack space
|
||||
andw $~3, %dx # dword align (might as well...)
|
||||
jnz 3f
|
||||
movw $0xfffc, %dx # Make sure we're not zero
|
||||
3: movw %ax, %ss
|
||||
movzwl %dx, %esp # Clear upper half of %esp
|
||||
sti # Now we should have a working stack
|
||||
|
||||
# We will have entered with %cs = %ds+0x20, normalize %cs so
|
||||
# it is on par with the other segments.
|
||||
/* Far return: pops the offset of label 6 into %ip and the pushed %ds value into %cs */
pushw %ds
|
||||
pushw $6f
|
||||
lretw
|
||||
6:
|
||||
|
||||
# Check signature at end of setup
|
||||
cmpl $0x5a5aaa55, setup_sig
|
||||
jne setup_bad
|
||||
|
||||
# Zero the bss
|
||||
/* Stores (_end + 3 - __bss_start) / 4 zero dwords starting at __bss_start */
movw $__bss_start, %di
|
||||
movw $_end+3, %cx
|
||||
xorl %eax, %eax
|
||||
subw %di, %cx
|
||||
shrw $2, %cx
|
||||
rep; stosl
|
||||
|
||||
# Jump to C code (should not return)
|
||||
calll main
|
||||
|
||||
# Setup corrupt somehow...
|
||||
/*
 * setup_bad: reached when the setup_sig comparison in start_of_setup
 * fails.  Calls puts with the address of setup_corrupt in %eax, then
 * falls through into die.
 */
setup_bad:
|
||||
movl $setup_corrupt, %eax
|
||||
calll puts
|
||||
# Fall through...
|
||||
|
||||
/* die: global function that executes hlt in an endless loop */
.globl die
|
||||
.type die, @function
|
||||
die:
|
||||
hlt
|
||||
jmp die
|
||||
|
||||
.size die, .-die
|
||||
|
||||
.section ".initdata", "a"
|
||||
setup_corrupt:
|
||||
.byte 7
|
||||
.string "No setup signature found...\n"
|
||||
59
arch/x86/boot/install.sh
Normal file
59
arch/x86/boot/install.sh
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# This file is subject to the terms and conditions of the GNU General Public
|
||||
# License. See the file "COPYING" in the main directory of this archive
|
||||
# for more details.
|
||||
#
|
||||
# Copyright (C) 1995 by Linus Torvalds
|
||||
#
|
||||
# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
|
||||
#
|
||||
# "make install" script for i386 architecture
|
||||
#
|
||||
# Arguments:
|
||||
# $1 - kernel version
|
||||
# $2 - kernel image file
|
||||
# $3 - kernel map file
|
||||
# $4 - default install path (blank if root directory)
|
||||
#
|
||||
|
||||
# verify FILE
# Return successfully when FILE is a regular file.  Otherwise print a
# "missing file" notice on stderr and exit the script with status 1.
verify () {
	[ -f "$1" ] && return 0

	{
		echo ""
		echo " *** Missing file: $1"
		echo ' *** You need to run "make" before "make install".'
		echo ""
	} 1>&2
	exit 1
}
|
||||
|
||||
# Make sure the files actually exist
verify "$2"
verify "$3"

# User may have a custom install script; if one is found it replaces this
# script and receives the original arguments.

if [ -x ~/bin/"${INSTALLKERNEL}" ]; then exec ~/bin/"${INSTALLKERNEL}" "$@"; fi
if [ -x /sbin/"${INSTALLKERNEL}" ]; then exec /sbin/"${INSTALLKERNEL}" "$@"; fi

# Default install - same as make zlilo
#
# Every path is quoted so that an install path or file name containing
# whitespace or glob characters is used as a single word.

# Keep one previous generation of the kernel image and the map file
if [ -f "$4/vmlinuz" ]; then
	mv "$4/vmlinuz" "$4/vmlinuz.old"
fi

if [ -f "$4/System.map" ]; then
	mv "$4/System.map" "$4/System.old"
fi

cat "$2" > "$4/vmlinuz"
cp "$3" "$4/System.map"

# Re-run the boot loader installer if one is present
if [ -x /sbin/lilo ]; then
	/sbin/lilo
elif [ -x /etc/lilo/install ]; then
	/etc/lilo/install
else
	sync
	echo "Cannot find LILO."
fi
|
||||
185
arch/x86/boot/main.c
Normal file
185
arch/x86/boot/main.c
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Main module for the real-mode kernel code
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "string.h"
|
||||
|
||||
/* Boot parameter block; copy_boot_params() copies the setup header into its .hdr member. */
struct boot_params boot_params __attribute__((aligned(16)));
|
||||
|
||||
/* Both heap pointers start at _end; init_heap() may later move heap_end. */
char *HEAP = _end;
|
||||
char *heap_end = _end; /* Default end of heap = no heap */
|
||||
|
||||
/*
|
||||
* Copy the header into the boot parameter block. Since this
|
||||
* screws up the old-style command line protocol, adjust by
|
||||
* filling in the new-style command line pointer instead.
|
||||
*/
|
||||
|
||||
static void copy_boot_params(void)
|
||||
{
|
||||
struct old_cmdline {
|
||||
u16 cl_magic;
|
||||
u16 cl_offset;
|
||||
};
|
||||
const struct old_cmdline * const oldcmd =
|
||||
(const struct old_cmdline *)OLD_CL_ADDRESS;
|
||||
|
||||
BUILD_BUG_ON(sizeof boot_params != 4096);
|
||||
memcpy(&boot_params.hdr, &hdr, sizeof hdr);
|
||||
|
||||
if (!boot_params.hdr.cmd_line_ptr &&
|
||||
oldcmd->cl_magic == OLD_CL_MAGIC) {
|
||||
/* Old-style command line protocol. */
|
||||
u16 cmdline_seg;
|
||||
|
||||
/* Figure out if the command line falls in the region
|
||||
of memory that an old kernel would have copied up
|
||||
to 0x90000... */
|
||||
if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
|
||||
cmdline_seg = ds();
|
||||
else
|
||||
cmdline_seg = 0x9000;
|
||||
|
||||
boot_params.hdr.cmd_line_ptr =
|
||||
(cmdline_seg << 4) + oldcmd->cl_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Query the keyboard lock status as given by the BIOS, and
|
||||
* set the keyboard repeat rate to maximum. Unclear why the latter
|
||||
* is done here; this might be possible to kill off as stale code.
|
||||
*/
|
||||
static void keyboard_init(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
initregs(&ireg);
|
||||
|
||||
ireg.ah = 0x02; /* Get keyboard status */
|
||||
intcall(0x16, &ireg, &oreg);
|
||||
boot_params.kbd_status = oreg.al;
|
||||
|
||||
ireg.ax = 0x0305; /* Set keyboard repeat rate */
|
||||
intcall(0x16, &ireg, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get Intel SpeedStep (IST) information.
|
||||
*/
|
||||
static void query_ist(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
/* Some older BIOSes apparently crash on this call, so filter
|
||||
it from machines too old to have SpeedStep at all. */
|
||||
if (cpu.level < 6)
|
||||
return;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0xe980; /* IST Support */
|
||||
ireg.edx = 0x47534943; /* Request value */
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
boot_params.ist_info.signature = oreg.eax;
|
||||
boot_params.ist_info.command = oreg.ebx;
|
||||
boot_params.ist_info.event = oreg.ecx;
|
||||
boot_params.ist_info.perf_level = oreg.edx;
|
||||
}
|
||||
|
||||
/*
 * Announce the intended CPU mode to the BIOS.  Only 64-bit builds issue
 * the call (INT 15h, AX=EC00h, BX=2); on other builds this is a no-op.
 */
static void set_bios_mode(void)
{
#ifdef CONFIG_X86_64
	struct biosregs req;

	initregs(&req);
	req.ax = 0xec00;
	req.bx = 2;
	intcall(0x15, &req, NULL);
#endif
}
|
||||
|
||||
static void init_heap(void)
|
||||
{
|
||||
char *stack_end;
|
||||
|
||||
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
|
||||
asm("leal %P1(%%esp),%0"
|
||||
: "=r" (stack_end) : "i" (-STACK_SIZE));
|
||||
|
||||
heap_end = (char *)
|
||||
((size_t)boot_params.hdr.heap_end_ptr + 0x200);
|
||||
if (heap_end > stack_end)
|
||||
heap_end = stack_end;
|
||||
} else {
|
||||
/* Boot protocol 2.00 only, no heap available */
|
||||
puts("WARNING: Ancient bootloader, some functionality "
|
||||
"may be limited!\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Entry point of the real-mode setup code: gather information from the
 * BIOS into boot_params, then hand over to protected mode.
 */
void main(void)
{
	/* The setup header goes into the "zeropage" before anything else. */
	copy_boot_params();

	/* Early-boot console; announce it when "debug" is on the command line. */
	console_init();
	if (cmdline_find_option_bool("debug"))
		puts("early console in setup code\n");

	/* Work out where the heap ends. */
	init_heap();

	/* Stop right here if the CPU check fails. */
	if (validate_cpu()) {
		puts("Unable to boot - please use a kernel appropriate "
		     "for your CPU.\n");
		die();
	}

	/* Let the BIOS know which CPU mode will be used. */
	set_bios_mode();

	/* Memory layout */
	detect_memory();

	/* Keyboard lock flags (and, for unclear reasons, the repeat rate) */
	keyboard_init();

	/* MCA information */
	query_mca();

	/* Intel SpeedStep (IST) information */
	query_ist();

	/* APM information, only when APM support is configured */
#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
	query_apm_bios();
#endif

	/* EDD information, only when EDD support is configured */
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	query_edd();
#endif

	/* Video mode */
	set_video();

	/* Final preparations and the switch to protected mode */
	go_to_protected_mode();
}
|
||||
38
arch/x86/boot/mca.c
Normal file
38
arch/x86/boot/mca.c
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Get the MCA system description table
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
int query_mca(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
u16 len;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0xc0;
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
if (oreg.eflags & X86_EFLAGS_CF)
|
||||
return -1; /* No MCA present */
|
||||
|
||||
set_fs(oreg.es);
|
||||
len = rdfs16(oreg.bx);
|
||||
|
||||
if (len > sizeof(boot_params.sys_desc_table))
|
||||
len = sizeof(boot_params.sys_desc_table);
|
||||
|
||||
copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
|
||||
return 0;
|
||||
}
|
||||
136
arch/x86/boot/memory.c
Normal file
136
arch/x86/boot/memory.c
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Memory detection code
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
#define SMAP 0x534d4150 /* ASCII "SMAP" */
|
||||
|
||||
static int detect_memory_e820(void)
|
||||
{
|
||||
int count = 0;
|
||||
struct biosregs ireg, oreg;
|
||||
struct e820entry *desc = boot_params.e820_map;
|
||||
static struct e820entry buf; /* static so it is zeroed */
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0xe820;
|
||||
ireg.cx = sizeof buf;
|
||||
ireg.edx = SMAP;
|
||||
ireg.di = (size_t)&buf;
|
||||
|
||||
/*
|
||||
* Note: at least one BIOS is known which assumes that the
|
||||
* buffer pointed to by one e820 call is the same one as
|
||||
* the previous call, and only changes modified fields. Therefore,
|
||||
* we use a temporary buffer and copy the results entry by entry.
|
||||
*
|
||||
* This routine deliberately does not try to account for
|
||||
* ACPI 3+ extended attributes. This is because there are
|
||||
* BIOSes in the field which report zero for the valid bit for
|
||||
* all ranges, and we don't currently make any use of the
|
||||
* other attribute bits. Revisit this if we see the extended
|
||||
* attribute bits deployed in a meaningful way in the future.
|
||||
*/
|
||||
|
||||
do {
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
ireg.ebx = oreg.ebx; /* for next iteration... */
|
||||
|
||||
/* BIOSes which terminate the chain with CF = 1 as opposed
|
||||
to %ebx = 0 don't always report the SMAP signature on
|
||||
the final, failing, probe. */
|
||||
if (oreg.eflags & X86_EFLAGS_CF)
|
||||
break;
|
||||
|
||||
/* Some BIOSes stop returning SMAP in the middle of
|
||||
the search loop. We don't know exactly how the BIOS
|
||||
screwed up the map at that point, we might have a
|
||||
partial map, the full map, or complete garbage, so
|
||||
just return failure. */
|
||||
if (oreg.eax != SMAP) {
|
||||
count = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
*desc++ = buf;
|
||||
count++;
|
||||
} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
|
||||
|
||||
return boot_params.e820_entries = count;
|
||||
}
|
||||
|
||||
static int detect_memory_e801(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0xe801;
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
if (oreg.eflags & X86_EFLAGS_CF)
|
||||
return -1;
|
||||
|
||||
/* Do we really need to do this? */
|
||||
if (oreg.cx || oreg.dx) {
|
||||
oreg.ax = oreg.cx;
|
||||
oreg.bx = oreg.dx;
|
||||
}
|
||||
|
||||
if (oreg.ax > 15*1024) {
|
||||
return -1; /* Bogus! */
|
||||
} else if (oreg.ax == 15*1024) {
|
||||
boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
|
||||
} else {
|
||||
/*
|
||||
* This ignores memory above 16MB if we have a memory
|
||||
* hole there. If someone actually finds a machine
|
||||
* with a memory hole at 16MB and no support for
|
||||
* 0E820h they should probably generate a fake e820
|
||||
* map.
|
||||
*/
|
||||
boot_params.alt_mem_k = oreg.ax;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int detect_memory_88(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x88;
|
||||
intcall(0x15, &ireg, &oreg);
|
||||
|
||||
boot_params.screen_info.ext_mem_k = oreg.ax;
|
||||
|
||||
return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
|
||||
}
|
||||
|
||||
/*
 * Run all three memory probes (E820h, E801h, 88h), always in that order
 * and regardless of earlier results.
 *
 * Returns 0 if at least one probe succeeded, -1 otherwise.
 */
int detect_memory(void)
{
	int any_ok = 0;

	any_ok |= (detect_memory_e820() > 0);
	any_ok |= !detect_memory_e801();
	any_ok |= !detect_memory_88();

	return any_ok ? 0 : -1;
}
|
||||
52
arch/x86/boot/mkcpustr.c
Normal file
52
arch/x86/boot/mkcpustr.c
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright 2008 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2 or (at your
|
||||
* option) any later version; incorporated herein by reference.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* This is a host program to preprocess the CPU strings into a
|
||||
* compact format suitable for the setup code.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../include/asm/required-features.h"
|
||||
#include "../include/asm/disabled-features.h"
|
||||
#include "../include/asm/cpufeature.h"
|
||||
#include "../kernel/cpu/capflags.c"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int i, j;
|
||||
const char *str;
|
||||
|
||||
printf("static const char x86_cap_strs[] =\n");
|
||||
|
||||
for (i = 0; i < NCAPINTS; i++) {
|
||||
for (j = 0; j < 32; j++) {
|
||||
str = x86_cap_flags[i*32+j];
|
||||
|
||||
if (i == NCAPINTS-1 && j == 31) {
|
||||
/* The last entry must be unconditional; this
|
||||
also consumes the compiler-added null
|
||||
character */
|
||||
if (!str)
|
||||
str = "";
|
||||
printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
|
||||
i, j, str);
|
||||
} else if (str) {
|
||||
printf("#if REQUIRED_MASK%d & (1 << %d)\n"
|
||||
"\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
|
||||
"#endif\n",
|
||||
i, j, i, j, str);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\t;\n");
|
||||
return 0;
|
||||
}
|
||||
17
arch/x86/boot/mtools.conf.in
Normal file
17
arch/x86/boot/mtools.conf.in
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
#
|
||||
# mtools configuration file for "make (b)zdisk"
|
||||
#
|
||||
|
||||
# Actual floppy drive
|
||||
drive a:
|
||||
file="/dev/fd0"
|
||||
|
||||
# 1.44 MB floppy disk image
|
||||
drive v:
|
||||
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
|
||||
|
||||
# 2.88 MB floppy disk image (mostly for virtual uses)
|
||||
drive w:
|
||||
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
|
||||
|
||||
|
||||
126
arch/x86/boot/pm.c
Normal file
126
arch/x86/boot/pm.c
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Prepare the machine for transition to protected mode.
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include <asm/segment.h>
|
||||
|
||||
/*
|
||||
* Invoke the realmode switch hook if present; otherwise
|
||||
* disable all interrupts.
|
||||
*/
|
||||
static void realmode_switch_hook(void)
|
||||
{
|
||||
if (boot_params.hdr.realmode_swtch) {
|
||||
asm volatile("lcallw *%0"
|
||||
: : "m" (boot_params.hdr.realmode_swtch)
|
||||
: "eax", "ebx", "ecx", "edx");
|
||||
} else {
|
||||
asm volatile("cli");
|
||||
outb(0x80, 0x70); /* Disable NMI */
|
||||
io_delay();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Mask the interrupt lines of both legacy PICs, leaving only the
 * cascade line on the primary one unmasked.
 */
static void mask_all_interrupts(void)
{
	/* Secondary PIC: every line masked. */
	outb(0xff, 0xa1);
	io_delay();

	/* Primary PIC: everything but the cascade masked. */
	outb(0xfb, 0x21);
	io_delay();
}
|
||||
|
||||
/*
 * Clear a possibly asserted IGNNE# in the FPU by writing zero to ports
 * 0xf0 and 0xf1, with an I/O delay after each write.
 */
static void reset_coprocessor(void)
{
	outb(0, 0xf0);
	io_delay();

	outb(0, 0xf1);
	io_delay();
}
|
||||
|
||||
/*
|
||||
* Set up the GDT
|
||||
*/
|
||||
|
||||
/*
 * Memory operand handed to lgdtl/lidtl below: a 16-bit length field
 * followed directly by a 32-bit address, packed so that no padding is
 * inserted between the two.
 */
struct gdt_ptr {
|
||||
u16 len;
|
||||
u32 ptr;
|
||||
} __attribute__((packed));
|
||||
|
||||
static void setup_gdt(void)
|
||||
{
|
||||
/* There are machines which are known to not boot with the GDT
|
||||
being 8-byte unaligned. Intel recommends 16 byte alignment. */
|
||||
static const u64 boot_gdt[] __attribute__((aligned(16))) = {
|
||||
/* CS: code, read/execute, 4 GB, base 0 */
|
||||
[GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
|
||||
/* DS: data, read/write, 4 GB, base 0 */
|
||||
[GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
|
||||
/* TSS: 32-bit tss, 104 bytes, base 4096 */
|
||||
/* We only have a TSS here to keep Intel VT happy;
|
||||
we don't actually use it for anything. */
|
||||
[GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103),
|
||||
};
|
||||
/* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead
|
||||
of the gdt_ptr contents. Thus, make it static so it will
|
||||
stay in memory, at least long enough that we switch to the
|
||||
proper kernel GDT. */
|
||||
static struct gdt_ptr gdt;
|
||||
|
||||
gdt.len = sizeof(boot_gdt)-1;
|
||||
gdt.ptr = (u32)&boot_gdt + (ds() << 4);
|
||||
|
||||
asm volatile("lgdtl %0" : : "m" (gdt));
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the IDT
|
||||
*/
|
||||
static void setup_idt(void)
|
||||
{
|
||||
static const struct gdt_ptr null_idt = {0, 0};
|
||||
asm volatile("lidtl %0" : : "m" (null_idt));
|
||||
}
|
||||
|
||||
/*
|
||||
* Actual invocation sequence
|
||||
*/
|
||||
void go_to_protected_mode(void)
|
||||
{
|
||||
/* Hook before leaving real mode, also disables interrupts */
|
||||
realmode_switch_hook();
|
||||
|
||||
/* Enable the A20 gate */
|
||||
if (enable_a20()) {
|
||||
puts("A20 gate not responding, unable to boot...\n");
|
||||
die();
|
||||
}
|
||||
|
||||
/* Reset coprocessor (IGNNE#) */
|
||||
reset_coprocessor();
|
||||
|
||||
/* Mask all interrupts in the PIC */
|
||||
mask_all_interrupts();
|
||||
|
||||
/* Actual transition to protected mode... */
|
||||
setup_idt();
|
||||
setup_gdt();
|
||||
protected_mode_jump(boot_params.hdr.code32_start,
|
||||
(u32)&boot_params + (ds() << 4));
|
||||
}
|
||||
77
arch/x86/boot/pmjump.S
Normal file
77
arch/x86/boot/pmjump.S
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* The actual transition into protected mode
|
||||
*/
|
||||
|
||||
#include <asm/boot.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.code16
|
||||
|
||||
/*
|
||||
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
|
||||
*/
|
||||
/*
 * On entry %edx holds the boot_params pointer; %eax (the entrypoint) is
 * left untouched here and is consumed by the final jump in in_pm32.
 */
GLOBAL(protected_mode_jump)
|
||||
movl %edx, %esi # Pointer to boot_params table
|
||||
|
||||
/* %ebx = %cs << 4, the linear base of the current code segment */
xorl %ebx, %ebx
|
||||
movw %cs, %bx
|
||||
shll $4, %ebx
|
||||
/* Patch the far-jump offset stored at label 2 below by adding that base */
addl %ebx, 2f
|
||||
jmp 1f # Short jump to serialize on 386/486
|
||||
1:
|
||||
|
||||
/* Selectors handed to in_pm32: %cx for the data segments, %di for ltr */
movw $__BOOT_DS, %cx
|
||||
movw $__BOOT_TSS, %di
|
||||
|
||||
/* Set the PE bit in %cr0 (%edx is free again, its value now lives in %esi) */
movl %cr0, %edx
|
||||
orb $X86_CR0_PE, %dl # Protected mode
|
||||
movl %edx, %cr0
|
||||
|
||||
# Transition to 32-bit mode
|
||||
/* Hand-assembled far jump: opcode bytes, then 32-bit offset, then selector */
.byte 0x66, 0xea # ljmpl opcode
|
||||
2: .long in_pm32 # offset
|
||||
.word __BOOT_CS # segment
|
||||
ENDPROC(protected_mode_jump)
|
||||
|
||||
.code32
|
||||
.section ".text32","ax"
|
||||
/*
 * Target of the far jump in protected_mode_jump.  Uses the registers set
 * up there: %ecx = data selector, %di = TSS selector, %ebx = linear base
 * of the real-mode segment, %eax = 32-bit entry point.
 */
GLOBAL(in_pm32)
|
||||
# Set up data segments for flat 32-bit mode
|
||||
movl %ecx, %ds
|
||||
movl %ecx, %es
|
||||
movl %ecx, %fs
|
||||
movl %ecx, %gs
|
||||
movl %ecx, %ss
|
||||
# The 32-bit code sets up its own stack, but this way we do have
|
||||
# a valid stack if some debugging hack wants to use it.
|
||||
/* Add the real-mode segment base held in %ebx to the stack pointer */
addl %ebx, %esp
|
||||
|
||||
# Set up TR to make Intel VT happy
|
||||
ltr %di
|
||||
|
||||
# Clear registers to allow for future extensions to the
|
||||
# 32-bit boot protocol
|
||||
/* %esi (boot_params pointer) and %eax (entry point) are not cleared */
xorl %ecx, %ecx
|
||||
xorl %edx, %edx
|
||||
xorl %ebx, %ebx
|
||||
xorl %ebp, %ebp
|
||||
xorl %edi, %edi
|
||||
|
||||
# Set up LDTR to make Intel VT happy
|
||||
/* %cx is zero at this point (cleared just above) */
lldt %cx
|
||||
|
||||
jmpl *%eax # Jump to the 32-bit entrypoint
|
||||
ENDPROC(in_pm32)
|
||||
309
arch/x86/boot/printf.c
Normal file
309
arch/x86/boot/printf.c
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Oh, it's a waste of space, but oh-so-yummy for debugging. This
|
||||
* version of printf() does not include 64-bit support. "Live with
|
||||
* it."
|
||||
*
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
/*
 * Parse the run of decimal digits at *s, advance *s past it and return
 * the value (0 if there are no digits).
 */
static int skip_atoi(const char **s)
{
	const char *p = *s;
	int value = 0;

	for (; isdigit(*p); p++)
		value = value * 10 + *p - '0';

	*s = p;
	return value;
}
|
||||
|
||||
/* Formatting flags understood by number() */
#define ZEROPAD	1		/* pad with zero */
#define SIGN	2		/* unsigned/signed long */
#define PLUS	4		/* show plus */
#define SPACE	8		/* space if plus */
#define LEFT	16		/* left justified */
#define SMALL	32		/* Must be 32 == 0x20 */
#define SPECIAL	64		/* 0x */

/* Divide n by base in place (as unsigned long) and yield the remainder. */
#define __do_div(n, base) ({ \
int __res; \
__res = ((unsigned long) n) % (unsigned) base; \
n = ((unsigned long) n) / (unsigned) base; \
__res; })

/*
 * Format num in the given base into str and return the position just
 * past the last character written (no terminating NUL is added).
 *
 * size is the minimum field width, precision the minimum number of
 * digits, type a mask of the flags above.  Returns NULL when base is
 * outside 2..16.
 */
static char *number(char *str, long num, int base, int size, int precision,
		    int type)
{
	/* callers only use base 8, 10 or 16, so 16 digits are enough */
	static const char digits[16] = "0123456789ABCDEF";

	char rev[66];		/* digits, least significant first */
	char pad, sign = 0;
	int ndigits = 0;
	int lead_zeros;

	/*
	 * 0 or 0x20: OR-ing it into a character leaves digits unchanged
	 * and turns upper-case letters into lower-case ones.
	 */
	const char locase = (type & SMALL);

	/* Left justification overrides zero padding. */
	if (type & LEFT)
		type &= ~ZEROPAD;
	if (base < 2 || base > 16)
		return NULL;
	pad = (type & ZEROPAD) ? '0' : ' ';

	/* A sign character, if any, takes one column of the field. */
	if (type & SIGN) {
		if (num < 0) {
			sign = '-';
			num = -num;
		} else if (type & PLUS) {
			sign = '+';
		} else if (type & SPACE) {
			sign = ' ';
		}
		if (sign)
			size--;
	}

	/* So does the "0x" / "0" prefix. */
	if (type & SPECIAL) {
		if (base == 16)
			size -= 2;
		else if (base == 8)
			size -= 1;
	}

	/* Always at least one digit, so zero comes out as "0". */
	do {
		rev[ndigits++] = (digits[__do_div(num, base)] | locase);
	} while (num != 0);

	if (ndigits > precision)
		precision = ndigits;
	size -= precision;
	lead_zeros = precision - ndigits;

	/* Right-justified with blanks: the padding precedes everything. */
	if (!(type & (ZEROPAD + LEFT)))
		for (; size > 0; size--)
			*str++ = ' ';

	if (sign)
		*str++ = sign;

	if (type & SPECIAL) {
		if (base == 8) {
			*str++ = '0';
		} else if (base == 16) {
			*str++ = '0';
			*str++ = ('X' | locase);
		}
	}

	/* Zero padding goes between the prefix and the digits. */
	if (!(type & LEFT))
		for (; size > 0; size--)
			*str++ = pad;

	while (lead_zeros-- > 0)
		*str++ = '0';
	while (ndigits > 0)
		*str++ = rev[--ndigits];

	/* Left-justified: whatever is left of the field trails the number. */
	for (; size > 0; size--)
		*str++ = ' ';

	return str;
}
|
||||
|
||||
int vsprintf(char *buf, const char *fmt, va_list args)
|
||||
{
|
||||
int len;
|
||||
unsigned long num;
|
||||
int i, base;
|
||||
char *str;
|
||||
const char *s;
|
||||
|
||||
int flags; /* flags to number() */
|
||||
|
||||
int field_width; /* width of output field */
|
||||
int precision; /* min. # of digits for integers; max
|
||||
number of chars for from string */
|
||||
int qualifier; /* 'h', 'l', or 'L' for integer fields */
|
||||
|
||||
for (str = buf; *fmt; ++fmt) {
|
||||
if (*fmt != '%') {
|
||||
*str++ = *fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* process flags */
|
||||
flags = 0;
|
||||
repeat:
|
||||
++fmt; /* this also skips first '%' */
|
||||
switch (*fmt) {
|
||||
case '-':
|
||||
flags |= LEFT;
|
||||
goto repeat;
|
||||
case '+':
|
||||
flags |= PLUS;
|
||||
goto repeat;
|
||||
case ' ':
|
||||
flags |= SPACE;
|
||||
goto repeat;
|
||||
case '#':
|
||||
flags |= SPECIAL;
|
||||
goto repeat;
|
||||
case '0':
|
||||
flags |= ZEROPAD;
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* get field width */
|
||||
field_width = -1;
|
||||
if (isdigit(*fmt))
|
||||
field_width = skip_atoi(&fmt);
|
||||
else if (*fmt == '*') {
|
||||
++fmt;
|
||||
/* it's the next argument */
|
||||
field_width = va_arg(args, int);
|
||||
if (field_width < 0) {
|
||||
field_width = -field_width;
|
||||
flags |= LEFT;
|
||||
}
|
||||
}
|
||||
|
||||
/* get the precision */
|
||||
precision = -1;
|
||||
if (*fmt == '.') {
|
||||
++fmt;
|
||||
if (isdigit(*fmt))
|
||||
precision = skip_atoi(&fmt);
|
||||
else if (*fmt == '*') {
|
||||
++fmt;
|
||||
/* it's the next argument */
|
||||
precision = va_arg(args, int);
|
||||
}
|
||||
if (precision < 0)
|
||||
precision = 0;
|
||||
}
|
||||
|
||||
/* get the conversion qualifier */
|
||||
qualifier = -1;
|
||||
if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
|
||||
qualifier = *fmt;
|
||||
++fmt;
|
||||
}
|
||||
|
||||
/* default base */
|
||||
base = 10;
|
||||
|
||||
switch (*fmt) {
|
||||
case 'c':
|
||||
if (!(flags & LEFT))
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
*str++ = (unsigned char)va_arg(args, int);
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 's':
|
||||
s = va_arg(args, char *);
|
||||
len = strnlen(s, precision);
|
||||
|
||||
if (!(flags & LEFT))
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = *s++;
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 'p':
|
||||
if (field_width == -1) {
|
||||
field_width = 2 * sizeof(void *);
|
||||
flags |= ZEROPAD;
|
||||
}
|
||||
str = number(str,
|
||||
(unsigned long)va_arg(args, void *), 16,
|
||||
field_width, precision, flags);
|
||||
continue;
|
||||
|
||||
case 'n':
|
||||
if (qualifier == 'l') {
|
||||
long *ip = va_arg(args, long *);
|
||||
*ip = (str - buf);
|
||||
} else {
|
||||
int *ip = va_arg(args, int *);
|
||||
*ip = (str - buf);
|
||||
}
|
||||
continue;
|
||||
|
||||
case '%':
|
||||
*str++ = '%';
|
||||
continue;
|
||||
|
||||
/* integer number formats - set up the flags and "break" */
|
||||
case 'o':
|
||||
base = 8;
|
||||
break;
|
||||
|
||||
case 'x':
|
||||
flags |= SMALL;
|
||||
case 'X':
|
||||
base = 16;
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
case 'i':
|
||||
flags |= SIGN;
|
||||
case 'u':
|
||||
break;
|
||||
|
||||
default:
|
||||
*str++ = '%';
|
||||
if (*fmt)
|
||||
*str++ = *fmt;
|
||||
else
|
||||
--fmt;
|
||||
continue;
|
||||
}
|
||||
if (qualifier == 'l')
|
||||
num = va_arg(args, unsigned long);
|
||||
else if (qualifier == 'h') {
|
||||
num = (unsigned short)va_arg(args, int);
|
||||
if (flags & SIGN)
|
||||
num = (short)num;
|
||||
} else if (flags & SIGN)
|
||||
num = va_arg(args, int);
|
||||
else
|
||||
num = va_arg(args, unsigned int);
|
||||
str = number(str, num, base, field_width, precision, flags);
|
||||
}
|
||||
*str = '\0';
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
/*
 * Variadic front end for vsprintf(); returns what vsprintf() returns.
 */
int sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int written;

	va_start(ap, fmt);
	written = vsprintf(buf, fmt, ap);
	va_end(ap);

	return written;
}
|
||||
|
||||
/*
 * Format into a 1024-byte stack buffer with vsprintf() and print the
 * result with puts().  Returns the length reported by vsprintf().
 */
int printf(const char *fmt, ...)
{
	char printf_buf[1024];
	va_list ap;
	int count;

	va_start(ap, fmt);
	count = vsprintf(printf_buf, fmt, ap);
	va_end(ap);

	puts(printf_buf);

	return count;
}
|
||||
30
arch/x86/boot/regs.c
Normal file
30
arch/x86/boot/regs.c
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
*
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2 or (at your
|
||||
* option) any later version; incorporated herein by reference.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Simple helper function for initializing a register set.
|
||||
*
|
||||
* Note that this sets EFLAGS_CF in the input register set; this
|
||||
* makes it easier to catch functions which do nothing but don't
|
||||
* explicitly set CF.
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "string.h"
|
||||
|
||||
void initregs(struct biosregs *reg)
|
||||
{
|
||||
memset(reg, 0, sizeof *reg);
|
||||
reg->eflags |= X86_EFLAGS_CF;
|
||||
reg->ds = ds();
|
||||
reg->es = ds();
|
||||
reg->fs = fs();
|
||||
reg->gs = gs();
|
||||
}
|
||||
64
arch/x86/boot/setup.ld
Normal file
64
arch/x86/boot/setup.ld
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* setup.ld
|
||||
*
|
||||
* Linker script for the i386 setup code
|
||||
*/
|
||||
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
|
||||
OUTPUT_ARCH(i386)
|
||||
ENTRY(_start)
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
. = 0;
|
||||
.bstext : { *(.bstext) }
|
||||
.bsdata : { *(.bsdata) }
|
||||
|
||||
/* 495 == 0x1ef; the ASSERT at the bottom requires hdr to land at 0x1f1 */
. = 495;
|
||||
.header : { *(.header) }
|
||||
.entrytext : { *(.entrytext) }
|
||||
.inittext : { *(.inittext) }
|
||||
.initdata : { *(.initdata) }
|
||||
/* End of the init sections; limited to 5*512 bytes by the last ASSERT */
__end_init = .;
|
||||
|
||||
.text : { *(.text) }
|
||||
.text32 : { *(.text32) }
|
||||
|
||||
. = ALIGN(16);
|
||||
.rodata : { *(.rodata*) }
|
||||
|
||||
/* video_cards / video_cards_end bracket the contents of .videocards */
.videocards : {
|
||||
video_cards = .;
|
||||
*(.videocards)
|
||||
video_cards_end = .;
|
||||
}
|
||||
|
||||
. = ALIGN(16);
|
||||
.data : { *(.data*) }
|
||||
|
||||
/* A 32-bit marker value emitted after .data, addressable as setup_sig */
.signature : {
|
||||
setup_sig = .;
|
||||
LONG(0x5a5aaa55)
|
||||
}
|
||||
|
||||
|
||||
. = ALIGN(16);
|
||||
.bss :
|
||||
{
|
||||
__bss_start = .;
|
||||
*(.bss)
|
||||
__bss_end = .;
|
||||
}
|
||||
. = ALIGN(16);
|
||||
/* 16-byte aligned end of the image, after .bss */
_end = .;
|
||||
|
||||
/DISCARD/ : { *(.note*) }
|
||||
|
||||
/*
|
||||
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
|
||||
*/
|
||||
. = ASSERT(_end <= 0x8000, "Setup too big!");
|
||||
. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
|
||||
/* Necessary for the very-old-loader check to work... */
|
||||
. = ASSERT(__end_init <= 5*512, "init sections too big!");
|
||||
|
||||
}
|
||||
157
arch/x86/boot/string.c
Normal file
157
arch/x86/boot/string.c
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Very basic string functions
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include "ctype.h"
|
||||
|
||||
int memcmp(const void *s1, const void *s2, size_t len)
|
||||
{
|
||||
u8 diff;
|
||||
asm("repe; cmpsb; setnz %0"
|
||||
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
|
||||
return diff;
|
||||
}
|
||||
|
||||
/*
 * Compare two NUL-terminated strings byte by byte, as unsigned chars.
 *
 * Returns 0 if they are equal, a negative value if str1 sorts before
 * str2 and a positive value if it sorts after, i.e. the usual strcmp()
 * contract.  (The difference used to be computed as *s2 - *s1, which
 * gave the opposite sign; tests against zero are unaffected.)
 */
int strcmp(const char *str1, const char *str2)
{
	const unsigned char *s1 = (const unsigned char *)str1;
	const unsigned char *s2 = (const unsigned char *)str2;

	while (*s1 || *s2) {
		int delta = *s1 - *s2;

		if (delta)
			return delta;
		s1++;
		s2++;
	}
	return 0;
}
|
||||
|
||||
/*
 * Compare at most count bytes of two strings, as unsigned chars.
 *
 * Returns -1, 0 or 1; the comparison stops at the first difference, at
 * a NUL, or after count bytes.
 */
int strncmp(const char *cs, const char *ct, size_t count)
{
	size_t k;

	for (k = 0; k < count; k++) {
		const unsigned char a = cs[k];
		const unsigned char b = ct[k];

		if (a != b)
			return (a < b) ? -1 : 1;
		if (a == '\0')
			return 0;
	}
	return 0;
}
|
||||
|
||||
/*
 * Length of the string s, but never more than maxlen.
 *
 * The bound is checked before each byte is read, so no more than
 * maxlen bytes are ever examined; a buffer of exactly maxlen bytes
 * without a terminating NUL is therefore safe to pass.
 */
size_t strnlen(const char *s, size_t maxlen)
{
	const char *es = s;

	while (maxlen && *es) {
		es++;
		maxlen--;
	}

	return (es - s);
}
|
||||
|
||||
/*
 * Convert the leading run of decimal digits in s to an unsigned int;
 * returns 0 when s does not start with a digit.
 */
unsigned int atou(const char *s)
{
	unsigned int total = 0;

	for (; isdigit(*s); s++)
		total = total * 10 + (*s - '0');

	return total;
}
|
||||
|
||||
/* Small and fast, but only meaningful for digits and letters */
#define TOLOWER(x) ((x) | 0x20)

/*
 * Guess the base from the prefix: "0x" followed by a hex digit means
 * 16, any other leading '0' means 8, everything else 10.
 */
static unsigned int simple_guess_base(const char *cp)
{
	if (cp[0] != '0')
		return 10;

	return (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) ? 16 : 8;
}

/**
 * simple_strtoull - convert a string to an unsigned long long
 * @cp: The start of the string
 * @endp: If not NULL, receives a pointer to the first unparsed character
 * @base: The number base to use; 0 selects it from the string's prefix
 *
 * Parsing stops at the first character that is not a digit of @base.
 * A "0x"/"0X" prefix is skipped when the base is 16.
 */
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
	unsigned long long acc = 0;
	unsigned int digit;

	if (base == 0)
		base = simple_guess_base(cp);

	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
		cp += 2;

	for (; isxdigit(*cp); cp++) {
		if (isdigit(*cp))
			digit = *cp - '0';
		else
			digit = TOLOWER(*cp) - 'a' + 10;

		if (digit >= base)
			break;
		acc = acc * base + digit;
	}

	if (endp)
		*endp = (char *)cp;

	return acc;
}
|
||||
|
||||
/**
 * strlen - Find the length of a string
 * @s: The string to be sized
 *
 * Returns the number of bytes that precede the terminating NUL.
 */
size_t strlen(const char *s)
{
	size_t n = 0;

	while (s[n] != '\0')
		n++;

	return n;
}
|
||||
|
||||
/**
 * strstr - Find the first substring in a %NUL terminated string
 * @s1: The string to be searched
 * @s2: The string to search for
 *
 * Returns a pointer to the first occurrence of @s2 inside @s1, @s1 itself
 * when @s2 is empty, or NULL when there is no match.
 */
char *strstr(const char *s1, const char *s2)
{
	size_t needle_len = strlen(s2);
	size_t remaining;

	if (needle_len == 0)
		return (char *)s1;

	/* slide the window while enough of the haystack is left to match */
	for (remaining = strlen(s1); remaining >= needle_len; remaining--, s1++) {
		if (memcmp(s1, s2, needle_len) == 0)
			return (char *)s1;
	}

	return NULL;
}
|
||||
21
arch/x86/boot/string.h
Normal file
21
arch/x86/boot/string.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef BOOT_STRING_H
#define BOOT_STRING_H

/* Undef any of these macros coming from string_32.h. */
#undef memcpy
#undef memset
#undef memcmp

/*
 * Prototypes for the out-of-line implementations.  They are declared before
 * the macros below are defined, so the declarations keep the plain names.
 */
void *memcpy(void *dst, const void *src, size_t len);
void *memset(void *dst, int c, size_t len);
int memcmp(const void *s1, const void *s2, size_t len);

/*
 * Use the compiler builtins by default.  A .c file that needs the optimized
 * out-of-line version should "#undef memcpy" (and likewise for the others)
 * and link against the right string.c.
 *
 * memcpy and memset are function-like macros, whereas memcmp is an
 * object-like macro, so every use of the bare identifier memcmp is replaced
 * as well.
 */
#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)
#define memcmp	__builtin_memcmp

#endif /* BOOT_STRING_H */
|
||||
435
arch/x86/boot/tools/build.c
Normal file
435
arch/x86/boot/tools/build.c
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
/*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright (C) 1997 Martin Mares
|
||||
* Copyright (C) 2007 H. Peter Anvin
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file builds a disk-image from three different files:
|
||||
*
|
||||
* - setup: 8086 machine code, sets up system parm
|
||||
* - system: 80386 code for actual system
|
||||
* - zoffset.h: header with ZO_* defines
|
||||
*
|
||||
* It does some checking that all files are of the correct type, and writes
|
||||
* the result to the specified destination, removing headers and padding to
|
||||
* the right amount. It also writes some system data to stdout.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Changes by tytso to allow root device specification
|
||||
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
|
||||
* Cross compiling fixes by Gertjan van Wingerde, July 1996
|
||||
* Rewritten by Martin Mares, April 1997
|
||||
* Substantially overhauled by H. Peter Anvin, April 2007
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <tools/le_byteshift.h>
|
||||
|
||||
/* Short fixed-width aliases used throughout this tool */
typedef unsigned char  u8;
typedef unsigned short u16;
typedef unsigned int   u32;

/* Default root device, written into the image by main() as (major << 8) | minor */
#define DEFAULT_MAJOR_ROOT 0
#define DEFAULT_MINOR_ROOT 0
#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)

/* Minimal number of setup sectors */
#define SETUP_SECT_MIN 5
/* Upper bound used to size buf[] below */
#define SETUP_SECT_MAX 64

/*
 * This must be large enough to hold the entire setup.  parse_zoffset() also
 * uses it as scratch space for the text of zoffset.h before the setup image
 * is read.
 */
u8 buf[SETUP_SECT_MAX*512];
/* NOTE(review): not referenced anywhere else in the visible part of this file */
int is_big_kernel;

/* Bytes reserved after the setup code for the PE/COFF .reloc section */
#define PECOFF_RELOC_RESERVE 0x20

/*
 * Values of the matching ZO_<name> defines, filled in by parse_zoffset().
 * efi_stub_defaults() seeds efi_pe_entry and startup_64 for kernels whose
 * zoffset.h lacks them.
 */
unsigned long efi32_stub_entry;
unsigned long efi64_stub_entry;
unsigned long efi_pe_entry;
unsigned long startup_64;
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
/*
 * 256-entry lookup table indexed by one byte; consumed by
 * partial_crc32_one().  The values appear to be the common reflected CRC-32
 * table (entry 0x80 is 0xedb88320) - confirm before relying on that.
 */
static const u32 crctab32[] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
	0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
	0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
	0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
	0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
	0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
	0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
	0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
	0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
	0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
	0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
	0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
	0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
	0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
	0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
	0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
	0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
	0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
	0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
	0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
	0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
	0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
	0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
	0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
	0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
	0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
	0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
	0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
	0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
	0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
	0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
	0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
	0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
	0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
	0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
	0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
	0x2d02ef8d
};
|
||||
|
||||
/*
 * Fold one byte into a running CRC.
 * @c:   next input byte
 * @crc: CRC accumulated so far
 *
 * Returns the updated CRC; crctab32[] is indexed by the low byte of
 * (crc ^ c).
 */
static u32 partial_crc32_one(u8 c, u32 crc)
{
	u8 index = (crc ^ c) & 0xff;

	return (crc >> 8) ^ crctab32[index];
}
|
||||
|
||||
/*
 * Fold a buffer into a running CRC.
 * @s:   bytes to process
 * @len: number of bytes at @s
 * @crc: CRC accumulated so far
 *
 * Returns the updated CRC.
 */
static u32 partial_crc32(const u8 *s, int len, u32 crc)
{
	const u8 *p = s;
	int left;

	for (left = len; left != 0; left--, p++)
		crc = partial_crc32_one(*p, crc);

	return crc;
}
|
||||
|
||||
/*
 * die - print a formatted error message and terminate
 * @str: printf-style format string, followed by its arguments
 *
 * Writes the message and a trailing newline to stderr, then exits with
 * status 1.  Does not return.
 */
static void die(const char * str, ...)
{
	va_list args;
	va_start(args, str);
	vfprintf(stderr, str, args);
	va_end(args);	/* every va_start must be paired with va_end */
	fputc('\n', stderr);
	exit(1);
}
|
||||
|
||||
/* Print the expected command line and exit through die() (status 1). */
static void usage(void)
{
	die("Usage: build setup system zoffset.h image");
}
|
||||
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
|
||||
/*
 * Patch the header of the named PE/COFF section inside buf[].
 * @section_name: name to look for (at most 8 bytes are compared)
 * @vma:     value for the section's vma field
 * @size:    value for the section's size field
 * @datasz:  value for the 'size of initialised data' field
 * @offset:  value for the 'file offset' field
 *
 * The PE header offset is read from buf[0x3c] and the section count from
 * offset 6 of that header.  Only the first matching section is updated; if
 * none matches, buf[] is left untouched.
 */
static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
{
	unsigned int pe_header = get_unaligned_le32(&buf[0x3c]);
	unsigned short remaining = get_unaligned_le16(&buf[pe_header + 6]);
	u8 *hdr;

	/* the section table starts at a different offset on 32-bit builds */
#ifdef CONFIG_X86_32
	hdr = &buf[pe_header + 0xa8];
#else
	hdr = &buf[pe_header + 0xb8];
#endif

	/* each section header is 0x28 bytes long */
	for (; remaining > 0; remaining--, hdr += 0x28) {
		if (strncmp((char*)hdr, section_name, 8) != 0)
			continue;

		/* section header size field */
		put_unaligned_le32(size, hdr + 0x8);

		/* section header vma field */
		put_unaligned_le32(vma, hdr + 0xc);

		/* section header 'size of initialised data' field */
		put_unaligned_le32(datasz, hdr + 0x10);

		/* section header 'file offset' field */
		put_unaligned_le32(offset, hdr + 0x14);

		return;
	}
}
|
||||
|
||||
/*
 * Convenience wrapper for sections that are fully backed by file data: the
 * vma equals the file offset and the initialised-data size equals the
 * section size.
 */
static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
{
	const u32 vma = offset;
	const u32 datasz = size;

	update_pecoff_section_header_fields(section_name, vma, size, datasz, offset);
}
|
||||
|
||||
static void update_pecoff_setup_and_reloc(unsigned int size)
|
||||
{
|
||||
u32 setup_offset = 0x200;
|
||||
u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
|
||||
u32 setup_size = reloc_offset - setup_offset;
|
||||
|
||||
update_pecoff_section_header(".setup", setup_offset, setup_size);
|
||||
update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
|
||||
|
||||
/*
|
||||
* Modify .reloc section contents with a single entry. The
|
||||
* relocation is applied to offset 10 of the relocation section.
|
||||
*/
|
||||
put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
|
||||
put_unaligned_le32(10, &buf[reloc_offset + 4]);
|
||||
}
|
||||
|
||||
static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
|
||||
{
|
||||
unsigned int pe_header;
|
||||
unsigned int text_sz = file_sz - text_start;
|
||||
|
||||
pe_header = get_unaligned_le32(&buf[0x3c]);
|
||||
|
||||
/*
|
||||
* Size of code: Subtract the size of the first sector (512 bytes)
|
||||
* which includes the header.
|
||||
*/
|
||||
put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
|
||||
|
||||
/*
|
||||
* Address of entry point for PE/COFF executable
|
||||
*/
|
||||
put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
|
||||
|
||||
update_pecoff_section_header(".text", text_start, text_sz);
|
||||
}
|
||||
|
||||
static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
|
||||
{
|
||||
unsigned int pe_header;
|
||||
unsigned int bss_sz = init_sz - file_sz;
|
||||
|
||||
pe_header = get_unaligned_le32(&buf[0x3c]);
|
||||
|
||||
/* Size of uninitialized data */
|
||||
put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
|
||||
|
||||
/* Size of image */
|
||||
put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
|
||||
|
||||
update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
|
||||
}
|
||||
|
||||
static int reserve_pecoff_reloc_section(int c)
|
||||
{
|
||||
/* Reserve 0x20 bytes for .reloc section */
|
||||
memset(buf+c, 0, PECOFF_RELOC_RESERVE);
|
||||
return PECOFF_RELOC_RESERVE;
|
||||
}
|
||||
|
||||
static void efi_stub_defaults(void)
|
||||
{
|
||||
/* Defaults for old kernel */
|
||||
#ifdef CONFIG_X86_32
|
||||
efi_pe_entry = 0x10;
|
||||
#else
|
||||
efi_pe_entry = 0x210;
|
||||
startup_64 = 0x200;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void efi_stub_entry_update(void)
|
||||
{
|
||||
unsigned long addr = efi32_stub_entry;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Yes, this is really how we defined it :( */
|
||||
addr = efi64_stub_entry - 0x200;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EFI_MIXED
|
||||
if (efi32_stub_entry != addr)
|
||||
die("32-bit and 64-bit EFI entry points do not match\n");
|
||||
#endif
|
||||
put_unaligned_le32(addr, &buf[0x264]);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* No-op replacements used when CONFIG_EFI_STUB is not defined. */
static inline void update_pecoff_setup_and_reloc(unsigned int size)
{
}

static inline void update_pecoff_text(unsigned int text_start,
				      unsigned int file_sz)
{
}

static inline void update_pecoff_bss(unsigned int file_sz,
				     unsigned int init_sz)
{
}

static inline void efi_stub_defaults(void)
{
}

static inline void efi_stub_entry_update(void)
{
}

/* Nothing is reserved for .reloc, so the setup size stays unchanged. */
static inline int reserve_pecoff_reloc_section(int c)
{
	return 0;
}
|
||||
#endif /* CONFIG_EFI_STUB */
|
||||
|
||||
|
||||
/*
 * Parse zoffset.h and find the entry points. We could just #include zoffset.h
 * but that would mean tools/build would have to be rebuilt every time. It's
 * not as if parsing it is hard...
 *
 * PARSE_ZOFS(p, sym): if the text at p starts with "#define ZO_<sym> ",
 * parse the hexadecimal value that follows into the variable named sym.
 * "#define ZO_" is 11 characters long and sizeof(#sym) is the length of the
 * name plus one, which accounts for the space after the name.
 */
#define PARSE_ZOFS(p, sym) do { \
	if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym)))	\
		sym = strtoul(p + 11 + sizeof(#sym), NULL, 16);		\
} while (0)
|
||||
|
||||
static void parse_zoffset(char *fname)
|
||||
{
|
||||
FILE *file;
|
||||
char *p;
|
||||
int c;
|
||||
|
||||
file = fopen(fname, "r");
|
||||
if (!file)
|
||||
die("Unable to open `%s': %m", fname);
|
||||
c = fread(buf, 1, sizeof(buf) - 1, file);
|
||||
if (ferror(file))
|
||||
die("read-error on `zoffset.h'");
|
||||
fclose(file);
|
||||
buf[c] = 0;
|
||||
|
||||
p = (char *)buf;
|
||||
|
||||
while (p && *p) {
|
||||
PARSE_ZOFS(p, efi32_stub_entry);
|
||||
PARSE_ZOFS(p, efi64_stub_entry);
|
||||
PARSE_ZOFS(p, efi_pe_entry);
|
||||
PARSE_ZOFS(p, startup_64);
|
||||
|
||||
p = strchr(p, '\n');
|
||||
while (p && (*p == '\r' || *p == '\n'))
|
||||
p++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * main - assemble the boot image
 *
 * Arguments, as printed by usage():
 *   argv[1]  setup      setup code (input)
 *   argv[2]  system     kernel proper (input)
 *   argv[3]  zoffset.h  header with ZO_* defines (input)
 *   argv[4]  image      resulting image (output)
 *
 * Output layout: padded setup sectors, the kernel, zero padding up to a
 * 16-byte boundary minus 4, and a 4-byte CRC computed over everything
 * written before it.  Every failure exits through die() with status 1;
 * success returns 0.
 */
int main(int argc, char ** argv)
{
	unsigned int i, sz, setup_sectors, init_sz;
	int c;
	u32 sys_size;
	struct stat sb;
	FILE *file, *dest;
	int fd;
	void *kernel;
	u32 crc = 0xffffffffUL;

	efi_stub_defaults();

	if (argc != 5)
		usage();
	/* uses buf[] as scratch space, so it must run before the setup is read */
	parse_zoffset(argv[3]);

	dest = fopen(argv[4], "w");
	if (!dest)
		die("Unable to write `%s': %m", argv[4]);

	/* Copy the setup code */
	file = fopen(argv[1], "r");
	if (!file)
		die("Unable to open `%s': %m", argv[1]);
	/*
	 * At most sizeof(buf) bytes are read; a larger setup file is not
	 * detected here.
	 */
	c = fread(buf, 1, sizeof(buf), file);
	if (ferror(file))
		die("read-error on `setup'");
	if (c < 1024)
		die("The setup must be at least 1024 bytes");
	if (get_unaligned_le16(&buf[510]) != 0xAA55)
		die("Boot block hasn't got boot flag (0xAA55)");
	fclose(file);

	/*
	 * NOTE(review): nothing checks that c plus the reserved bytes (and
	 * the sector padding below) still fits in buf[] - confirm the setup
	 * image can never get that large.
	 */
	c += reserve_pecoff_reloc_section(c);

	/* Pad unused space with zeros */
	setup_sectors = (c + 511) / 512;
	if (setup_sectors < SETUP_SECT_MIN)
		setup_sectors = SETUP_SECT_MIN;
	i = setup_sectors*512;	/* padded setup size in bytes */
	memset(buf+c, 0, i-c);

	update_pecoff_setup_and_reloc(i);

	/* Set the default root device */
	put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);

	printf("Setup is %d bytes (padded to %d bytes).\n", c, i);

	/* Open and stat the kernel file */
	fd = open(argv[2], O_RDONLY);
	if (fd < 0)
		die("Unable to open `%s': %m", argv[2]);
	if (fstat(fd, &sb))
		die("Unable to stat `%s': %m", argv[2]);
	sz = sb.st_size;
	printf("System is %d kB\n", (sz+1023)/1024);
	kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
	if (kernel == MAP_FAILED)
		die("Unable to mmap '%s': %m", argv[2]);
	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
	sys_size = (sz + 15 + 4) / 16;

	/* Patch the setup code with the appropriate size parameters */
	buf[0x1f1] = setup_sectors-1;
	put_unaligned_le32(sys_size, &buf[0x1f4]);

	/* PE/COFF fix-ups; these are no-ops when CONFIG_EFI_STUB is not defined */
	update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
	init_sz = get_unaligned_le32(&buf[0x260]);
	update_pecoff_bss(i + (sys_size * 16), init_sz);

	efi_stub_entry_update();

	/* all header patching is done: checksum and emit the setup part */
	crc = partial_crc32(buf, i, crc);
	if (fwrite(buf, 1, i, dest) != i)
		die("Writing setup failed");

	/* Copy the kernel code */
	crc = partial_crc32(kernel, sz, crc);
	if (fwrite(kernel, 1, sz, dest) != sz)
		die("Writing kernel failed");

	/* Add padding leaving 4 bytes for the checksum */
	while (sz++ < (sys_size*16) - 4) {
		crc = partial_crc32_one('\0', crc);
		if (fwrite("\0", 1, 1, dest) != 1)
			die("Writing padding failed");
	}

	/* Write the CRC */
	printf("CRC %x\n", crc);
	/* buf[] has already been written out, so its start is reused as scratch */
	put_unaligned_le32(crc, buf);
	if (fwrite(buf, 1, 4, dest) != 4)
		die("Writing CRC failed");

	/* Catch any delayed write failures */
	if (fclose(dest))
		die("Writing image failed");

	close(fd);

	/* Everything is OK */
	return 0;
}
|
||||
139
arch/x86/boot/tty.c
Normal file
139
arch/x86/boot/tty.c
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Very simple screen and serial I/O
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
int early_serial_base;
|
||||
|
||||
#define XMTRDY 0x20
|
||||
|
||||
#define TXR 0 /* Transmit register (WRITE) */
|
||||
#define LSR 5 /* Line Status */
|
||||
|
||||
/*
|
||||
* These functions are in .inittext so they can be used to signal
|
||||
* error during initialization.
|
||||
*/
|
||||
|
||||
static void __attribute__((section(".inittext"))) serial_putchar(int ch)
|
||||
{
|
||||
unsigned timeout = 0xffff;
|
||||
|
||||
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
|
||||
cpu_relax();
|
||||
|
||||
outb(ch, early_serial_base + TXR);
|
||||
}
|
||||
|
||||
static void __attribute__((section(".inittext"))) bios_putchar(int ch)
|
||||
{
|
||||
struct biosregs ireg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.bx = 0x0007;
|
||||
ireg.cx = 0x0001;
|
||||
ireg.ah = 0x0e;
|
||||
ireg.al = ch;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
}
|
||||
|
||||
void __attribute__((section(".inittext"))) putchar(int ch)
|
||||
{
|
||||
if (ch == '\n')
|
||||
putchar('\r'); /* \n -> \r\n */
|
||||
|
||||
bios_putchar(ch);
|
||||
|
||||
if (early_serial_base != 0)
|
||||
serial_putchar(ch);
|
||||
}
|
||||
|
||||
/*
 * Write a NUL-terminated string with putchar().  Unlike the C library
 * puts(), no newline is appended.
 */
void __attribute__((section(".inittext"))) puts(const char *str)
{
	for (; *str; str++)
		putchar(*str);
}
|
||||
|
||||
/*
|
||||
* Read the CMOS clock through the BIOS, and return the
|
||||
* seconds in BCD.
|
||||
*/
|
||||
|
||||
static u8 gettime(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x02;
|
||||
intcall(0x1a, &ireg, &oreg);
|
||||
|
||||
return oreg.dh;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read from the keyboard
|
||||
*/
|
||||
int getchar(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
/* ireg.ah = 0x00; */
|
||||
intcall(0x16, &ireg, &oreg);
|
||||
|
||||
return oreg.al;
|
||||
}
|
||||
|
||||
static int kbd_pending(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x01;
|
||||
intcall(0x16, &ireg, &oreg);
|
||||
|
||||
return !(oreg.eflags & X86_EFLAGS_ZF);
|
||||
}
|
||||
|
||||
/* Read and discard keystrokes for as long as kbd_pending() reports one. */
void kbd_flush(void)
{
	while (kbd_pending())
		getchar();
}
|
||||
|
||||
/*
 * Wait for a key press, giving up after the value returned by gettime()
 * has changed 30 times.
 *
 * Returns the result of getchar() when a key is pending, or 0 on timeout.
 */
int getchar_timeout(void)
{
	int ticks_left;
	int last = gettime();

	for (ticks_left = 30; ticks_left > 0; ) {
		int now;

		if (kbd_pending())
			return getchar();

		now = gettime();
		if (now == last)
			continue;

		/* the clock moved on: one tick used up */
		last = now;
		ticks_left--;
	}

	return 0;	/* Timeout! */
}
|
||||
|
||||
21
arch/x86/boot/version.c
Normal file
21
arch/x86/boot/version.c
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Kernel version string
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include <generated/utsrelease.h>
|
||||
#include <generated/compile.h>
|
||||
|
||||
/*
 * Assembled at compile time by string-literal concatenation:
 * "<release> (<user>@<host>) <version>".  The macros are presumably supplied
 * by the generated headers included above - confirm where UTS_VERSION comes
 * from.
 */
const char kernel_version[] =
	UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
	UTS_VERSION;
|
||||
72
arch/x86/boot/vesa.h
Normal file
72
arch/x86/boot/vesa.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
|
||||
* Boston MA 02111-1307, USA; either version 2 of the License, or
|
||||
* (at your option) any later version; incorporated herein by reference.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef BOOT_VESA_H
|
||||
#define BOOT_VESA_H
|
||||
|
||||
/* Segment:offset pointer; the offset word is stored first, then the segment */
typedef struct {
	u16 off, seg;
} far_ptr;
|
||||
|
||||
/*
 * VESA General Information table
 *
 * Packed; the number in each comment is the field's byte offset.  The
 * fields add up to 256 bytes (20 + 236).
 */
struct vesa_general_info {
	u32 signature;		/* 0  Magic number = "VESA" */
	u16 version;		/* 4 */
	far_ptr vendor_string;	/* 6 */
	u32 capabilities;	/* 10 */
	far_ptr video_mode_ptr;	/* 14 */
	u16 total_memory;	/* 18 */

	u8 reserved[236];	/* 20 */
} __attribute__ ((packed));

/* "VESA" as a 32-bit value with 'V' in the lowest byte */
#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
|
||||
|
||||
/*
 * Per-mode information block.
 *
 * Packed; the number in each comment is the field's byte offset.  The
 * fields add up to 256 bytes (50 + 206).
 */
struct vesa_mode_info {
	u16 mode_attr;		/* 0 */
	u8 win_attr[2];		/* 2 */
	u16 win_grain;		/* 4 */
	u16 win_size;		/* 6 */
	u16 win_seg[2];		/* 8 */
	far_ptr win_scheme;	/* 12 */
	u16 logical_scan;	/* 16 */

	u16 h_res;		/* 18 */
	u16 v_res;		/* 20 */
	u8 char_width;		/* 22 */
	u8 char_height;		/* 23 */
	u8 memory_planes;	/* 24 */
	u8 bpp;			/* 25 */
	u8 banks;		/* 26 */
	u8 memory_layout;	/* 27 */
	u8 bank_size;		/* 28 */
	u8 image_planes;	/* 29 */
	u8 page_function;	/* 30 */

	u8 rmask;		/* 31 */
	u8 rpos;		/* 32 */
	u8 gmask;		/* 33 */
	u8 gpos;		/* 34 */
	u8 bmask;		/* 35 */
	u8 bpos;		/* 36 */
	u8 resv_mask;		/* 37 */
	u8 resv_pos;		/* 38 */
	u8 dcm_info;		/* 39 */

	u32 lfb_ptr;		/* 40 Linear frame buffer address */
	u32 offscreen_ptr;	/* 44 Offscreen memory address */
	u16 offscreen_size;	/* 48 */

	u8 reserved[206];	/* 50 */
} __attribute__ ((packed));
|
||||
|
||||
#endif /* BOOT_VESA_H */
|
||||
128
arch/x86/boot/video-bios.c
Normal file
128
arch/x86/boot/video-bios.c
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Standard video BIOS modes
|
||||
*
|
||||
* We have two options for this; silent and scanned.
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "video.h"
|
||||
|
||||
static __videocard video_bios;
|
||||
|
||||
/* Set a conventional BIOS mode */
|
||||
static int set_bios_mode(u8 mode);
|
||||
|
||||
static int bios_set_mode(struct mode_info *mi)
|
||||
{
|
||||
return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS);
|
||||
}
|
||||
|
||||
/*
 * Set a conventional BIOS mode and verify that it took effect.
 * @mode: raw BIOS mode number
 *
 * Returns 0 when the BIOS reports @mode as current afterwards, -1
 * otherwise.  On failure (outside the wakeup build) the original video mode
 * is requested again if the BIOS ended up in some third mode.  do_restore
 * is always set.
 */
static int set_bios_mode(u8 mode)
{
	struct biosregs in, out;
	u8 active;

	initregs(&in);
	in.al = mode;			/* AH=0x00 Set Video Mode */
	intcall(0x10, &in, NULL);

	in.ah = 0x0f;			/* Get Current Video Mode */
	intcall(0x10, &in, &out);

	do_restore = 1;			/* Assume video contents were lost */

	/* Not all BIOSes are clean with the top bit */
	active = out.al & 0x7f;
	if (active == mode)
		return 0;		/* Mode change OK */

#ifndef _WAKEUP
	/*
	 * Mode setting failed, but we didn't end up where we started.
	 * That's bad.  Try to revert to the original video mode.
	 */
	if (active != boot_params.screen_info.orig_video_mode) {
		in.ax = boot_params.screen_info.orig_video_mode;
		intcall(0x10, &in, NULL);
	}
#endif
	return -1;
}
|
||||
|
||||
static int bios_probe(void)
|
||||
{
|
||||
u8 mode;
|
||||
#ifdef _WAKEUP
|
||||
u8 saved_mode = 0x03;
|
||||
#else
|
||||
u8 saved_mode = boot_params.screen_info.orig_video_mode;
|
||||
#endif
|
||||
u16 crtc;
|
||||
struct mode_info *mi;
|
||||
int nmodes = 0;
|
||||
|
||||
if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA)
|
||||
return 0;
|
||||
|
||||
set_fs(0);
|
||||
crtc = vga_crtc();
|
||||
|
||||
video_bios.modes = GET_HEAP(struct mode_info, 0);
|
||||
|
||||
for (mode = 0x14; mode <= 0x7f; mode++) {
|
||||
if (!heap_free(sizeof(struct mode_info)))
|
||||
break;
|
||||
|
||||
if (mode_defined(VIDEO_FIRST_BIOS+mode))
|
||||
continue;
|
||||
|
||||
if (set_bios_mode(mode))
|
||||
continue;
|
||||
|
||||
/* Try to verify that it's a text mode. */
|
||||
|
||||
/* Attribute Controller: make graphics controller disabled */
|
||||
if (in_idx(0x3c0, 0x10) & 0x01)
|
||||
continue;
|
||||
|
||||
/* Graphics Controller: verify Alpha addressing enabled */
|
||||
if (in_idx(0x3ce, 0x06) & 0x01)
|
||||
continue;
|
||||
|
||||
/* CRTC cursor location low should be zero(?) */
|
||||
if (in_idx(crtc, 0x0f))
|
||||
continue;
|
||||
|
||||
mi = GET_HEAP(struct mode_info, 1);
|
||||
mi->mode = VIDEO_FIRST_BIOS+mode;
|
||||
mi->depth = 0; /* text */
|
||||
mi->x = rdfs16(0x44a);
|
||||
mi->y = rdfs8(0x484)+1;
|
||||
nmodes++;
|
||||
}
|
||||
|
||||
set_bios_mode(saved_mode);
|
||||
|
||||
return nmodes;
|
||||
}
|
||||
|
||||
/* Driver descriptor for standard video BIOS modes */
static __videocard video_bios =
{
	.card_name	= "BIOS",
	.probe		= bios_probe,
	.set_mode	= bios_set_mode,
	/* presumably compared against probe_cards()'s argument, i.e. probed only on an unsafe probe - confirm */
	.unsafe		= 1,
	/* first mode ID and number of IDs this driver accepts even when not probed */
	.xmode_first	= VIDEO_FIRST_BIOS,
	.xmode_n	= 0x80,
};
|
||||
173
arch/x86/boot/video-mode.c
Normal file
173
arch/x86/boot/video-mode.c
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* arch/i386/boot/video-mode.c
|
||||
*
|
||||
* Set the video mode. This is separated out into a different
|
||||
* file in order to be shared with the ACPI wakeup code.
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "video.h"
|
||||
#include "vesa.h"
|
||||
|
||||
/*
|
||||
* Common variables
|
||||
*/
|
||||
int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
|
||||
u16 video_segment;
|
||||
int force_x, force_y; /* Don't query the BIOS for cols/rows */
|
||||
|
||||
int do_restore; /* Screen contents changed during mode flip */
|
||||
int graphic_mode; /* Graphic mode with linear frame buffer */
|
||||
|
||||
/* Probe the video drivers and have them generate their mode lists. */
|
||||
void probe_cards(int unsafe)
|
||||
{
|
||||
struct card_info *card;
|
||||
static u8 probed[2];
|
||||
|
||||
if (probed[unsafe])
|
||||
return;
|
||||
|
||||
probed[unsafe] = 1;
|
||||
|
||||
for (card = video_cards; card < video_cards_end; card++) {
|
||||
if (card->unsafe == unsafe) {
|
||||
if (card->probe)
|
||||
card->nmodes = card->probe();
|
||||
else
|
||||
card->nmodes = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Test if a mode is defined */
|
||||
int mode_defined(u16 mode)
|
||||
{
|
||||
struct card_info *card;
|
||||
struct mode_info *mi;
|
||||
int i;
|
||||
|
||||
for (card = video_cards; card < video_cards_end; card++) {
|
||||
mi = card->modes;
|
||||
for (i = 0; i < card->nmodes; i++, mi++) {
|
||||
if (mi->mode == mode)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set mode (without recalc) */
|
||||
static int raw_set_mode(u16 mode, u16 *real_mode)
|
||||
{
|
||||
int nmode, i;
|
||||
struct card_info *card;
|
||||
struct mode_info *mi;
|
||||
|
||||
/* Drop the recalc bit if set */
|
||||
mode &= ~VIDEO_RECALC;
|
||||
|
||||
/* Scan for mode based on fixed ID, position, or resolution */
|
||||
nmode = 0;
|
||||
for (card = video_cards; card < video_cards_end; card++) {
|
||||
mi = card->modes;
|
||||
for (i = 0; i < card->nmodes; i++, mi++) {
|
||||
int visible = mi->x || mi->y;
|
||||
|
||||
if ((mode == nmode && visible) ||
|
||||
mode == mi->mode ||
|
||||
mode == (mi->y << 8)+mi->x) {
|
||||
*real_mode = mi->mode;
|
||||
return card->set_mode(mi);
|
||||
}
|
||||
|
||||
if (visible)
|
||||
nmode++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Nothing found? Is it an "exceptional" (unprobed) mode? */
|
||||
for (card = video_cards; card < video_cards_end; card++) {
|
||||
if (mode >= card->xmode_first &&
|
||||
mode < card->xmode_first+card->xmode_n) {
|
||||
struct mode_info mix;
|
||||
*real_mode = mix.mode = mode;
|
||||
mix.x = mix.y = 0;
|
||||
return card->set_mode(&mix);
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, failure... */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Recalculate the vertical video cutoff (hack!)
|
||||
*/
|
||||
static void vga_recalc_vertical(void)
|
||||
{
|
||||
unsigned int font_size, rows;
|
||||
u16 crtc;
|
||||
u8 pt, ov;
|
||||
|
||||
set_fs(0);
|
||||
font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
|
||||
rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
|
||||
|
||||
rows *= font_size; /* Visible scan lines */
|
||||
rows--; /* ... minus one */
|
||||
|
||||
crtc = vga_crtc();
|
||||
|
||||
pt = in_idx(crtc, 0x11);
|
||||
pt &= ~0x80; /* Unlock CR0-7 */
|
||||
out_idx(pt, crtc, 0x11);
|
||||
|
||||
out_idx((u8)rows, crtc, 0x12); /* Lower height register */
|
||||
|
||||
ov = in_idx(crtc, 0x07); /* Overflow register */
|
||||
ov &= 0xbd;
|
||||
ov |= (rows >> (8-1)) & 0x02;
|
||||
ov |= (rows >> (9-6)) & 0x40;
|
||||
out_idx(ov, crtc, 0x07);
|
||||
}
|
||||
|
||||
/* Set mode (with recalc if specified) */
|
||||
int set_mode(u16 mode)
|
||||
{
|
||||
int rv;
|
||||
u16 real_mode;
|
||||
|
||||
/* Very special mode numbers... */
|
||||
if (mode == VIDEO_CURRENT_MODE)
|
||||
return 0; /* Nothing to do... */
|
||||
else if (mode == NORMAL_VGA)
|
||||
mode = VIDEO_80x25;
|
||||
else if (mode == EXTENDED_VGA)
|
||||
mode = VIDEO_8POINT;
|
||||
|
||||
rv = raw_set_mode(mode, &real_mode);
|
||||
if (rv)
|
||||
return rv;
|
||||
|
||||
if (mode & VIDEO_RECALC)
|
||||
vga_recalc_vertical();
|
||||
|
||||
/* Save the canonical mode number for the kernel, not
|
||||
an alias, size specification or menu position */
|
||||
#ifndef _WAKEUP
|
||||
boot_params.hdr.vid_mode = real_mode;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
281
arch/x86/boot/video-vesa.c
Normal file
281
arch/x86/boot/video-vesa.c
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* VESA text modes
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "video.h"
|
||||
#include "vesa.h"
|
||||
#include "string.h"
|
||||
|
||||
/* VESA information */
|
||||
static struct vesa_general_info vginfo;
|
||||
static struct vesa_mode_info vminfo;
|
||||
|
||||
static __videocard video_vesa;
|
||||
|
||||
#ifndef _WAKEUP
|
||||
static void vesa_store_mode_params_graphics(void);
|
||||
#else /* _WAKEUP */
|
||||
static inline void vesa_store_mode_params_graphics(void) {}
|
||||
#endif /* _WAKEUP */
|
||||
|
||||
static int vesa_probe(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
u16 mode;
|
||||
addr_t mode_ptr;
|
||||
struct mode_info *mi;
|
||||
int nmodes = 0;
|
||||
|
||||
video_vesa.modes = GET_HEAP(struct mode_info, 0);
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0x4f00;
|
||||
ireg.di = (size_t)&vginfo;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
if (oreg.ax != 0x004f ||
|
||||
vginfo.signature != VESA_MAGIC ||
|
||||
vginfo.version < 0x0102)
|
||||
return 0; /* Not present */
|
||||
|
||||
set_fs(vginfo.video_mode_ptr.seg);
|
||||
mode_ptr = vginfo.video_mode_ptr.off;
|
||||
|
||||
while ((mode = rdfs16(mode_ptr)) != 0xffff) {
|
||||
mode_ptr += 2;
|
||||
|
||||
if (!heap_free(sizeof(struct mode_info)))
|
||||
break; /* Heap full, can't save mode info */
|
||||
|
||||
if (mode & ~0x1ff)
|
||||
continue;
|
||||
|
||||
memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
|
||||
|
||||
ireg.ax = 0x4f01;
|
||||
ireg.cx = mode;
|
||||
ireg.di = (size_t)&vminfo;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
if (oreg.ax != 0x004f)
|
||||
continue;
|
||||
|
||||
if ((vminfo.mode_attr & 0x15) == 0x05) {
|
||||
/* Text Mode, TTY BIOS supported,
|
||||
supported by hardware */
|
||||
mi = GET_HEAP(struct mode_info, 1);
|
||||
mi->mode = mode + VIDEO_FIRST_VESA;
|
||||
mi->depth = 0; /* text */
|
||||
mi->x = vminfo.h_res;
|
||||
mi->y = vminfo.v_res;
|
||||
nmodes++;
|
||||
} else if ((vminfo.mode_attr & 0x99) == 0x99 &&
|
||||
(vminfo.memory_layout == 4 ||
|
||||
vminfo.memory_layout == 6) &&
|
||||
vminfo.memory_planes == 1) {
|
||||
#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
|
||||
/* Graphics mode, color, linear frame buffer
|
||||
supported. Only register the mode if
|
||||
if framebuffer is configured, however,
|
||||
otherwise the user will be left without a screen. */
|
||||
mi = GET_HEAP(struct mode_info, 1);
|
||||
mi->mode = mode + VIDEO_FIRST_VESA;
|
||||
mi->depth = vminfo.bpp;
|
||||
mi->x = vminfo.h_res;
|
||||
mi->y = vminfo.v_res;
|
||||
nmodes++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return nmodes;
|
||||
}
|
||||
|
||||
static int vesa_set_mode(struct mode_info *mode)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
int is_graphic;
|
||||
u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
|
||||
|
||||
memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0x4f01;
|
||||
ireg.cx = vesa_mode;
|
||||
ireg.di = (size_t)&vminfo;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
if (oreg.ax != 0x004f)
|
||||
return -1;
|
||||
|
||||
if ((vminfo.mode_attr & 0x15) == 0x05) {
|
||||
/* It's a supported text mode */
|
||||
is_graphic = 0;
|
||||
#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
|
||||
} else if ((vminfo.mode_attr & 0x99) == 0x99) {
|
||||
/* It's a graphics mode with linear frame buffer */
|
||||
is_graphic = 1;
|
||||
vesa_mode |= 0x4000; /* Request linear frame buffer */
|
||||
#endif
|
||||
} else {
|
||||
return -1; /* Invalid mode */
|
||||
}
|
||||
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0x4f02;
|
||||
ireg.bx = vesa_mode;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
if (oreg.ax != 0x004f)
|
||||
return -1;
|
||||
|
||||
graphic_mode = is_graphic;
|
||||
if (!is_graphic) {
|
||||
/* Text mode */
|
||||
force_x = mode->x;
|
||||
force_y = mode->y;
|
||||
do_restore = 1;
|
||||
} else {
|
||||
/* Graphics mode */
|
||||
vesa_store_mode_params_graphics();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef _WAKEUP
|
||||
|
||||
/* Switch DAC to 8-bit mode */
|
||||
static void vesa_dac_set_8bits(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
u8 dac_size = 6;
|
||||
|
||||
/* If possible, switch the DAC to 8-bit mode */
|
||||
if (vginfo.capabilities & 1) {
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0x4f08;
|
||||
ireg.bh = 0x08;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
if (oreg.ax == 0x004f)
|
||||
dac_size = oreg.bh;
|
||||
}
|
||||
|
||||
/* Set the color sizes to the DAC size, and offsets to 0 */
|
||||
boot_params.screen_info.red_size = dac_size;
|
||||
boot_params.screen_info.green_size = dac_size;
|
||||
boot_params.screen_info.blue_size = dac_size;
|
||||
boot_params.screen_info.rsvd_size = dac_size;
|
||||
|
||||
boot_params.screen_info.red_pos = 0;
|
||||
boot_params.screen_info.green_pos = 0;
|
||||
boot_params.screen_info.blue_pos = 0;
|
||||
boot_params.screen_info.rsvd_pos = 0;
|
||||
}
|
||||
|
||||
/* Save the VESA protected mode info */
|
||||
static void vesa_store_pm_info(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ax = 0x4f0a;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
if (oreg.ax != 0x004f)
|
||||
return;
|
||||
|
||||
boot_params.screen_info.vesapm_seg = oreg.es;
|
||||
boot_params.screen_info.vesapm_off = oreg.di;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save video mode parameters for graphics mode
|
||||
*/
|
||||
static void vesa_store_mode_params_graphics(void)
|
||||
{
|
||||
/* Tell the kernel we're in VESA graphics mode */
|
||||
boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
|
||||
|
||||
/* Mode parameters */
|
||||
boot_params.screen_info.vesa_attributes = vminfo.mode_attr;
|
||||
boot_params.screen_info.lfb_linelength = vminfo.logical_scan;
|
||||
boot_params.screen_info.lfb_width = vminfo.h_res;
|
||||
boot_params.screen_info.lfb_height = vminfo.v_res;
|
||||
boot_params.screen_info.lfb_depth = vminfo.bpp;
|
||||
boot_params.screen_info.pages = vminfo.image_planes;
|
||||
boot_params.screen_info.lfb_base = vminfo.lfb_ptr;
|
||||
memcpy(&boot_params.screen_info.red_size,
|
||||
&vminfo.rmask, 8);
|
||||
|
||||
/* General parameters */
|
||||
boot_params.screen_info.lfb_size = vginfo.total_memory;
|
||||
|
||||
if (vminfo.bpp <= 8)
|
||||
vesa_dac_set_8bits();
|
||||
|
||||
vesa_store_pm_info();
|
||||
}
|
||||
|
||||
/*
 * Save EDID information for the kernel; this is invoked, separately,
 * after mode-setting.
 *
 * The body is compiled only when CONFIG_FIRMWARE_EDID is defined;
 * otherwise the function does nothing.
 */
void vesa_store_edid(void)
{
#ifdef CONFIG_FIRMWARE_EDID
	struct biosregs req, rsp;

	/* Fill the buffer with 0x13; apparently used as a nonsense token... */
	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);

	if (vginfo.version < 0x0200)
		return;			/* EDID requires VBE 2.0+ */

	/*
	 * First call: report DDC capabilities.  BX (sub-function) and
	 * CX (controller) are left as initregs() set them.
	 */
	initregs(&req);
	req.ax = 0x4f15;		/* VBE DDC */
	req.es = 0;			/* ES:DI must be 0 by spec */
	intcall(0x10, &req, &rsp);

	if (rsp.ax != 0x004f)
		return;			/* No EDID */

	/* BH = time in seconds to transfer EDID information */
	/* BL = DDC level supported */

	/*
	 * Second call: read the EDID block.  The request block is reused,
	 * so CX (controller) and DX (block number) keep their values.
	 */
	req.ax = 0x4f15;		/* VBE DDC */
	req.bx = 0x0001;		/* Read EDID */
	req.es = ds();
	req.di = (size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
	intcall(0x10, &req, &rsp);
#endif /* CONFIG_FIRMWARE_EDID */
}
|
||||
|
||||
#endif /* not _WAKEUP */
|
||||
|
||||
static __videocard video_vesa =
|
||||
{
|
||||
.card_name = "VESA",
|
||||
.probe = vesa_probe,
|
||||
.set_mode = vesa_set_mode,
|
||||
.xmode_first = VIDEO_FIRST_VESA,
|
||||
.xmode_n = 0x200,
|
||||
};
|
||||
288
arch/x86/boot/video-vga.c
Normal file
288
arch/x86/boot/video-vga.c
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Common all-VGA modes
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "video.h"
|
||||
|
||||
static struct mode_info vga_modes[] = {
|
||||
{ VIDEO_80x25, 80, 25, 0 },
|
||||
{ VIDEO_8POINT, 80, 50, 0 },
|
||||
{ VIDEO_80x43, 80, 43, 0 },
|
||||
{ VIDEO_80x28, 80, 28, 0 },
|
||||
{ VIDEO_80x30, 80, 30, 0 },
|
||||
{ VIDEO_80x34, 80, 34, 0 },
|
||||
{ VIDEO_80x60, 80, 60, 0 },
|
||||
};
|
||||
|
||||
static struct mode_info ega_modes[] = {
|
||||
{ VIDEO_80x25, 80, 25, 0 },
|
||||
{ VIDEO_8POINT, 80, 43, 0 },
|
||||
};
|
||||
|
||||
static struct mode_info cga_modes[] = {
|
||||
{ VIDEO_80x25, 80, 25, 0 },
|
||||
};
|
||||
|
||||
static __videocard video_vga;
|
||||
|
||||
/* Set basic 80x25 mode */
|
||||
static u8 vga_set_basic_mode(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
u8 mode;
|
||||
|
||||
initregs(&ireg);
|
||||
|
||||
/* Query current mode */
|
||||
ireg.ax = 0x0f00;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
mode = oreg.al;
|
||||
|
||||
if (mode != 3 && mode != 7)
|
||||
mode = 3;
|
||||
|
||||
/* Set the mode */
|
||||
ireg.ax = mode; /* AH=0: set mode */
|
||||
intcall(0x10, &ireg, NULL);
|
||||
do_restore = 1;
|
||||
return mode;
|
||||
}
|
||||
|
||||
static void vga_set_8font(void)
|
||||
{
|
||||
/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
|
||||
struct biosregs ireg;
|
||||
|
||||
initregs(&ireg);
|
||||
|
||||
/* Set 8x8 font */
|
||||
ireg.ax = 0x1112;
|
||||
/* ireg.bl = 0; */
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
/* Use alternate print screen */
|
||||
ireg.ax = 0x1200;
|
||||
ireg.bl = 0x20;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
/* Turn off cursor emulation */
|
||||
ireg.ax = 0x1201;
|
||||
ireg.bl = 0x34;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
/* Cursor is scan lines 6-7 */
|
||||
ireg.ax = 0x0100;
|
||||
ireg.cx = 0x0607;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
}
|
||||
|
||||
static void vga_set_14font(void)
|
||||
{
|
||||
/* Set 9x14 font - 80x28 on VGA */
|
||||
struct biosregs ireg;
|
||||
|
||||
initregs(&ireg);
|
||||
|
||||
/* Set 9x14 font */
|
||||
ireg.ax = 0x1111;
|
||||
/* ireg.bl = 0; */
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
/* Turn off cursor emulation */
|
||||
ireg.ax = 0x1201;
|
||||
ireg.bl = 0x34;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
/* Cursor is scan lines 11-12 */
|
||||
ireg.ax = 0x0100;
|
||||
ireg.cx = 0x0b0c;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
}
|
||||
|
||||
static void vga_set_80x43(void)
|
||||
{
|
||||
/* Set 80x43 mode on VGA (not EGA) */
|
||||
struct biosregs ireg;
|
||||
|
||||
initregs(&ireg);
|
||||
|
||||
/* Set 350 scans */
|
||||
ireg.ax = 0x1201;
|
||||
ireg.bl = 0x30;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
/* Reset video mode */
|
||||
ireg.ax = 0x0003;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
vga_set_8font();
|
||||
}
|
||||
|
||||
/* I/O address of the VGA CRTC */
|
||||
u16 vga_crtc(void)
|
||||
{
|
||||
return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
|
||||
}
|
||||
|
||||
static void vga_set_480_scanlines(void)
|
||||
{
|
||||
u16 crtc; /* CRTC base address */
|
||||
u8 csel; /* CRTC miscellaneous output register */
|
||||
|
||||
crtc = vga_crtc();
|
||||
|
||||
out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
|
||||
out_idx(0x0b, crtc, 0x06); /* Vertical total */
|
||||
out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
|
||||
out_idx(0xea, crtc, 0x10); /* Vertical sync start */
|
||||
out_idx(0xdf, crtc, 0x12); /* Vertical display end */
|
||||
out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
|
||||
out_idx(0x04, crtc, 0x16); /* Vertical blank end */
|
||||
csel = inb(0x3cc);
|
||||
csel &= 0x0d;
|
||||
csel |= 0xe2;
|
||||
outb(csel, 0x3c2);
|
||||
}
|
||||
|
||||
static void vga_set_vertical_end(int lines)
|
||||
{
|
||||
u16 crtc; /* CRTC base address */
|
||||
u8 ovfw; /* CRTC overflow register */
|
||||
int end = lines-1;
|
||||
|
||||
crtc = vga_crtc();
|
||||
|
||||
ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
|
||||
|
||||
out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
|
||||
out_idx(end, crtc, 0x12); /* Vertical display end */
|
||||
}
|
||||
|
||||
/* 80x30: 480-scanline timing with the font left unchanged (30 rows * 16) */
static void vga_set_80x30(void)
{
	vga_set_480_scanlines();
	vga_set_vertical_end(480);	/* 30*16 */
}
|
||||
|
||||
/* 80x34: 480-scanline timing with the 14-line font (34 rows * 14) */
static void vga_set_80x34(void)
{
	vga_set_480_scanlines();
	vga_set_14font();
	vga_set_vertical_end(476);	/* 34*14 */
}
|
||||
|
||||
/* 80x60: 480-scanline timing with the 8-line font (60 rows * 8) */
static void vga_set_80x60(void)
{
	vga_set_480_scanlines();
	vga_set_8font();
	vga_set_vertical_end(480);	/* 60*8 */
}
|
||||
|
||||
static int vga_set_mode(struct mode_info *mode)
|
||||
{
|
||||
/* Set the basic mode */
|
||||
vga_set_basic_mode();
|
||||
|
||||
/* Override a possibly broken BIOS */
|
||||
force_x = mode->x;
|
||||
force_y = mode->y;
|
||||
|
||||
switch (mode->mode) {
|
||||
case VIDEO_80x25:
|
||||
break;
|
||||
case VIDEO_8POINT:
|
||||
vga_set_8font();
|
||||
break;
|
||||
case VIDEO_80x43:
|
||||
vga_set_80x43();
|
||||
break;
|
||||
case VIDEO_80x28:
|
||||
vga_set_14font();
|
||||
break;
|
||||
case VIDEO_80x30:
|
||||
vga_set_80x30();
|
||||
break;
|
||||
case VIDEO_80x34:
|
||||
vga_set_80x34();
|
||||
break;
|
||||
case VIDEO_80x60:
|
||||
vga_set_80x60();
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: this probe includes basic information required by all
|
||||
* systems. It should be executed first, by making sure
|
||||
* video-vga.c is listed first in the Makefile.
|
||||
*/
|
||||
static int vga_probe(void)
|
||||
{
|
||||
static const char *card_name[] = {
|
||||
"CGA/MDA/HGC", "EGA", "VGA"
|
||||
};
|
||||
static struct mode_info *mode_lists[] = {
|
||||
cga_modes,
|
||||
ega_modes,
|
||||
vga_modes,
|
||||
};
|
||||
static int mode_count[] = {
|
||||
sizeof(cga_modes)/sizeof(struct mode_info),
|
||||
sizeof(ega_modes)/sizeof(struct mode_info),
|
||||
sizeof(vga_modes)/sizeof(struct mode_info),
|
||||
};
|
||||
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
|
||||
ireg.ax = 0x1200;
|
||||
ireg.bl = 0x10; /* Check EGA/VGA */
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
#ifndef _WAKEUP
|
||||
boot_params.screen_info.orig_video_ega_bx = oreg.bx;
|
||||
#endif
|
||||
|
||||
/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
|
||||
if (oreg.bl != 0x10) {
|
||||
/* EGA/VGA */
|
||||
ireg.ax = 0x1a00;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
if (oreg.al == 0x1a) {
|
||||
adapter = ADAPTER_VGA;
|
||||
#ifndef _WAKEUP
|
||||
boot_params.screen_info.orig_video_isVGA = 1;
|
||||
#endif
|
||||
} else {
|
||||
adapter = ADAPTER_EGA;
|
||||
}
|
||||
} else {
|
||||
adapter = ADAPTER_CGA;
|
||||
}
|
||||
|
||||
video_vga.modes = mode_lists[adapter];
|
||||
video_vga.card_name = card_name[adapter];
|
||||
return mode_count[adapter];
|
||||
}
|
||||
|
||||
static __videocard video_vga = {
|
||||
.card_name = "VGA",
|
||||
.probe = vga_probe,
|
||||
.set_mode = vga_set_mode,
|
||||
};
|
||||
341
arch/x86/boot/video.c
Normal file
341
arch/x86/boot/video.c
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
* Copyright 2009 Intel Corporation; author H. Peter Anvin
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Select video mode
|
||||
*/
|
||||
|
||||
#include "boot.h"
|
||||
#include "video.h"
|
||||
#include "vesa.h"
|
||||
|
||||
static void store_cursor_position(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x03;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
boot_params.screen_info.orig_x = oreg.dl;
|
||||
boot_params.screen_info.orig_y = oreg.dh;
|
||||
|
||||
if (oreg.ch & 0x20)
|
||||
boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
|
||||
|
||||
if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
|
||||
boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
|
||||
}
|
||||
|
||||
static void store_video_mode(void)
|
||||
{
|
||||
struct biosregs ireg, oreg;
|
||||
|
||||
/* N.B.: the saving of the video page here is a bit silly,
|
||||
since we pretty much assume page 0 everywhere. */
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x0f;
|
||||
intcall(0x10, &ireg, &oreg);
|
||||
|
||||
/* Not all BIOSes are clean with respect to the top bit */
|
||||
boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
|
||||
boot_params.screen_info.orig_video_page = oreg.bh;
|
||||
}
|
||||
|
||||
/*
|
||||
* Store the video mode parameters for later usage by the kernel.
|
||||
* This is done by asking the BIOS except for the rows/columns
|
||||
* parameters in the default 80x25 mode -- these are set directly,
|
||||
* because some very obscure BIOSes supply insane values.
|
||||
*/
|
||||
static void store_mode_params(void)
|
||||
{
|
||||
u16 font_size;
|
||||
int x, y;
|
||||
|
||||
/* For graphics mode, it is up to the mode-setting driver
|
||||
(currently only video-vesa.c) to store the parameters */
|
||||
if (graphic_mode)
|
||||
return;
|
||||
|
||||
store_cursor_position();
|
||||
store_video_mode();
|
||||
|
||||
if (boot_params.screen_info.orig_video_mode == 0x07) {
|
||||
/* MDA, HGC, or VGA in monochrome mode */
|
||||
video_segment = 0xb000;
|
||||
} else {
|
||||
/* CGA, EGA, VGA and so forth */
|
||||
video_segment = 0xb800;
|
||||
}
|
||||
|
||||
set_fs(0);
|
||||
font_size = rdfs16(0x485); /* Font size, BIOS area */
|
||||
boot_params.screen_info.orig_video_points = font_size;
|
||||
|
||||
x = rdfs16(0x44a);
|
||||
y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1;
|
||||
|
||||
if (force_x)
|
||||
x = force_x;
|
||||
if (force_y)
|
||||
y = force_y;
|
||||
|
||||
boot_params.screen_info.orig_video_cols = x;
|
||||
boot_params.screen_info.orig_video_lines = y;
|
||||
}
|
||||
|
||||
static unsigned int get_entry(void)
|
||||
{
|
||||
char entry_buf[4];
|
||||
int i, len = 0;
|
||||
int key;
|
||||
unsigned int v;
|
||||
|
||||
do {
|
||||
key = getchar();
|
||||
|
||||
if (key == '\b') {
|
||||
if (len > 0) {
|
||||
puts("\b \b");
|
||||
len--;
|
||||
}
|
||||
} else if ((key >= '0' && key <= '9') ||
|
||||
(key >= 'A' && key <= 'Z') ||
|
||||
(key >= 'a' && key <= 'z')) {
|
||||
if (len < sizeof entry_buf) {
|
||||
entry_buf[len++] = key;
|
||||
putchar(key);
|
||||
}
|
||||
}
|
||||
} while (key != '\r');
|
||||
putchar('\n');
|
||||
|
||||
if (len == 0)
|
||||
return VIDEO_CURRENT_MODE; /* Default */
|
||||
|
||||
v = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
v <<= 4;
|
||||
key = entry_buf[i] | 0x20;
|
||||
v += (key > '9') ? key-'a'+10 : key-'0';
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static void display_menu(void)
|
||||
{
|
||||
struct card_info *card;
|
||||
struct mode_info *mi;
|
||||
char ch;
|
||||
int i;
|
||||
int nmodes;
|
||||
int modes_per_line;
|
||||
int col;
|
||||
|
||||
nmodes = 0;
|
||||
for (card = video_cards; card < video_cards_end; card++)
|
||||
nmodes += card->nmodes;
|
||||
|
||||
modes_per_line = 1;
|
||||
if (nmodes >= 20)
|
||||
modes_per_line = 3;
|
||||
|
||||
for (col = 0; col < modes_per_line; col++)
|
||||
puts("Mode: Resolution: Type: ");
|
||||
putchar('\n');
|
||||
|
||||
col = 0;
|
||||
ch = '0';
|
||||
for (card = video_cards; card < video_cards_end; card++) {
|
||||
mi = card->modes;
|
||||
for (i = 0; i < card->nmodes; i++, mi++) {
|
||||
char resbuf[32];
|
||||
int visible = mi->x && mi->y;
|
||||
u16 mode_id = mi->mode ? mi->mode :
|
||||
(mi->y << 8)+mi->x;
|
||||
|
||||
if (!visible)
|
||||
continue; /* Hidden mode */
|
||||
|
||||
if (mi->depth)
|
||||
sprintf(resbuf, "%dx%d", mi->y, mi->depth);
|
||||
else
|
||||
sprintf(resbuf, "%d", mi->y);
|
||||
|
||||
printf("%c %03X %4dx%-7s %-6s",
|
||||
ch, mode_id, mi->x, resbuf, card->card_name);
|
||||
col++;
|
||||
if (col >= modes_per_line) {
|
||||
putchar('\n');
|
||||
col = 0;
|
||||
}
|
||||
|
||||
if (ch == '9')
|
||||
ch = 'a';
|
||||
else if (ch == 'z' || ch == ' ')
|
||||
ch = ' '; /* Out of keys... */
|
||||
else
|
||||
ch++;
|
||||
}
|
||||
}
|
||||
if (col)
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
/* Value get_entry() assigns to one lower-case letter */
#define H(x)	(10 + ((x) - 'a'))
/* The word "scan" as packed by get_entry(): four bits per character */
#define SCAN	((((H('s') * 16 + H('c')) * 16 + H('a')) * 16) + H('n'))
|
||||
|
||||
static unsigned int mode_menu(void)
|
||||
{
|
||||
int key;
|
||||
unsigned int sel;
|
||||
|
||||
puts("Press <ENTER> to see video modes available, "
|
||||
"<SPACE> to continue, or wait 30 sec\n");
|
||||
|
||||
kbd_flush();
|
||||
while (1) {
|
||||
key = getchar_timeout();
|
||||
if (key == ' ' || key == 0)
|
||||
return VIDEO_CURRENT_MODE; /* Default */
|
||||
if (key == '\r')
|
||||
break;
|
||||
putchar('\a'); /* Beep! */
|
||||
}
|
||||
|
||||
|
||||
for (;;) {
|
||||
display_menu();
|
||||
|
||||
puts("Enter a video mode or \"scan\" to scan for "
|
||||
"additional modes: ");
|
||||
sel = get_entry();
|
||||
if (sel != SCAN)
|
||||
return sel;
|
||||
|
||||
probe_cards(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Save screen content to the heap */
|
||||
static struct saved_screen {
|
||||
int x, y;
|
||||
int curx, cury;
|
||||
u16 *data;
|
||||
} saved;
|
||||
|
||||
static void save_screen(void)
|
||||
{
|
||||
/* Should be called after store_mode_params() */
|
||||
saved.x = boot_params.screen_info.orig_video_cols;
|
||||
saved.y = boot_params.screen_info.orig_video_lines;
|
||||
saved.curx = boot_params.screen_info.orig_x;
|
||||
saved.cury = boot_params.screen_info.orig_y;
|
||||
|
||||
if (!heap_free(saved.x*saved.y*sizeof(u16)+512))
|
||||
return; /* Not enough heap to save the screen */
|
||||
|
||||
saved.data = GET_HEAP(u16, saved.x*saved.y);
|
||||
|
||||
set_fs(video_segment);
|
||||
copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16));
|
||||
}
|
||||
|
||||
static void restore_screen(void)
|
||||
{
|
||||
/* Should be called after store_mode_params() */
|
||||
int xs = boot_params.screen_info.orig_video_cols;
|
||||
int ys = boot_params.screen_info.orig_video_lines;
|
||||
int y;
|
||||
addr_t dst = 0;
|
||||
u16 *src = saved.data;
|
||||
struct biosregs ireg;
|
||||
|
||||
if (graphic_mode)
|
||||
return; /* Can't restore onto a graphic mode */
|
||||
|
||||
if (!src)
|
||||
return; /* No saved screen contents */
|
||||
|
||||
/* Restore screen contents */
|
||||
|
||||
set_fs(video_segment);
|
||||
for (y = 0; y < ys; y++) {
|
||||
int npad;
|
||||
|
||||
if (y < saved.y) {
|
||||
int copy = (xs < saved.x) ? xs : saved.x;
|
||||
copy_to_fs(dst, src, copy*sizeof(u16));
|
||||
dst += copy*sizeof(u16);
|
||||
src += saved.x;
|
||||
npad = (xs < saved.x) ? 0 : xs-saved.x;
|
||||
} else {
|
||||
npad = xs;
|
||||
}
|
||||
|
||||
/* Writes "npad" blank characters to
|
||||
video_segment:dst and advances dst */
|
||||
asm volatile("pushw %%es ; "
|
||||
"movw %2,%%es ; "
|
||||
"shrw %%cx ; "
|
||||
"jnc 1f ; "
|
||||
"stosw \n\t"
|
||||
"1: rep;stosl ; "
|
||||
"popw %%es"
|
||||
: "+D" (dst), "+c" (npad)
|
||||
: "bdS" (video_segment),
|
||||
"a" (0x07200720));
|
||||
}
|
||||
|
||||
/* Restore cursor position */
|
||||
if (saved.curx >= xs)
|
||||
saved.curx = xs-1;
|
||||
if (saved.cury >= ys)
|
||||
saved.cury = ys-1;
|
||||
|
||||
initregs(&ireg);
|
||||
ireg.ah = 0x02; /* Set cursor position */
|
||||
ireg.dh = saved.cury;
|
||||
ireg.dl = saved.curx;
|
||||
intcall(0x10, &ireg, NULL);
|
||||
|
||||
store_cursor_position();
|
||||
}
|
||||
|
||||
void set_video(void)
|
||||
{
|
||||
u16 mode = boot_params.hdr.vid_mode;
|
||||
|
||||
RESET_HEAP();
|
||||
|
||||
store_mode_params();
|
||||
save_screen();
|
||||
probe_cards(0);
|
||||
|
||||
for (;;) {
|
||||
if (mode == ASK_VGA)
|
||||
mode = mode_menu();
|
||||
|
||||
if (!set_mode(mode))
|
||||
break;
|
||||
|
||||
printf("Undefined video mode number: %x\n", mode);
|
||||
mode = ASK_VGA;
|
||||
}
|
||||
boot_params.hdr.vid_mode = mode;
|
||||
vesa_store_edid();
|
||||
store_mode_params();
|
||||
|
||||
if (do_restore)
|
||||
restore_screen();
|
||||
}
|
||||
121
arch/x86/boot/video.h
Normal file
121
arch/x86/boot/video.h
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
/* -*- linux-c -*- ------------------------------------------------------- *
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
* Copyright 2007 rPath, Inc. - All Rights Reserved
|
||||
*
|
||||
* This file is part of the Linux kernel, and is made available under
|
||||
* the terms of the GNU General Public License version 2.
|
||||
*
|
||||
* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Header file for the real-mode video probing code
|
||||
*/
|
||||
|
||||
#ifndef BOOT_VIDEO_H
|
||||
#define BOOT_VIDEO_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* This code uses an extended set of video mode numbers. These include:
|
||||
* Aliases for standard modes
|
||||
* NORMAL_VGA (-1)
|
||||
* EXTENDED_VGA (-2)
|
||||
* ASK_VGA (-3)
|
||||
* Video modes numbered by menu position -- NOT RECOMMENDED because of lack
|
||||
* of compatibility when extending the table. These are between 0x00 and 0xff.
|
||||
*/
|
||||
#define VIDEO_FIRST_MENU 0x0000
|
||||
|
||||
/* Standard BIOS video modes (BIOS number + 0x0100) */
|
||||
#define VIDEO_FIRST_BIOS 0x0100
|
||||
|
||||
/* VESA BIOS video modes (VESA number + 0x0200) */
|
||||
#define VIDEO_FIRST_VESA 0x0200
|
||||
|
||||
/* Video7 special modes (BIOS number + 0x0900) */
|
||||
#define VIDEO_FIRST_V7 0x0900
|
||||
|
||||
/* Special video modes */
|
||||
#define VIDEO_FIRST_SPECIAL 0x0f00
|
||||
#define VIDEO_80x25 0x0f00
|
||||
#define VIDEO_8POINT 0x0f01
|
||||
#define VIDEO_80x43 0x0f02
|
||||
#define VIDEO_80x28 0x0f03
|
||||
#define VIDEO_CURRENT_MODE 0x0f04
|
||||
#define VIDEO_80x30 0x0f05
|
||||
#define VIDEO_80x34 0x0f06
|
||||
#define VIDEO_80x60 0x0f07
|
||||
#define VIDEO_GFX_HACK 0x0f08
|
||||
#define VIDEO_LAST_SPECIAL 0x0f09
|
||||
|
||||
/* Video modes given by resolution */
|
||||
#define VIDEO_FIRST_RESOLUTION 0x1000
|
||||
|
||||
/* The "recalculate timings" flag */
|
||||
#define VIDEO_RECALC 0x8000
|
||||
|
||||
void store_screen(void);
|
||||
#define DO_STORE() store_screen()
|
||||
|
||||
/*
|
||||
* Mode table structures
|
||||
*/
|
||||
|
||||
struct mode_info {
|
||||
u16 mode; /* Mode number (vga= style) */
|
||||
u16 x, y; /* Width, height */
|
||||
u16 depth; /* Bits per pixel, 0 for text mode */
|
||||
};
|
||||
|
||||
struct card_info {
|
||||
const char *card_name;
|
||||
int (*set_mode)(struct mode_info *mode);
|
||||
int (*probe)(void);
|
||||
struct mode_info *modes;
|
||||
int nmodes; /* Number of probed modes so far */
|
||||
int unsafe; /* Probing is unsafe, only do after "scan" */
|
||||
u16 xmode_first; /* Unprobed modes to try to call anyway */
|
||||
u16 xmode_n; /* Size of unprobed mode range */
|
||||
};
|
||||
|
||||
#define __videocard struct card_info __attribute__((used,section(".videocards")))
|
||||
extern struct card_info video_cards[], video_cards_end[];
|
||||
|
||||
int mode_defined(u16 mode); /* video.c */
|
||||
|
||||
/* Basic video information */
|
||||
#define ADAPTER_CGA 0 /* CGA/MDA/HGC */
|
||||
#define ADAPTER_EGA 1
|
||||
#define ADAPTER_VGA 2
|
||||
|
||||
extern int adapter;
|
||||
extern u16 video_segment;
|
||||
extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
|
||||
extern int do_restore; /* Restore screen contents */
|
||||
extern int graphic_mode; /* Graphics mode with linear frame buffer */
|
||||
|
||||
/* Accessing VGA indexed registers */
|
||||
/* Select register @index at @port, then read its value from @port + 1 */
static inline u8 in_idx(u16 port, u8 index)
{
	u8 value;

	outb(index, port);
	value = inb(port + 1);

	return value;
}
|
||||
|
||||
/*
 * Write @v to register @index at @port with one 16-bit access:
 * the index is the low byte and the value the high byte.
 */
static inline void out_idx(u8 v, u16 port, u8 index)
{
	outw(((u16)v << 8) | index, port);
}
|
||||
|
||||
/* Writes a value to an indexed port and then reads the port again */
|
||||
static inline u8 tst_idx(u8 v, u16 port, u8 index)
|
||||
{
|
||||
out_idx(port, index, v);
|
||||
return in_idx(port, index);
|
||||
}
|
||||
|
||||
/* Get the I/O port of the VGA CRTC */
|
||||
u16 vga_crtc(void); /* video-vga.c */
|
||||
|
||||
#endif /* BOOT_VIDEO_H */
|
||||
311
arch/x86/configs/i386_defconfig
Normal file
311
arch/x86/configs/i386_defconfig
Normal file
|
|
@ -0,0 +1,311 @@
|
|||
# CONFIG_64BIT is not set
|
||||
CONFIG_EXPERIMENTAL=y
|
||||
# CONFIG_LOCALVERSION_AUTO is not set
|
||||
CONFIG_SYSVIPC=y
|
||||
CONFIG_POSIX_MQUEUE=y
|
||||
CONFIG_BSD_PROCESS_ACCT=y
|
||||
CONFIG_TASKSTATS=y
|
||||
CONFIG_TASK_DELAY_ACCT=y
|
||||
CONFIG_TASK_XACCT=y
|
||||
CONFIG_TASK_IO_ACCOUNTING=y
|
||||
CONFIG_AUDIT=y
|
||||
CONFIG_NO_HZ=y
|
||||
CONFIG_HIGH_RES_TIMERS=y
|
||||
CONFIG_LOG_BUF_SHIFT=18
|
||||
CONFIG_CGROUPS=y
|
||||
CONFIG_CGROUP_FREEZER=y
|
||||
CONFIG_CPUSETS=y
|
||||
CONFIG_CGROUP_CPUACCT=y
|
||||
CONFIG_RESOURCE_COUNTERS=y
|
||||
CONFIG_CGROUP_SCHED=y
|
||||
CONFIG_BLK_DEV_INITRD=y
|
||||
# CONFIG_COMPAT_BRK is not set
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_KPROBES=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_PARTITION_ADVANCED=y
|
||||
CONFIG_OSF_PARTITION=y
|
||||
CONFIG_AMIGA_PARTITION=y
|
||||
CONFIG_MAC_PARTITION=y
|
||||
CONFIG_BSD_DISKLABEL=y
|
||||
CONFIG_MINIX_SUBPARTITION=y
|
||||
CONFIG_SOLARIS_X86_PARTITION=y
|
||||
CONFIG_UNIXWARE_DISKLABEL=y
|
||||
CONFIG_SGI_PARTITION=y
|
||||
CONFIG_SUN_PARTITION=y
|
||||
CONFIG_KARMA_PARTITION=y
|
||||
CONFIG_EFI_PARTITION=y
|
||||
CONFIG_SMP=y
|
||||
CONFIG_X86_GENERIC=y
|
||||
CONFIG_HPET_TIMER=y
|
||||
CONFIG_SCHED_SMT=y
|
||||
CONFIG_PREEMPT_VOLUNTARY=y
|
||||
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
|
||||
CONFIG_X86_MCE=y
|
||||
CONFIG_X86_REBOOTFIXUPS=y
|
||||
CONFIG_MICROCODE=y
|
||||
CONFIG_MICROCODE_AMD=y
|
||||
CONFIG_X86_MSR=y
|
||||
CONFIG_X86_CPUID=y
|
||||
CONFIG_HIGHPTE=y
|
||||
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
|
||||
# CONFIG_MTRR_SANITIZER is not set
|
||||
CONFIG_EFI=y
|
||||
CONFIG_HZ_1000=y
|
||||
CONFIG_KEXEC=y
|
||||
CONFIG_CRASH_DUMP=y
|
||||
# CONFIG_COMPAT_VDSO is not set
|
||||
CONFIG_HIBERNATION=y
|
||||
CONFIG_PM_DEBUG=y
|
||||
CONFIG_PM_TRACE_RTC=y
|
||||
CONFIG_ACPI_DOCK=y
|
||||
CONFIG_CPU_FREQ=y
|
||||
# CONFIG_CPU_FREQ_STAT is not set
|
||||
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
|
||||
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
|
||||
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
|
||||
CONFIG_X86_ACPI_CPUFREQ=y
|
||||
CONFIG_PCIEPORTBUS=y
|
||||
CONFIG_PCI_MSI=y
|
||||
CONFIG_PCCARD=y
|
||||
CONFIG_YENTA=y
|
||||
CONFIG_HOTPLUG_PCI=y
|
||||
CONFIG_BINFMT_MISC=y
|
||||
CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
CONFIG_UNIX=y
|
||||
CONFIG_XFRM_USER=y
|
||||
CONFIG_INET=y
|
||||
CONFIG_IP_MULTICAST=y
|
||||
CONFIG_IP_ADVANCED_ROUTER=y
|
||||
CONFIG_IP_MULTIPLE_TABLES=y
|
||||
CONFIG_IP_ROUTE_MULTIPATH=y
|
||||
CONFIG_IP_ROUTE_VERBOSE=y
|
||||
CONFIG_IP_PNP=y
|
||||
CONFIG_IP_PNP_DHCP=y
|
||||
CONFIG_IP_PNP_BOOTP=y
|
||||
CONFIG_IP_PNP_RARP=y
|
||||
CONFIG_IP_MROUTE=y
|
||||
CONFIG_IP_PIMSM_V1=y
|
||||
CONFIG_IP_PIMSM_V2=y
|
||||
CONFIG_SYN_COOKIES=y
|
||||
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
|
||||
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
|
||||
# CONFIG_INET_XFRM_MODE_BEET is not set
|
||||
# CONFIG_INET_DIAG is not set
|
||||
CONFIG_TCP_CONG_ADVANCED=y
|
||||
# CONFIG_TCP_CONG_BIC is not set
|
||||
# CONFIG_TCP_CONG_WESTWOOD is not set
|
||||
# CONFIG_TCP_CONG_HTCP is not set
|
||||
CONFIG_TCP_MD5SIG=y
|
||||
CONFIG_IPV6=y
|
||||
CONFIG_INET6_AH=y
|
||||
CONFIG_INET6_ESP=y
|
||||
CONFIG_NETLABEL=y
|
||||
CONFIG_NETFILTER=y
|
||||
# CONFIG_NETFILTER_ADVANCED is not set
|
||||
CONFIG_NF_CONNTRACK=y
|
||||
CONFIG_NF_CONNTRACK_FTP=y
|
||||
CONFIG_NF_CONNTRACK_IRC=y
|
||||
CONFIG_NF_CONNTRACK_SIP=y
|
||||
CONFIG_NF_CT_NETLINK=y
|
||||
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
|
||||
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
|
||||
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
|
||||
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
|
||||
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
|
||||
CONFIG_NETFILTER_XT_MATCH_POLICY=y
|
||||
CONFIG_NETFILTER_XT_MATCH_STATE=y
|
||||
CONFIG_NF_CONNTRACK_IPV4=y
|
||||
CONFIG_IP_NF_IPTABLES=y
|
||||
CONFIG_IP_NF_FILTER=y
|
||||
CONFIG_IP_NF_TARGET_REJECT=y
|
||||
CONFIG_IP_NF_TARGET_ULOG=y
|
||||
CONFIG_NF_NAT=y
|
||||
CONFIG_IP_NF_TARGET_MASQUERADE=y
|
||||
CONFIG_IP_NF_MANGLE=y
|
||||
CONFIG_NF_CONNTRACK_IPV6=y
|
||||
CONFIG_IP6_NF_IPTABLES=y
|
||||
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
|
||||
CONFIG_IP6_NF_FILTER=y
|
||||
CONFIG_IP6_NF_TARGET_REJECT=y
|
||||
CONFIG_IP6_NF_MANGLE=y
|
||||
CONFIG_NET_SCHED=y
|
||||
CONFIG_NET_EMATCH=y
|
||||
CONFIG_NET_CLS_ACT=y
|
||||
CONFIG_HAMRADIO=y
|
||||
CONFIG_CFG80211=y
|
||||
CONFIG_MAC80211=y
|
||||
CONFIG_MAC80211_LEDS=y
|
||||
CONFIG_RFKILL=y
|
||||
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
|
||||
CONFIG_DEVTMPFS=y
|
||||
CONFIG_DEVTMPFS_MOUNT=y
|
||||
CONFIG_DEBUG_DEVRES=y
|
||||
CONFIG_CONNECTOR=y
|
||||
CONFIG_BLK_DEV_LOOP=y
|
||||
CONFIG_BLK_DEV_SD=y
|
||||
CONFIG_BLK_DEV_SR=y
|
||||
CONFIG_BLK_DEV_SR_VENDOR=y
|
||||
CONFIG_CHR_DEV_SG=y
|
||||
CONFIG_SCSI_CONSTANTS=y
|
||||
CONFIG_SCSI_SPI_ATTRS=y
|
||||
# CONFIG_SCSI_LOWLEVEL is not set
|
||||
CONFIG_ATA=y
|
||||
CONFIG_SATA_AHCI=y
|
||||
CONFIG_ATA_PIIX=y
|
||||
CONFIG_PATA_AMD=y
|
||||
CONFIG_PATA_OLDPIIX=y
|
||||
CONFIG_PATA_SCH=y
|
||||
CONFIG_PATA_MPIIX=y
|
||||
CONFIG_ATA_GENERIC=y
|
||||
CONFIG_MD=y
|
||||
CONFIG_BLK_DEV_MD=y
|
||||
CONFIG_BLK_DEV_DM=y
|
||||
CONFIG_DM_MIRROR=y
|
||||
CONFIG_DM_ZERO=y
|
||||
CONFIG_MACINTOSH_DRIVERS=y
|
||||
CONFIG_MAC_EMUMOUSEBTN=y
|
||||
CONFIG_NETDEVICES=y
|
||||
CONFIG_NETCONSOLE=y
|
||||
CONFIG_BNX2=y
|
||||
CONFIG_TIGON3=y
|
||||
CONFIG_NET_TULIP=y
|
||||
CONFIG_E100=y
|
||||
CONFIG_E1000=y
|
||||
CONFIG_E1000E=y
|
||||
CONFIG_SKY2=y
|
||||
CONFIG_NE2K_PCI=y
|
||||
CONFIG_FORCEDETH=y
|
||||
CONFIG_8139TOO=y
|
||||
# CONFIG_8139TOO_PIO is not set
|
||||
CONFIG_R8169=y
|
||||
CONFIG_FDDI=y
|
||||
CONFIG_INPUT_POLLDEV=y
|
||||
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
|
||||
CONFIG_INPUT_EVDEV=y
|
||||
CONFIG_INPUT_JOYSTICK=y
|
||||
CONFIG_INPUT_TABLET=y
|
||||
CONFIG_INPUT_TOUCHSCREEN=y
|
||||
CONFIG_INPUT_MISC=y
|
||||
CONFIG_VT_HW_CONSOLE_BINDING=y
|
||||
# CONFIG_LEGACY_PTYS is not set
|
||||
CONFIG_SERIAL_NONSTANDARD=y
|
||||
CONFIG_SERIAL_8250=y
|
||||
CONFIG_SERIAL_8250_CONSOLE=y
|
||||
CONFIG_SERIAL_8250_NR_UARTS=32
|
||||
CONFIG_SERIAL_8250_EXTENDED=y
|
||||
CONFIG_SERIAL_8250_MANY_PORTS=y
|
||||
CONFIG_SERIAL_8250_SHARE_IRQ=y
|
||||
CONFIG_SERIAL_8250_DETECT_IRQ=y
|
||||
CONFIG_SERIAL_8250_RSA=y
|
||||
CONFIG_HW_RANDOM=y
|
||||
CONFIG_NVRAM=y
|
||||
CONFIG_HPET=y
|
||||
# CONFIG_HPET_MMAP is not set
|
||||
CONFIG_I2C_I801=y
|
||||
CONFIG_WATCHDOG=y
|
||||
CONFIG_AGP=y
|
||||
CONFIG_AGP_AMD64=y
|
||||
CONFIG_AGP_INTEL=y
|
||||
CONFIG_DRM=y
|
||||
CONFIG_DRM_I915=y
|
||||
CONFIG_FB_MODE_HELPERS=y
|
||||
CONFIG_FB_TILEBLITTING=y
|
||||
CONFIG_FB_EFI=y
|
||||
# CONFIG_LCD_CLASS_DEVICE is not set
|
||||
CONFIG_VGACON_SOFT_SCROLLBACK=y
|
||||
CONFIG_LOGO=y
|
||||
# CONFIG_LOGO_LINUX_MONO is not set
|
||||
# CONFIG_LOGO_LINUX_VGA16 is not set
|
||||
CONFIG_SOUND=y
|
||||
CONFIG_SND=y
|
||||
CONFIG_SND_SEQUENCER=y
|
||||
CONFIG_SND_SEQ_DUMMY=y
|
||||
CONFIG_SND_MIXER_OSS=y
|
||||
CONFIG_SND_PCM_OSS=y
|
||||
CONFIG_SND_SEQUENCER_OSS=y
|
||||
CONFIG_SND_HRTIMER=y
|
||||
CONFIG_SND_HDA_INTEL=y
|
||||
CONFIG_SND_HDA_HWDEP=y
|
||||
CONFIG_HIDRAW=y
|
||||
CONFIG_HID_GYRATION=y
|
||||
CONFIG_LOGITECH_FF=y
|
||||
CONFIG_HID_NTRIG=y
|
||||
CONFIG_HID_PANTHERLORD=y
|
||||
CONFIG_PANTHERLORD_FF=y
|
||||
CONFIG_HID_PETALYNX=y
|
||||
CONFIG_HID_SAMSUNG=y
|
||||
CONFIG_HID_SONY=y
|
||||
CONFIG_HID_SUNPLUS=y
|
||||
CONFIG_HID_TOPSEED=y
|
||||
CONFIG_HID_PID=y
|
||||
CONFIG_USB_HIDDEV=y
|
||||
CONFIG_USB=y
|
||||
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
|
||||
CONFIG_USB_MON=y
|
||||
CONFIG_USB_EHCI_HCD=y
|
||||
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
|
||||
CONFIG_USB_OHCI_HCD=y
|
||||
CONFIG_USB_UHCI_HCD=y
|
||||
CONFIG_USB_PRINTER=y
|
||||
CONFIG_USB_STORAGE=y
|
||||
CONFIG_USB_LIBUSUAL=y
|
||||
CONFIG_EDAC=y
|
||||
CONFIG_RTC_CLASS=y
|
||||
# CONFIG_RTC_HCTOSYS is not set
|
||||
CONFIG_DMADEVICES=y
|
||||
CONFIG_EEEPC_LAPTOP=y
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_EXT4_FS=y
|
||||
CONFIG_EXT4_FS_POSIX_ACL=y
|
||||
CONFIG_EXT4_FS_SECURITY=y
|
||||
CONFIG_QUOTA=y
|
||||
CONFIG_QUOTA_NETLINK_INTERFACE=y
|
||||
# CONFIG_PRINT_QUOTA_WARNING is not set
|
||||
CONFIG_QFMT_V2=y
|
||||
CONFIG_AUTOFS4_FS=y
|
||||
CONFIG_ISO9660_FS=y
|
||||
CONFIG_JOLIET=y
|
||||
CONFIG_ZISOFS=y
|
||||
CONFIG_MSDOS_FS=y
|
||||
CONFIG_VFAT_FS=y
|
||||
CONFIG_PROC_KCORE=y
|
||||
CONFIG_TMPFS_POSIX_ACL=y
|
||||
CONFIG_HUGETLBFS=y
|
||||
CONFIG_NFS_FS=y
|
||||
CONFIG_NFS_V3_ACL=y
|
||||
CONFIG_NFS_V4=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_NLS_DEFAULT="utf8"
|
||||
CONFIG_NLS_CODEPAGE_437=y
|
||||
CONFIG_NLS_ASCII=y
|
||||
CONFIG_NLS_ISO8859_1=y
|
||||
CONFIG_NLS_UTF8=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
# CONFIG_ENABLE_WARN_DEPRECATED is not set
|
||||
CONFIG_FRAME_WARN=2048
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
# CONFIG_UNUSED_SYMBOLS is not set
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
# CONFIG_SCHED_DEBUG is not set
|
||||
CONFIG_SCHEDSTATS=y
|
||||
CONFIG_TIMER_STATS=y
|
||||
CONFIG_DEBUG_STACK_USAGE=y
|
||||
CONFIG_BLK_DEV_IO_TRACE=y
|
||||
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
|
||||
CONFIG_EARLY_PRINTK_DBGP=y
|
||||
CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
# CONFIG_DEBUG_RODATA_TEST is not set
|
||||
CONFIG_DEBUG_BOOT_PARAMS=y
|
||||
CONFIG_OPTIMIZE_INLINING=y
|
||||
CONFIG_KEYS_DEBUG_PROC_KEYS=y
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_SECURITY_NETWORK=y
|
||||
CONFIG_SECURITY_SELINUX=y
|
||||
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
|
||||
CONFIG_SECURITY_SELINUX_DISABLE=y
|
||||
CONFIG_CRYPTO_AES_586=y
|
||||
# CONFIG_CRYPTO_ANSI_CPRNG is not set
|
||||
28
arch/x86/configs/kvm_guest.config
Normal file
28
arch/x86/configs/kvm_guest.config
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
CONFIG_NET=y
|
||||
CONFIG_NET_CORE=y
|
||||
CONFIG_NETDEVICES=y
|
||||
CONFIG_BLOCK=y
|
||||
CONFIG_BLK_DEV=y
|
||||
CONFIG_NETWORK_FILESYSTEMS=y
|
||||
CONFIG_INET=y
|
||||
CONFIG_TTY=y
|
||||
CONFIG_SERIAL_8250=y
|
||||
CONFIG_SERIAL_8250_CONSOLE=y
|
||||
CONFIG_IP_PNP=y
|
||||
CONFIG_IP_PNP_DHCP=y
|
||||
CONFIG_BINFMT_ELF=y
|
||||
CONFIG_PCI=y
|
||||
CONFIG_PCI_MSI=y
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_VIRTUALIZATION=y
|
||||
CONFIG_HYPERVISOR_GUEST=y
|
||||
CONFIG_PARAVIRT=y
|
||||
CONFIG_KVM_GUEST=y
|
||||
CONFIG_VIRTIO=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
CONFIG_VIRTIO_BLK=y
|
||||
CONFIG_VIRTIO_CONSOLE=y
|
||||
CONFIG_VIRTIO_NET=y
|
||||
CONFIG_9P_FS=y
|
||||
CONFIG_NET_9P=y
|
||||
CONFIG_NET_9P_VIRTIO=y
|
||||
1
arch/x86/configs/tiny.config
Normal file
1
arch/x86/configs/tiny.config
Normal file
|
|
@ -0,0 +1 @@
|
|||
CONFIG_NOHIGHMEM=y
|
||||
308
arch/x86/configs/x86_64_defconfig
Normal file
308
arch/x86/configs/x86_64_defconfig
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
CONFIG_EXPERIMENTAL=y
|
||||
# CONFIG_LOCALVERSION_AUTO is not set
|
||||
CONFIG_SYSVIPC=y
|
||||
CONFIG_POSIX_MQUEUE=y
|
||||
CONFIG_BSD_PROCESS_ACCT=y
|
||||
CONFIG_TASKSTATS=y
|
||||
CONFIG_TASK_DELAY_ACCT=y
|
||||
CONFIG_TASK_XACCT=y
|
||||
CONFIG_TASK_IO_ACCOUNTING=y
|
||||
CONFIG_AUDIT=y
|
||||
CONFIG_NO_HZ=y
|
||||
CONFIG_HIGH_RES_TIMERS=y
|
||||
CONFIG_LOG_BUF_SHIFT=18
|
||||
CONFIG_CGROUPS=y
|
||||
CONFIG_CGROUP_FREEZER=y
|
||||
CONFIG_CPUSETS=y
|
||||
CONFIG_CGROUP_CPUACCT=y
|
||||
CONFIG_RESOURCE_COUNTERS=y
|
||||
CONFIG_CGROUP_SCHED=y
|
||||
CONFIG_BLK_DEV_INITRD=y
|
||||
# CONFIG_COMPAT_BRK is not set
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_KPROBES=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_UNLOAD=y
|
||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||
CONFIG_PARTITION_ADVANCED=y
|
||||
CONFIG_OSF_PARTITION=y
|
||||
CONFIG_AMIGA_PARTITION=y
|
||||
CONFIG_MAC_PARTITION=y
|
||||
CONFIG_BSD_DISKLABEL=y
|
||||
CONFIG_MINIX_SUBPARTITION=y
|
||||
CONFIG_SOLARIS_X86_PARTITION=y
|
||||
CONFIG_UNIXWARE_DISKLABEL=y
|
||||
CONFIG_SGI_PARTITION=y
|
||||
CONFIG_SUN_PARTITION=y
|
||||
CONFIG_KARMA_PARTITION=y
|
||||
CONFIG_EFI_PARTITION=y
|
||||
CONFIG_SMP=y
|
||||
CONFIG_CALGARY_IOMMU=y
|
||||
CONFIG_NR_CPUS=64
|
||||
CONFIG_SCHED_SMT=y
|
||||
CONFIG_PREEMPT_VOLUNTARY=y
|
||||
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
|
||||
CONFIG_X86_MCE=y
|
||||
CONFIG_MICROCODE=y
|
||||
CONFIG_MICROCODE_AMD=y
|
||||
CONFIG_X86_MSR=y
|
||||
CONFIG_X86_CPUID=y
|
||||
CONFIG_NUMA=y
|
||||
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
|
||||
# CONFIG_MTRR_SANITIZER is not set
|
||||
CONFIG_EFI=y
|
||||
CONFIG_HZ_1000=y
|
||||
CONFIG_KEXEC=y
|
||||
CONFIG_CRASH_DUMP=y
|
||||
# CONFIG_COMPAT_VDSO is not set
|
||||
CONFIG_HIBERNATION=y
|
||||
CONFIG_PM_DEBUG=y
|
||||
CONFIG_PM_TRACE_RTC=y
|
||||
CONFIG_ACPI_DOCK=y
|
||||
CONFIG_CPU_FREQ=y
|
||||
# CONFIG_CPU_FREQ_STAT is not set
|
||||
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
|
||||
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
|
||||
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
|
||||
CONFIG_X86_ACPI_CPUFREQ=y
|
||||
CONFIG_PCI_MMCONFIG=y
|
||||
CONFIG_PCIEPORTBUS=y
|
||||
CONFIG_PCCARD=y
|
||||
CONFIG_YENTA=y
|
||||
CONFIG_HOTPLUG_PCI=y
|
||||
CONFIG_BINFMT_MISC=y
|
||||
CONFIG_IA32_EMULATION=y
|
||||
CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
CONFIG_UNIX=y
|
||||
CONFIG_XFRM_USER=y
|
||||
CONFIG_INET=y
|
||||
CONFIG_IP_MULTICAST=y
|
||||
CONFIG_IP_ADVANCED_ROUTER=y
|
||||
CONFIG_IP_MULTIPLE_TABLES=y
|
||||
CONFIG_IP_ROUTE_MULTIPATH=y
|
||||
CONFIG_IP_ROUTE_VERBOSE=y
|
||||
CONFIG_IP_PNP=y
|
||||
CONFIG_IP_PNP_DHCP=y
|
||||
CONFIG_IP_PNP_BOOTP=y
|
||||
CONFIG_IP_PNP_RARP=y
|
||||
CONFIG_IP_MROUTE=y
|
||||
CONFIG_IP_PIMSM_V1=y
|
||||
CONFIG_IP_PIMSM_V2=y
|
||||
CONFIG_SYN_COOKIES=y
|
||||
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
|
||||
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
|
||||
# CONFIG_INET_XFRM_MODE_BEET is not set
|
||||
# CONFIG_INET_DIAG is not set
|
||||
CONFIG_TCP_CONG_ADVANCED=y
|
||||
# CONFIG_TCP_CONG_BIC is not set
|
||||
# CONFIG_TCP_CONG_WESTWOOD is not set
|
||||
# CONFIG_TCP_CONG_HTCP is not set
|
||||
CONFIG_TCP_MD5SIG=y
|
||||
CONFIG_IPV6=y
|
||||
CONFIG_INET6_AH=y
|
||||
CONFIG_INET6_ESP=y
|
||||
CONFIG_NETLABEL=y
|
||||
CONFIG_NETFILTER=y
|
||||
# CONFIG_NETFILTER_ADVANCED is not set
|
||||
CONFIG_NF_CONNTRACK=y
|
||||
CONFIG_NF_CONNTRACK_FTP=y
|
||||
CONFIG_NF_CONNTRACK_IRC=y
|
||||
CONFIG_NF_CONNTRACK_SIP=y
|
||||
CONFIG_NF_CT_NETLINK=y
|
||||
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
|
||||
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
|
||||
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
|
||||
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
|
||||
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
|
||||
CONFIG_NETFILTER_XT_MATCH_POLICY=y
|
||||
CONFIG_NETFILTER_XT_MATCH_STATE=y
|
||||
CONFIG_NF_CONNTRACK_IPV4=y
|
||||
CONFIG_IP_NF_IPTABLES=y
|
||||
CONFIG_IP_NF_FILTER=y
|
||||
CONFIG_IP_NF_TARGET_REJECT=y
|
||||
CONFIG_IP_NF_TARGET_ULOG=y
|
||||
CONFIG_NF_NAT=y
|
||||
CONFIG_IP_NF_TARGET_MASQUERADE=y
|
||||
CONFIG_IP_NF_MANGLE=y
|
||||
CONFIG_NF_CONNTRACK_IPV6=y
|
||||
CONFIG_IP6_NF_IPTABLES=y
|
||||
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
|
||||
CONFIG_IP6_NF_FILTER=y
|
||||
CONFIG_IP6_NF_TARGET_REJECT=y
|
||||
CONFIG_IP6_NF_MANGLE=y
|
||||
CONFIG_NET_SCHED=y
|
||||
CONFIG_NET_EMATCH=y
|
||||
CONFIG_NET_CLS_ACT=y
|
||||
CONFIG_HAMRADIO=y
|
||||
CONFIG_CFG80211=y
|
||||
CONFIG_MAC80211=y
|
||||
CONFIG_MAC80211_LEDS=y
|
||||
CONFIG_RFKILL=y
|
||||
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
|
||||
CONFIG_DEVTMPFS=y
|
||||
CONFIG_DEVTMPFS_MOUNT=y
|
||||
CONFIG_DEBUG_DEVRES=y
|
||||
CONFIG_CONNECTOR=y
|
||||
CONFIG_BLK_DEV_LOOP=y
|
||||
CONFIG_BLK_DEV_SD=y
|
||||
CONFIG_BLK_DEV_SR=y
|
||||
CONFIG_BLK_DEV_SR_VENDOR=y
|
||||
CONFIG_CHR_DEV_SG=y
|
||||
CONFIG_SCSI_CONSTANTS=y
|
||||
CONFIG_SCSI_SPI_ATTRS=y
|
||||
# CONFIG_SCSI_LOWLEVEL is not set
|
||||
CONFIG_ATA=y
|
||||
CONFIG_SATA_AHCI=y
|
||||
CONFIG_ATA_PIIX=y
|
||||
CONFIG_PATA_AMD=y
|
||||
CONFIG_PATA_OLDPIIX=y
|
||||
CONFIG_PATA_SCH=y
|
||||
CONFIG_MD=y
|
||||
CONFIG_BLK_DEV_MD=y
|
||||
CONFIG_BLK_DEV_DM=y
|
||||
CONFIG_DM_MIRROR=y
|
||||
CONFIG_DM_ZERO=y
|
||||
CONFIG_MACINTOSH_DRIVERS=y
|
||||
CONFIG_MAC_EMUMOUSEBTN=y
|
||||
CONFIG_NETDEVICES=y
|
||||
CONFIG_NETCONSOLE=y
|
||||
CONFIG_TIGON3=y
|
||||
CONFIG_NET_TULIP=y
|
||||
CONFIG_E100=y
|
||||
CONFIG_E1000=y
|
||||
CONFIG_SKY2=y
|
||||
CONFIG_FORCEDETH=y
|
||||
CONFIG_8139TOO=y
|
||||
CONFIG_FDDI=y
|
||||
CONFIG_INPUT_POLLDEV=y
|
||||
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
|
||||
CONFIG_INPUT_EVDEV=y
|
||||
CONFIG_INPUT_JOYSTICK=y
|
||||
CONFIG_INPUT_TABLET=y
|
||||
CONFIG_INPUT_TOUCHSCREEN=y
|
||||
CONFIG_INPUT_MISC=y
|
||||
CONFIG_VT_HW_CONSOLE_BINDING=y
|
||||
# CONFIG_LEGACY_PTYS is not set
|
||||
CONFIG_SERIAL_NONSTANDARD=y
|
||||
CONFIG_SERIAL_8250=y
|
||||
CONFIG_SERIAL_8250_CONSOLE=y
|
||||
CONFIG_SERIAL_8250_NR_UARTS=32
|
||||
CONFIG_SERIAL_8250_EXTENDED=y
|
||||
CONFIG_SERIAL_8250_MANY_PORTS=y
|
||||
CONFIG_SERIAL_8250_SHARE_IRQ=y
|
||||
CONFIG_SERIAL_8250_DETECT_IRQ=y
|
||||
CONFIG_SERIAL_8250_RSA=y
|
||||
CONFIG_HW_RANDOM=y
|
||||
# CONFIG_HW_RANDOM_INTEL is not set
|
||||
# CONFIG_HW_RANDOM_AMD is not set
|
||||
CONFIG_NVRAM=y
|
||||
CONFIG_HPET=y
|
||||
# CONFIG_HPET_MMAP is not set
|
||||
CONFIG_I2C_I801=y
|
||||
CONFIG_WATCHDOG=y
|
||||
CONFIG_AGP=y
|
||||
CONFIG_AGP_AMD64=y
|
||||
CONFIG_AGP_INTEL=y
|
||||
CONFIG_DRM=y
|
||||
CONFIG_DRM_I915=y
|
||||
CONFIG_DRM_I915_KMS=y
|
||||
CONFIG_FB_MODE_HELPERS=y
|
||||
CONFIG_FB_TILEBLITTING=y
|
||||
CONFIG_FB_EFI=y
|
||||
# CONFIG_LCD_CLASS_DEVICE is not set
|
||||
CONFIG_VGACON_SOFT_SCROLLBACK=y
|
||||
CONFIG_LOGO=y
|
||||
# CONFIG_LOGO_LINUX_MONO is not set
|
||||
# CONFIG_LOGO_LINUX_VGA16 is not set
|
||||
CONFIG_SOUND=y
|
||||
CONFIG_SND=y
|
||||
CONFIG_SND_SEQUENCER=y
|
||||
CONFIG_SND_SEQ_DUMMY=y
|
||||
CONFIG_SND_MIXER_OSS=y
|
||||
CONFIG_SND_PCM_OSS=y
|
||||
CONFIG_SND_SEQUENCER_OSS=y
|
||||
CONFIG_SND_HRTIMER=y
|
||||
CONFIG_SND_HDA_INTEL=y
|
||||
CONFIG_SND_HDA_HWDEP=y
|
||||
CONFIG_HIDRAW=y
|
||||
CONFIG_HID_GYRATION=y
|
||||
CONFIG_LOGITECH_FF=y
|
||||
CONFIG_HID_NTRIG=y
|
||||
CONFIG_HID_PANTHERLORD=y
|
||||
CONFIG_PANTHERLORD_FF=y
|
||||
CONFIG_HID_PETALYNX=y
|
||||
CONFIG_HID_SAMSUNG=y
|
||||
CONFIG_HID_SONY=y
|
||||
CONFIG_HID_SUNPLUS=y
|
||||
CONFIG_HID_TOPSEED=y
|
||||
CONFIG_HID_PID=y
|
||||
CONFIG_USB_HIDDEV=y
|
||||
CONFIG_USB=y
|
||||
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
|
||||
CONFIG_USB_MON=y
|
||||
CONFIG_USB_EHCI_HCD=y
|
||||
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
|
||||
CONFIG_USB_OHCI_HCD=y
|
||||
CONFIG_USB_UHCI_HCD=y
|
||||
CONFIG_USB_PRINTER=y
|
||||
CONFIG_USB_STORAGE=y
|
||||
CONFIG_USB_LIBUSUAL=y
|
||||
CONFIG_EDAC=y
|
||||
CONFIG_RTC_CLASS=y
|
||||
# CONFIG_RTC_HCTOSYS is not set
|
||||
CONFIG_DMADEVICES=y
|
||||
CONFIG_EEEPC_LAPTOP=y
|
||||
CONFIG_AMD_IOMMU=y
|
||||
CONFIG_AMD_IOMMU_STATS=y
|
||||
CONFIG_INTEL_IOMMU=y
|
||||
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
|
||||
CONFIG_EFI_VARS=y
|
||||
CONFIG_EXT4_FS=y
|
||||
CONFIG_EXT4_FS_POSIX_ACL=y
|
||||
CONFIG_EXT4_FS_SECURITY=y
|
||||
CONFIG_QUOTA=y
|
||||
CONFIG_QUOTA_NETLINK_INTERFACE=y
|
||||
# CONFIG_PRINT_QUOTA_WARNING is not set
|
||||
CONFIG_QFMT_V2=y
|
||||
CONFIG_AUTOFS4_FS=y
|
||||
CONFIG_ISO9660_FS=y
|
||||
CONFIG_JOLIET=y
|
||||
CONFIG_ZISOFS=y
|
||||
CONFIG_MSDOS_FS=y
|
||||
CONFIG_VFAT_FS=y
|
||||
CONFIG_PROC_KCORE=y
|
||||
CONFIG_TMPFS_POSIX_ACL=y
|
||||
CONFIG_HUGETLBFS=y
|
||||
CONFIG_NFS_FS=y
|
||||
CONFIG_NFS_V3_ACL=y
|
||||
CONFIG_NFS_V4=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_NLS_DEFAULT="utf8"
|
||||
CONFIG_NLS_CODEPAGE_437=y
|
||||
CONFIG_NLS_ASCII=y
|
||||
CONFIG_NLS_ISO8859_1=y
|
||||
CONFIG_NLS_UTF8=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
# CONFIG_ENABLE_WARN_DEPRECATED is not set
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
# CONFIG_UNUSED_SYMBOLS is not set
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
# CONFIG_SCHED_DEBUG is not set
|
||||
CONFIG_SCHEDSTATS=y
|
||||
CONFIG_TIMER_STATS=y
|
||||
CONFIG_DEBUG_STACK_USAGE=y
|
||||
CONFIG_BLK_DEV_IO_TRACE=y
|
||||
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
|
||||
CONFIG_EARLY_PRINTK_DBGP=y
|
||||
CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
# CONFIG_DEBUG_RODATA_TEST is not set
|
||||
CONFIG_DEBUG_BOOT_PARAMS=y
|
||||
CONFIG_OPTIMIZE_INLINING=y
|
||||
CONFIG_KEYS_DEBUG_PROC_KEYS=y
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_SECURITY_NETWORK=y
|
||||
CONFIG_SECURITY_SELINUX=y
|
||||
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
|
||||
CONFIG_SECURITY_SELINUX_DISABLE=y
|
||||
# CONFIG_CRYPTO_ANSI_CPRNG is not set
|
||||
93
arch/x86/crypto/Makefile
Normal file
93
arch/x86/crypto/Makefile
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
|
||||
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
|
||||
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
|
||||
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
|
||||
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
|
||||
|
||||
# These modules require the assembler to support AVX.
|
||||
ifeq ($(avx_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
|
||||
camellia-aesni-avx-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
|
||||
endif
|
||||
|
||||
# These modules require the assembler to support AVX2.
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/
|
||||
endif
|
||||
|
||||
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
||||
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
|
||||
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
|
||||
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
|
||||
|
||||
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
|
||||
des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
|
||||
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
|
||||
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
|
||||
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
|
||||
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
|
||||
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
|
||||
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
|
||||
|
||||
ifeq ($(avx_supported),yes)
|
||||
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
|
||||
camellia_aesni_avx_glue.o
|
||||
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
|
||||
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
|
||||
twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \
|
||||
twofish_avx_glue.o
|
||||
serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \
|
||||
serpent_avx_glue.o
|
||||
endif
|
||||
|
||||
ifeq ($(avx2_supported),yes)
|
||||
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
|
||||
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
|
||||
endif
|
||||
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
||||
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
|
||||
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
||||
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|
||||
ifeq ($(avx2_supported),yes)
|
||||
sha1-ssse3-y += sha1_avx2_x86_64_asm.o
|
||||
endif
|
||||
crc32c-intel-y := crc32c-intel_glue.o
|
||||
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
|
||||
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
|
||||
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
|
||||
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
|
||||
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
|
||||
362
arch/x86/crypto/aes-i586-asm_32.S
Normal file
362
arch/x86/crypto/aes-i586-asm_32.S
Normal file
|
|
@ -0,0 +1,362 @@
|
|||
// -------------------------------------------------------------------------
|
||||
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
|
||||
// All rights reserved.
|
||||
//
|
||||
// LICENSE TERMS
|
||||
//
|
||||
// The free distribution and use of this software in both source and binary
|
||||
// form is allowed (with or without changes) provided that:
|
||||
//
|
||||
// 1. distributions of this source code include the above copyright
|
||||
// notice, this list of conditions and the following disclaimer//
|
||||
//
|
||||
// 2. distributions in binary form include the above copyright
|
||||
// notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other associated materials//
|
||||
//
|
||||
// 3. the copyright holder's name is not used to endorse products
|
||||
// built using this software without specific written permission.
|
||||
//
|
||||
//
|
||||
// ALTERNATIVELY, provided that this notice is retained in full, this product
|
||||
// may be distributed under the terms of the GNU General Public License (GPL),
|
||||
// in which case the provisions of the GPL apply INSTEAD OF those given above.
|
||||
//
|
||||
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
|
||||
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
|
||||
|
||||
// DISCLAIMER
|
||||
//
|
||||
// This software is provided 'as is' with no explicit or implied warranties
|
||||
// in respect of its properties including, but not limited to, correctness
|
||||
// and fitness for purpose.
|
||||
// -------------------------------------------------------------------------
|
||||
// Issue Date: 29/07/2002
|
||||
|
||||
.file "aes-i586-asm.S"
|
||||
.text
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
|
||||
|
||||
/* offsets to parameters with one register pushed onto stack */
|
||||
#define ctx 8
|
||||
#define out_blk 12
|
||||
#define in_blk 16
|
||||
|
||||
/* offsets in crypto_aes_ctx structure */
|
||||
#define klen (480)
|
||||
#define ekey (0)
|
||||
#define dkey (240)
|
||||
|
||||
// register mapping for encrypt and decrypt subroutines
|
||||
|
||||
#define r0 eax
|
||||
#define r1 ebx
|
||||
#define r2 ecx
|
||||
#define r3 edx
|
||||
#define r4 esi
|
||||
#define r5 edi
|
||||
|
||||
#define eaxl al
|
||||
#define eaxh ah
|
||||
#define ebxl bl
|
||||
#define ebxh bh
|
||||
#define ecxl cl
|
||||
#define ecxh ch
|
||||
#define edxl dl
|
||||
#define edxh dh
|
||||
|
||||
// h(reg)/l(reg) name the high/low byte register of a 32-bit register.
// The extra _h/_l level lets a register alias expand before the paste:
// h(r0) -> _h(eax) -> eaxh -> ah, and l(r0) -> _l(eax) -> eaxl -> al.
// Byte aliases are defined above only for eax, ebx, ecx and edx, so these
// helpers can only be applied to r0-r3.
#define _h(reg) reg##h
|
||||
#define h(reg) _h(reg)
|
||||
|
||||
#define _l(reg) reg##l
|
||||
#define l(reg) _l(reg)
|
||||
|
||||
// This macro takes a 32-bit word representing a column and uses
|
||||
// each of its four bytes to index into four tables of 256 32-bit
|
||||
// words to obtain values that are then xored into the appropriate
|
||||
// output registers (the macro arguments a1, a2, a3 and a4).
|
||||
|
||||
// Parameters:
// table    table base address
// a1       out_state[0]
// a2       out_state[1]
// a3       out_state[2]
// a4       out_state[3]
// idx      input register for the round (destroyed)
// tmp      scratch register for the round
// sched    key schedule (do_fcol and do_icol only)
|
||||
|
||||
// Byte 0 (lowest) of idx selects the word of table that is xored into a1,
// byte 1 selects from table+tlen into a2, byte 2 from table+2*tlen into a3
// and byte 3 from table+3*tlen into a4.  idx is shifted right by 16 and
// then overwritten with its own top byte, so its value is lost; tmp holds
// the other byte indices.
#define do_col(table, a1,a2,a3,a4, idx, tmp) \
|
||||
movzx %l(idx),%tmp; \
|
||||
xor table(,%tmp,4),%a1; \
|
||||
movzx %h(idx),%tmp; \
|
||||
shr $16,%idx; \
|
||||
xor table+tlen(,%tmp,4),%a2; \
|
||||
movzx %l(idx),%tmp; \
|
||||
movzx %h(idx),%idx; \
|
||||
xor table+2*tlen(,%tmp,4),%a3; \
|
||||
xor table+3*tlen(,%idx,4),%a4;
|
||||
|
||||
// initialise output registers from the key schedule
|
||||
// NB1: original value of a3 is in idx on exit
|
||||
// NB2: original values of a1,a2,a4 aren't used
|
||||
// Same table lookups as do_col, but each output is first loaded from the
// key schedule instead of being accumulated: a1 <- 0 sched, a2 <- 12 sched,
// a4 <- 4 sched, a3 <- 8 sched.  The old a3 is copied into idx before a3
// is reloaded, which is what NB1 above refers to.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
|
||||
mov 0 sched,%a1; \
|
||||
movzx %l(idx),%tmp; \
|
||||
mov 12 sched,%a2; \
|
||||
xor table(,%tmp,4),%a1; \
|
||||
mov 4 sched,%a4; \
|
||||
movzx %h(idx),%tmp; \
|
||||
shr $16,%idx; \
|
||||
xor table+tlen(,%tmp,4),%a2; \
|
||||
movzx %l(idx),%tmp; \
|
||||
movzx %h(idx),%idx; \
|
||||
xor table+3*tlen(,%idx,4),%a4; \
|
||||
mov %a3,%idx; \
|
||||
mov 8 sched,%a3; \
|
||||
xor table+2*tlen(,%tmp,4),%a3;
|
||||
|
||||
// initialise output registers from the key schedule
|
||||
// NB1: original value of a3 is in idx on exit
|
||||
// NB2: original values of a1,a2,a4 aren't used
|
||||
// Identical to do_fcol except for the key-schedule offsets loaded into the
// second and fourth outputs: here a2 <- 4 sched and a4 <- 12 sched
// (do_fcol uses 12 and 4 respectively).
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
|
||||
mov 0 sched,%a1; \
|
||||
movzx %l(idx),%tmp; \
|
||||
mov 4 sched,%a2; \
|
||||
xor table(,%tmp,4),%a1; \
|
||||
mov 12 sched,%a4; \
|
||||
movzx %h(idx),%tmp; \
|
||||
shr $16,%idx; \
|
||||
xor table+tlen(,%tmp,4),%a2; \
|
||||
movzx %l(idx),%tmp; \
|
||||
movzx %h(idx),%idx; \
|
||||
xor table+3*tlen(,%idx,4),%a4; \
|
||||
mov %a3,%idx; \
|
||||
mov 8 sched,%a3; \
|
||||
xor table+2*tlen(,%tmp,4),%a3;
|
||||
|
||||
|
||||
// original Gladman had conditional saves to MMX regs.
|
||||
// save(slot, reg): store register a2 into the 32-bit stack slot number a1
// (byte offset 4*a1 from %esp).
#define save(a1, a2) \
|
||||
mov %a2,4*a1(%esp)
|
||||
|
||||
// restore(reg, slot): load register a1 from the 32-bit stack slot number a2
// (byte offset 4*a2 from %esp).  Note the argument order is the reverse of
// save().
#define restore(a1, a2) \
|
||||
mov 4*a2(%esp),%a1
|
||||
|
||||
// These macros perform a forward encryption cycle. They are entered with
|
||||
// the first previous round column values in r0,r1,r4,r5 and
|
||||
// exit with the final values in the same registers, using stack
|
||||
// for temporary storage.
|
||||
|
||||
// round column values
|
||||
// on entry: r0,r1,r4,r5
|
||||
// on exit: r2,r1,r4,r5
|
||||
// r1 and r5 are spilled to stack slots 0 and 1 first, because do_fcol
// overwrites them as outputs before their old values have been consumed;
// each is later reloaded into r0 to act as idx for a do_col.  r3 is the
// scratch register throughout.  The idx=... notes name the input column
// that r0 holds at that step.
#define fwd_rnd1(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
|
||||
do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
|
||||
restore(r0,0); \
|
||||
do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
|
||||
restore(r0,1); \
|
||||
do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
|
||||
|
||||
// round column values
|
||||
// on entry: r2,r1,r4,r5
|
||||
// on exit: r0,r1,r4,r5
|
||||
// Mirror of fwd_rnd1 with the roles of r0 and r2 exchanged: r2 carries the
// first input column and serves as idx, r0 receives the first output.
// r1 and r5 are again spilled to stack slots 0 and 1 and reloaded into the
// idx register; r3 is the scratch register.
#define fwd_rnd2(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
|
||||
do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
|
||||
restore(r2,0); \
|
||||
do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
|
||||
restore(r2,1); \
|
||||
do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
|
||||
|
||||
// These macros perform an inverse encryption cycle. They are entered with
|
||||
// the first previous round column values in r0,r1,r4,r5 and
|
||||
// exit with the final values in the same registers, using stack
|
||||
// for temporary storage
|
||||
|
||||
// round column values
|
||||
// on entry: r0,r1,r4,r5
|
||||
// on exit: r2,r1,r4,r5
|
||||
#define inv_rnd1(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
|
||||
do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
|
||||
restore(r0,0); \
|
||||
do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
|
||||
restore(r0,1); \
|
||||
do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
|
||||
|
||||
// round column values
|
||||
// on entry: r2,r1,r4,r5
|
||||
// on exit: r0,r1,r4,r5
|
||||
#define inv_rnd2(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
|
||||
do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
|
||||
restore(r2,0); \
|
||||
do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
|
||||
restore(r2,1); \
|
||||
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
|
||||
|
||||
// AES (Rijndael) Encryption Subroutine
|
||||
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.extern crypto_ft_tab
|
||||
.extern crypto_fl_tab
|
||||
|
||||
ENTRY(aes_enc_blk)
|
||||
push %ebp
|
||||
mov ctx(%esp),%ebp
|
||||
|
||||
// CAUTION: the order and the values used in these assigns
|
||||
// rely on the register mappings
|
||||
|
||||
1: push %ebx
|
||||
mov in_blk+4(%esp),%r2
|
||||
push %esi
|
||||
mov klen(%ebp),%r3 // key size
|
||||
push %edi
|
||||
#if ekey != 0
|
||||
lea ekey(%ebp),%ebp // key pointer
|
||||
#endif
|
||||
|
||||
// input four columns and xor in first round key
|
||||
|
||||
mov (%r2),%r0
|
||||
mov 4(%r2),%r1
|
||||
mov 8(%r2),%r4
|
||||
mov 12(%r2),%r5
|
||||
xor (%ebp),%r0
|
||||
xor 4(%ebp),%r1
|
||||
xor 8(%ebp),%r4
|
||||
xor 12(%ebp),%r5
|
||||
|
||||
sub $8,%esp // space for register saves on stack
|
||||
add $16,%ebp // increment to next round key
|
||||
cmp $24,%r3
|
||||
jb 4f // 10 rounds for 128-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
je 3f // 12 rounds for 192-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
|
||||
2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
|
||||
fwd_rnd2( -48(%ebp), crypto_ft_tab)
|
||||
3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
|
||||
fwd_rnd2( -16(%ebp), crypto_ft_tab)
|
||||
4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
|
||||
fwd_rnd2( +16(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1( +32(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2( +48(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1( +64(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2( +80(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1( +96(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2(+112(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1(+128(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
|
||||
|
||||
// move final values to the output array. CAUTION: the
|
||||
// order of these assigns rely on the register mappings
|
||||
|
||||
add $8,%esp
|
||||
mov out_blk+12(%esp),%ebp
|
||||
mov %r5,12(%ebp)
|
||||
pop %edi
|
||||
mov %r4,8(%ebp)
|
||||
pop %esi
|
||||
mov %r1,4(%ebp)
|
||||
pop %ebx
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
ret
|
||||
ENDPROC(aes_enc_blk)
|
||||
|
||||
// AES (Rijndael) Decryption Subroutine
|
||||
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.extern crypto_it_tab
|
||||
.extern crypto_il_tab
|
||||
|
||||
ENTRY(aes_dec_blk)
|
||||
push %ebp
|
||||
mov ctx(%esp),%ebp
|
||||
|
||||
// CAUTION: the order and the values used in these assigns
|
||||
// rely on the register mappings
|
||||
|
||||
1: push %ebx
|
||||
mov in_blk+4(%esp),%r2
|
||||
push %esi
|
||||
mov klen(%ebp),%r3 // key size
|
||||
push %edi
|
||||
#if dkey != 0
|
||||
lea dkey(%ebp),%ebp // key pointer
|
||||
#endif
|
||||
|
||||
// input four columns and xor in first round key
|
||||
|
||||
mov (%r2),%r0
|
||||
mov 4(%r2),%r1
|
||||
mov 8(%r2),%r4
|
||||
mov 12(%r2),%r5
|
||||
xor (%ebp),%r0
|
||||
xor 4(%ebp),%r1
|
||||
xor 8(%ebp),%r4
|
||||
xor 12(%ebp),%r5
|
||||
|
||||
sub $8,%esp // space for register saves on stack
|
||||
add $16,%ebp // increment to next round key
|
||||
cmp $24,%r3
|
||||
jb 4f // 10 rounds for 128-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
je 3f // 12 rounds for 192-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
|
||||
2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
|
||||
inv_rnd2( -48(%ebp), crypto_it_tab)
|
||||
3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
|
||||
inv_rnd2( -16(%ebp), crypto_it_tab)
|
||||
4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
|
||||
inv_rnd2( +16(%ebp), crypto_it_tab)
|
||||
inv_rnd1( +32(%ebp), crypto_it_tab)
|
||||
inv_rnd2( +48(%ebp), crypto_it_tab)
|
||||
inv_rnd1( +64(%ebp), crypto_it_tab)
|
||||
inv_rnd2( +80(%ebp), crypto_it_tab)
|
||||
inv_rnd1( +96(%ebp), crypto_it_tab)
|
||||
inv_rnd2(+112(%ebp), crypto_it_tab)
|
||||
inv_rnd1(+128(%ebp), crypto_it_tab)
|
||||
inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
|
||||
|
||||
// move final values to the output array. CAUTION: the
|
||||
// order of these assigns rely on the register mappings
|
||||
|
||||
add $8,%esp
|
||||
mov out_blk+12(%esp),%ebp
|
||||
mov %r5,12(%ebp)
|
||||
pop %edi
|
||||
mov %r4,8(%ebp)
|
||||
pop %esi
|
||||
mov %r1,4(%ebp)
|
||||
pop %ebx
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
ret
|
||||
ENDPROC(aes_dec_blk)
|
||||
188
arch/x86/crypto/aes-x86_64-asm_64.S
Normal file
188
arch/x86/crypto/aes-x86_64-asm_64.S
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
|
||||
*
|
||||
* Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
|
||||
*
|
||||
* License:
|
||||
* This code can be distributed under the terms of the GNU General Public
|
||||
* License (GPL) Version 2 provided that the above header down to and
|
||||
* including this sentence is retained in full.
|
||||
*/
|
||||
|
||||
.extern crypto_ft_tab
|
||||
.extern crypto_it_tab
|
||||
.extern crypto_fl_tab
|
||||
.extern crypto_il_tab
|
||||
|
||||
.text
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define R1 %rax
|
||||
#define R1E %eax
|
||||
#define R1X %ax
|
||||
#define R1H %ah
|
||||
#define R1L %al
|
||||
#define R2 %rbx
|
||||
#define R2E %ebx
|
||||
#define R2X %bx
|
||||
#define R2H %bh
|
||||
#define R2L %bl
|
||||
#define R3 %rcx
|
||||
#define R3E %ecx
|
||||
#define R3X %cx
|
||||
#define R3H %ch
|
||||
#define R3L %cl
|
||||
#define R4 %rdx
|
||||
#define R4E %edx
|
||||
#define R4X %dx
|
||||
#define R4H %dh
|
||||
#define R4L %dl
|
||||
#define R5 %rsi
|
||||
#define R5E %esi
|
||||
#define R6 %rdi
|
||||
#define R6E %edi
|
||||
#define R7 %rbp
|
||||
#define R7E %ebp
|
||||
#define R8 %r8
|
||||
#define R9 %r9
|
||||
#define R10 %r10
|
||||
#define R11 %r11
|
||||
|
||||
/*
 * prologue - emit ENTRY(FUNC) followed by the common setup for one block.
 *
 * In order, the expansion:
 *   - copies r1 to r2 and r3 to r4 (r1 is overwritten further down);
 *   - points r9 at KEY+48 bytes past r8;
 *   - copies r10 to r11 (r10 is overwritten further down);
 *   - loads four 32-bit words from (r7) into r5E, r1E, r6E and r7E, the
 *     base register r7 being overwritten by the last load;
 *   - loads the 32-bit value at 480(r8) into r10E;
 *   - XORs the four loaded words with the words at -48(r9)..-36(r9),
 *     i.e. the first 16 bytes at KEY(r8);
 *   - compares the 480(r8) value with 24: below branches to B128, equal
 *     branches to B192 after r9 += 32, otherwise falls through with
 *     r9 += 64.
 *
 * NOTE(review): 480(r8) looks like the key-length field of the AES context
 * and the two movq copies look like spills of callee-saved registers -
 * confirm against struct crypto_aes_ctx and the entry() register mapping.
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
|
||||
ENTRY(FUNC); \
|
||||
movq r1,r2; \
|
||||
movq r3,r4; \
|
||||
leaq KEY+48(r8),r9; \
|
||||
movq r10,r11; \
|
||||
movl (r7),r5 ## E; \
|
||||
movl 4(r7),r1 ## E; \
|
||||
movl 8(r7),r6 ## E; \
|
||||
movl 12(r7),r7 ## E; \
|
||||
movl 480(r8),r10 ## E; \
|
||||
xorl -48(r9),r5 ## E; \
|
||||
xorl -44(r9),r1 ## E; \
|
||||
xorl -40(r9),r6 ## E; \
|
||||
xorl -36(r9),r7 ## E; \
|
||||
cmpl $24,r10 ## E; \
|
||||
jb B128; \
|
||||
leaq 32(r9),r9; \
|
||||
je B192; \
|
||||
leaq 32(r9),r9;
|
||||
|
||||
/*
 * epilogue - common tail: copy registers back, store the block, return.
 *
 * Copies r1 to r2 and r3 to r4, stores the four 32-bit words r5E, r6E,
 * r7E and r8E to (r9), 4(r9), 8(r9) and 12(r9), executes ret and closes
 * the symbol with ENDPROC(FUNC).
 *
 * As instantiated by return(), the two movq copies are the reverse of the
 * copies made by prologue() as instantiated by entry(), and r9 is the
 * register that prologue() copied its r10 argument into.
 */
#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
|
||||
movq r1,r2; \
|
||||
movq r3,r4; \
|
||||
movl r5 ## E,(r9); \
|
||||
movl r6 ## E,4(r9); \
|
||||
movl r7 ## E,8(r9); \
|
||||
movl r8 ## E,12(r9); \
|
||||
ret; \
|
||||
ENDPROC(FUNC);
|
||||
|
||||
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
|
||||
movzbl r2 ## H,r5 ## E; \
|
||||
movzbl r2 ## L,r6 ## E; \
|
||||
movl TAB+1024(,r5,4),r5 ## E;\
|
||||
movw r4 ## X,r2 ## X; \
|
||||
movl TAB(,r6,4),r6 ## E; \
|
||||
roll $16,r2 ## E; \
|
||||
shrl $16,r4 ## E; \
|
||||
movzbl r4 ## H,r7 ## E; \
|
||||
movzbl r4 ## L,r4 ## E; \
|
||||
xorl OFFSET(r8),ra ## E; \
|
||||
xorl OFFSET+4(r8),rb ## E; \
|
||||
xorl TAB+3072(,r7,4),r5 ## E;\
|
||||
xorl TAB+2048(,r4,4),r6 ## E;\
|
||||
movzbl r1 ## L,r7 ## E; \
|
||||
movzbl r1 ## H,r4 ## E; \
|
||||
movl TAB+1024(,r4,4),r4 ## E;\
|
||||
movw r3 ## X,r1 ## X; \
|
||||
roll $16,r1 ## E; \
|
||||
shrl $16,r3 ## E; \
|
||||
xorl TAB(,r7,4),r5 ## E; \
|
||||
movzbl r3 ## H,r7 ## E; \
|
||||
movzbl r3 ## L,r3 ## E; \
|
||||
xorl TAB+3072(,r7,4),r4 ## E;\
|
||||
xorl TAB+2048(,r3,4),r5 ## E;\
|
||||
movzbl r1 ## H,r7 ## E; \
|
||||
movzbl r1 ## L,r3 ## E; \
|
||||
shrl $16,r1 ## E; \
|
||||
xorl TAB+3072(,r7,4),r6 ## E;\
|
||||
movl TAB+2048(,r3,4),r3 ## E;\
|
||||
movzbl r1 ## H,r7 ## E; \
|
||||
movzbl r1 ## L,r1 ## E; \
|
||||
xorl TAB+1024(,r7,4),r6 ## E;\
|
||||
xorl TAB(,r1,4),r3 ## E; \
|
||||
movzbl r2 ## H,r1 ## E; \
|
||||
movzbl r2 ## L,r7 ## E; \
|
||||
shrl $16,r2 ## E; \
|
||||
xorl TAB+3072(,r1,4),r3 ## E;\
|
||||
xorl TAB+2048(,r7,4),r4 ## E;\
|
||||
movzbl r2 ## H,r1 ## E; \
|
||||
movzbl r2 ## L,r2 ## E; \
|
||||
xorl OFFSET+8(r8),rc ## E; \
|
||||
xorl OFFSET+12(r8),rd ## E; \
|
||||
xorl TAB+1024(,r1,4),r3 ## E;\
|
||||
xorl TAB(,r2,4),r4 ## E;
|
||||
|
||||
#define move_regs(r1,r2,r3,r4) \
|
||||
movl r3 ## E,r1 ## E; \
|
||||
movl r4 ## E,r2 ## E;
|
||||
|
||||
#define entry(FUNC,KEY,B128,B192) \
|
||||
prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
|
||||
|
||||
#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
|
||||
|
||||
#define encrypt_round(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
|
||||
move_regs(R1,R2,R5,R6)
|
||||
|
||||
#define encrypt_final(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
|
||||
|
||||
#define decrypt_round(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
|
||||
move_regs(R1,R2,R5,R6)
|
||||
|
||||
#define decrypt_final(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
|
||||
|
||||
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in) */
|
||||
|
||||
entry(aes_enc_blk,0,.Le128,.Le192)
|
||||
encrypt_round(crypto_ft_tab,-96)
|
||||
encrypt_round(crypto_ft_tab,-80)
|
||||
.Le192: encrypt_round(crypto_ft_tab,-64)
|
||||
encrypt_round(crypto_ft_tab,-48)
|
||||
.Le128: encrypt_round(crypto_ft_tab,-32)
|
||||
encrypt_round(crypto_ft_tab,-16)
|
||||
encrypt_round(crypto_ft_tab, 0)
|
||||
encrypt_round(crypto_ft_tab, 16)
|
||||
encrypt_round(crypto_ft_tab, 32)
|
||||
encrypt_round(crypto_ft_tab, 48)
|
||||
encrypt_round(crypto_ft_tab, 64)
|
||||
encrypt_round(crypto_ft_tab, 80)
|
||||
encrypt_round(crypto_ft_tab, 96)
|
||||
encrypt_final(crypto_fl_tab,112)
|
||||
return(aes_enc_blk)
|
||||
|
||||
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in) */
|
||||
|
||||
entry(aes_dec_blk,240,.Ld128,.Ld192)
|
||||
decrypt_round(crypto_it_tab,-96)
|
||||
decrypt_round(crypto_it_tab,-80)
|
||||
.Ld192: decrypt_round(crypto_it_tab,-64)
|
||||
decrypt_round(crypto_it_tab,-48)
|
||||
.Ld128: decrypt_round(crypto_it_tab,-32)
|
||||
decrypt_round(crypto_it_tab,-16)
|
||||
decrypt_round(crypto_it_tab, 0)
|
||||
decrypt_round(crypto_it_tab, 16)
|
||||
decrypt_round(crypto_it_tab, 32)
|
||||
decrypt_round(crypto_it_tab, 48)
|
||||
decrypt_round(crypto_it_tab, 64)
|
||||
decrypt_round(crypto_it_tab, 80)
|
||||
decrypt_round(crypto_it_tab, 96)
|
||||
decrypt_final(crypto_il_tab,112)
|
||||
return(aes_dec_blk)
|
||||
580
arch/x86/crypto/aes_ctrby8_avx-x86_64.S
Normal file
580
arch/x86/crypto/aes_ctrby8_avx-x86_64.S
Normal file
|
|
@ -0,0 +1,580 @@
|
|||
/*
|
||||
* Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
|
||||
*
|
||||
* This is AES128/192/256 CTR mode optimization implementation. It requires
|
||||
* the support of Intel(R) AESNI and AVX instructions.
|
||||
*
|
||||
* This work was inspired by the AES CTR mode optimization published
|
||||
* in Intel Optimized IPSEC Cryptographic library.
|
||||
* Additional information on it can be found at:
|
||||
* http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Sean Gulley <sean.m.gulley@intel.com>
|
||||
* Chandramouli Narayanan <mouli@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/inst.h>
|
||||
|
||||
#define CONCAT(a,b) a##b
|
||||
#define VMOVDQ vmovdqu
|
||||
|
||||
#define xdata0 %xmm0
|
||||
#define xdata1 %xmm1
|
||||
#define xdata2 %xmm2
|
||||
#define xdata3 %xmm3
|
||||
#define xdata4 %xmm4
|
||||
#define xdata5 %xmm5
|
||||
#define xdata6 %xmm6
|
||||
#define xdata7 %xmm7
|
||||
#define xcounter %xmm8
|
||||
#define xbyteswap %xmm9
|
||||
#define xkey0 %xmm10
|
||||
#define xkey4 %xmm11
|
||||
#define xkey8 %xmm12
|
||||
#define xkey12 %xmm13
|
||||
#define xkeyA %xmm14
|
||||
#define xkeyB %xmm15
|
||||
|
||||
#define p_in %rdi
|
||||
#define p_iv %rsi
|
||||
#define p_keys %rdx
|
||||
#define p_out %rcx
|
||||
#define num_bytes %r8
|
||||
|
||||
#define tmp %r10
|
||||
#define DDQ(i) CONCAT(ddq_add_,i)
|
||||
#define XMM(i) CONCAT(%xmm, i)
|
||||
#define DDQ_DATA 0
|
||||
#define XDATA 1
|
||||
#define KEY_128 1
|
||||
#define KEY_192 2
|
||||
#define KEY_256 3
|
||||
|
||||
.section .rodata
|
||||
.align 16
|
||||
|
||||
byteswap_const:
|
||||
.octa 0x000102030405060708090A0B0C0D0E0F
|
||||
ddq_low_msk:
|
||||
.octa 0x0000000000000000FFFFFFFFFFFFFFFF
|
||||
ddq_high_add_1:
|
||||
.octa 0x00000000000000010000000000000000
|
||||
ddq_add_1:
|
||||
.octa 0x00000000000000000000000000000001
|
||||
ddq_add_2:
|
||||
.octa 0x00000000000000000000000000000002
|
||||
ddq_add_3:
|
||||
.octa 0x00000000000000000000000000000003
|
||||
ddq_add_4:
|
||||
.octa 0x00000000000000000000000000000004
|
||||
ddq_add_5:
|
||||
.octa 0x00000000000000000000000000000005
|
||||
ddq_add_6:
|
||||
.octa 0x00000000000000000000000000000006
|
||||
ddq_add_7:
|
||||
.octa 0x00000000000000000000000000000007
|
||||
ddq_add_8:
|
||||
.octa 0x00000000000000000000000000000008
|
||||
|
||||
.text
|
||||
|
||||
/* generate a unique variable for ddq_add_x */
|
||||
|
||||
.macro setddq n
|
||||
var_ddq_add = DDQ(\n)
|
||||
.endm
|
||||
|
||||
/* generate a unique variable for xmm register */
|
||||
.macro setxdata n
|
||||
var_xdata = XMM(\n)
|
||||
.endm
|
||||
|
||||
/* club the numeric 'id' to the symbol 'name' */
|
||||
|
||||
.macro club name, id
|
||||
.altmacro
|
||||
.if \name == DDQ_DATA
|
||||
setddq %\id
|
||||
.elseif \name == XDATA
|
||||
setxdata %\id
|
||||
.endif
|
||||
.noaltmacro
|
||||
.endm
|
||||
|
||||
/*
|
||||
* do_aes num_in_par load_keys key_len
|
||||
* This increments p_in, but not p_out
|
||||
*/
|
||||
.macro do_aes b, k, key_len
|
||||
.set by, \b
|
||||
.set load_keys, \k
|
||||
.set klen, \key_len
|
||||
|
||||
.if (load_keys)
|
||||
vmovdqa 0*16(p_keys), xkey0
|
||||
.endif
|
||||
|
||||
vpshufb xbyteswap, xcounter, xdata0
|
||||
|
||||
.set i, 1
|
||||
.rept (by - 1)
|
||||
club DDQ_DATA, i
|
||||
club XDATA, i
|
||||
vpaddq var_ddq_add(%rip), xcounter, var_xdata
|
||||
vptest ddq_low_msk(%rip), var_xdata
|
||||
jnz 1f
|
||||
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
|
||||
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||
1:
|
||||
vpshufb xbyteswap, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
vmovdqa 1*16(p_keys), xkeyA
|
||||
|
||||
vpxor xkey0, xdata0, xdata0
|
||||
club DDQ_DATA, by
|
||||
vpaddq var_ddq_add(%rip), xcounter, xcounter
|
||||
vptest ddq_low_msk(%rip), xcounter
|
||||
jnz 1f
|
||||
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||
1:
|
||||
|
||||
.set i, 1
|
||||
.rept (by - 1)
|
||||
club XDATA, i
|
||||
vpxor xkey0, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
vmovdqa 2*16(p_keys), xkeyB
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
vaesenc xkeyA, var_xdata, var_xdata /* key 1 */
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen == KEY_128)
|
||||
.if (load_keys)
|
||||
vmovdqa 3*16(p_keys), xkey4
|
||||
.endif
|
||||
.else
|
||||
vmovdqa 3*16(p_keys), xkeyA
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
vaesenc xkeyB, var_xdata, var_xdata /* key 2 */
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
add $(16*by), p_in
|
||||
|
||||
.if (klen == KEY_128)
|
||||
vmovdqa 4*16(p_keys), xkeyB
|
||||
.else
|
||||
.if (load_keys)
|
||||
vmovdqa 4*16(p_keys), xkey4
|
||||
.endif
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 3 */
|
||||
.if (klen == KEY_128)
|
||||
vaesenc xkey4, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenc xkeyA, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
vmovdqa 5*16(p_keys), xkeyA
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 4 */
|
||||
.if (klen == KEY_128)
|
||||
vaesenc xkeyB, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenc xkey4, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen == KEY_128)
|
||||
.if (load_keys)
|
||||
vmovdqa 6*16(p_keys), xkey8
|
||||
.endif
|
||||
.else
|
||||
vmovdqa 6*16(p_keys), xkeyB
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
vaesenc xkeyA, var_xdata, var_xdata /* key 5 */
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
vmovdqa 7*16(p_keys), xkeyA
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 6 */
|
||||
.if (klen == KEY_128)
|
||||
vaesenc xkey8, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenc xkeyB, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen == KEY_128)
|
||||
vmovdqa 8*16(p_keys), xkeyB
|
||||
.else
|
||||
.if (load_keys)
|
||||
vmovdqa 8*16(p_keys), xkey8
|
||||
.endif
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
vaesenc xkeyA, var_xdata, var_xdata /* key 7 */
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen == KEY_128)
|
||||
.if (load_keys)
|
||||
vmovdqa 9*16(p_keys), xkey12
|
||||
.endif
|
||||
.else
|
||||
vmovdqa 9*16(p_keys), xkeyA
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 8 */
|
||||
.if (klen == KEY_128)
|
||||
vaesenc xkeyB, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenc xkey8, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
vmovdqa 10*16(p_keys), xkeyB
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 9 */
|
||||
.if (klen == KEY_128)
|
||||
vaesenc xkey12, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenc xkeyA, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen != KEY_128)
|
||||
vmovdqa 11*16(p_keys), xkeyA
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 10 */
|
||||
.if (klen == KEY_128)
|
||||
vaesenclast xkeyB, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenc xkeyB, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen != KEY_128)
|
||||
.if (load_keys)
|
||||
vmovdqa 12*16(p_keys), xkey12
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
vaesenc xkeyA, var_xdata, var_xdata /* key 11 */
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen == KEY_256)
|
||||
vmovdqa 13*16(p_keys), xkeyA
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
.if (klen == KEY_256)
|
||||
/* key 12 */
|
||||
vaesenc xkey12, var_xdata, var_xdata
|
||||
.else
|
||||
vaesenclast xkey12, var_xdata, var_xdata
|
||||
.endif
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.if (klen == KEY_256)
|
||||
vmovdqa 14*16(p_keys), xkeyB
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 13 */
|
||||
vaesenc xkeyA, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
/* key 14 */
|
||||
vaesenclast xkeyB, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
.endif
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept (by / 2)
|
||||
.set j, (i+1)
|
||||
VMOVDQ (i*16 - 16*by)(p_in), xkeyA
|
||||
VMOVDQ (j*16 - 16*by)(p_in), xkeyB
|
||||
club XDATA, i
|
||||
vpxor xkeyA, var_xdata, var_xdata
|
||||
club XDATA, j
|
||||
vpxor xkeyB, var_xdata, var_xdata
|
||||
.set i, (i+2)
|
||||
.endr
|
||||
|
||||
.if (i < by)
|
||||
VMOVDQ (i*16 - 16*by)(p_in), xkeyA
|
||||
club XDATA, i
|
||||
vpxor xkeyA, var_xdata, var_xdata
|
||||
.endif
|
||||
|
||||
.set i, 0
|
||||
.rept by
|
||||
club XDATA, i
|
||||
VMOVDQ var_xdata, i*16(p_out)
|
||||
.set i, (i+1)
|
||||
.endr
|
||||
.endm
|
||||
|
||||
.macro do_aes_load val, key_len
|
||||
do_aes \val, 1, \key_len
|
||||
.endm
|
||||
|
||||
.macro do_aes_noload val, key_len
|
||||
do_aes \val, 0, \key_len
|
||||
.endm
|
||||
|
||||
/* main body of aes ctr load */
|
||||
|
||||
/*
 * do_aes_ctrmain key_len
 *
 * Body shared by the aes_ctr_enc_{128,192,256}_avx_by8 entry points.
 * Operates on p_in, p_iv, p_keys, p_out and num_bytes.
 *
 *  - If num_bytes < 16 there is nothing to process: return at once and
 *    leave the IV at (p_iv) untouched.  This exit must not go through
 *    .Ldo_return2, because xbyteswap and xcounter have not been loaded
 *    yet and that path stores xcounter back to (p_iv).
 *  - Otherwise the IV is loaded and byte-swapped into xcounter, the
 *    leading (num_bytes / 16) mod 8 blocks are handled by a single
 *    do_aes_load call (which also loads xkey0/xkey4/xkey8/xkey12), and
 *    the remainder is processed 8 blocks at a time by do_aes_noload.
 *  - On the normal exit the counter is swapped back and stored to (p_iv).
 *
 * p_out is advanced here; p_in is advanced inside do_aes.
 *
 * num_bytes is expected to be a multiple of 16: the .Lmult_of_8_blks
 * path does not mask off the low bits before entering the main loop.
 */
.macro do_aes_ctrmain key_len
	cmp	$16, num_bytes
	jb	.Lskip_iv\key_len	/* no full block: do not touch the IV */

	vmovdqa	byteswap_const(%rip), xbyteswap
	vmovdqu	(p_iv), xcounter
	vpshufb	xbyteswap, xcounter, xcounter

	/* tmp = byte count of the leading partial group (0..7 blocks) */
	mov	num_bytes, tmp
	and	$(7*16), tmp
	jz	.Lmult_of_8_blks\key_len

	/* 1 <= tmp/16 <= 7 */
	cmp	$(4*16), tmp
	jg	.Lgt4\key_len
	je	.Leq4\key_len

.Llt4\key_len:
	cmp	$(2*16), tmp
	jg	.Leq3\key_len
	je	.Leq2\key_len

.Leq1\key_len:
	do_aes_load	1, \key_len
	add	$(1*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq2\key_len:
	do_aes_load	2, \key_len
	add	$(2*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len


.Leq3\key_len:
	do_aes_load	3, \key_len
	add	$(3*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq4\key_len:
	do_aes_load	4, \key_len
	add	$(4*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Lgt4\key_len:
	cmp	$(6*16), tmp
	jg	.Leq7\key_len
	je	.Leq6\key_len

.Leq5\key_len:
	do_aes_load	5, \key_len
	add	$(5*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq6\key_len:
	do_aes_load	6, \key_len
	add	$(6*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq7\key_len:
	do_aes_load	7, \key_len
	add	$(7*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Lmult_of_8_blks\key_len:
	/* no do_aes_load ran on this path, so load the cached keys here */
	.if (\key_len != KEY_128)
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	4*16(p_keys), xkey4
		vmovdqa	8*16(p_keys), xkey8
		vmovdqa	12*16(p_keys), xkey12
	.else
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	3*16(p_keys), xkey4
		vmovdqa	6*16(p_keys), xkey8
		vmovdqa	9*16(p_keys), xkey12
	.endif
.align 16
.Lmain_loop2\key_len:
	/* num_bytes is a non-zero multiple of 8 blocks (8*16 bytes) here */
	do_aes_noload	8, \key_len
	add	$(8*16), p_out
	sub	$(8*16), num_bytes
	jne	.Lmain_loop2\key_len

.Ldo_return2\key_len:
	/* return updated IV */
	vpshufb	xbyteswap, xcounter, xcounter
	vmovdqu	xcounter, (p_iv)
.Lskip_iv\key_len:
	ret
.endm
|
||||
|
||||
/*
|
||||
* routine to do AES128 CTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
|
||||
* unsigned int num_bytes)
|
||||
*/
|
||||
ENTRY(aes_ctr_enc_128_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_128
|
||||
|
||||
ENDPROC(aes_ctr_enc_128_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES192 CTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
|
||||
* unsigned int num_bytes)
|
||||
*/
|
||||
ENTRY(aes_ctr_enc_192_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_192
|
||||
|
||||
ENDPROC(aes_ctr_enc_192_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES256 CTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
|
||||
* unsigned int num_bytes)
|
||||
*/
|
||||
ENTRY(aes_ctr_enc_256_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_256
|
||||
|
||||
ENDPROC(aes_ctr_enc_256_avx_by8)
|
||||
70
arch/x86/crypto/aes_glue.c
Normal file
70
arch/x86/crypto/aes_glue.c
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Glue Code for the asm optimized version of the AES Cipher Algorithm
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <asm/crypto/aes.h>
|
||||
|
||||
/*
 * Single-block AES primitives. Only these asmlinkage prototypes appear in
 * this file; the bodies are presumably supplied by the architecture's
 * assembly implementation - confirm against the build.
 */
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
|
||||
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
|
||||
|
||||
/**
 * crypto_aes_encrypt_x86 - exported wrapper around aes_enc_blk()
 * @ctx: AES context, passed through unchanged
 * @dst: destination buffer, passed as aes_enc_blk()'s @out
 * @src: source buffer, passed as aes_enc_blk()'s @in
 *
 * Exported with EXPORT_SYMBOL_GPL.
 */
void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_enc_blk(ctx, dst, src);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
|
||||
|
||||
/**
 * crypto_aes_decrypt_x86 - exported wrapper around aes_dec_blk()
 * @ctx: AES context, passed through unchanged
 * @dst: destination buffer, passed as aes_dec_blk()'s @out
 * @src: source buffer, passed as aes_dec_blk()'s @in
 *
 * Exported with EXPORT_SYMBOL_GPL.
 */
void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_dec_blk(ctx, dst, src);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
|
||||
|
||||
/*
 * Encrypt callback installed as aes_alg's .cia_encrypt: fetches the
 * context with crypto_tfm_ctx(tfm) and forwards it, together with dst
 * and src, to aes_enc_blk().
 */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
|
||||
}
|
||||
|
||||
/*
 * Decrypt callback installed as aes_alg's .cia_decrypt: fetches the
 * context with crypto_tfm_ctx(tfm) and forwards it, together with dst
 * and src, to aes_dec_blk().
 */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
|
||||
}
|
||||
|
||||
/*
 * Algorithm descriptor passed to crypto_register_alg() by aes_init().
 * Advertises the name "aes" with driver name "aes-asm" at priority 200,
 * sizes the per-transform context as struct crypto_aes_ctx, delegates key
 * setup to crypto_aes_set_key and routes block encryption/decryption to
 * the aes_encrypt()/aes_decrypt() wrappers above.
 */
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-asm",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = crypto_aes_set_key,
|
||||
.cia_encrypt = aes_encrypt,
|
||||
.cia_decrypt = aes_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* Module init: register aes_alg and return crypto_register_alg()'s result. */
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
/* Module exit: unregister aes_alg. */
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_CRYPTO("aes");
|
||||
MODULE_ALIAS_CRYPTO("aes-asm");
|
||||
2772
arch/x86/crypto/aesni-intel_asm.S
Normal file
2772
arch/x86/crypto/aesni-intel_asm.S
Normal file
File diff suppressed because it is too large
Load diff
2811
arch/x86/crypto/aesni-intel_avx-x86_64.S
Normal file
2811
arch/x86/crypto/aesni-intel_avx-x86_64.S
Normal file
File diff suppressed because it is too large
Load diff
1553
arch/x86/crypto/aesni-intel_glue.c
Normal file
1553
arch/x86/crypto/aesni-intel_glue.c
Normal file
File diff suppressed because it is too large
Load diff
379
arch/x86/crypto/blowfish-x86_64-asm_64.S
Normal file
379
arch/x86/crypto/blowfish-x86_64-asm_64.S
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
/*
|
||||
* Blowfish Cipher Algorithm (x86_64)
|
||||
*
|
||||
* Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "blowfish-x86_64-asm.S"
|
||||
.text
|
||||
|
||||
/*
 * structure of crypto context
 *
 * Byte offsets relative to CTX: p starts at 0, s0 begins after (16 + 2)
 * 32-bit words, and each following sN lies another 256 32-bit words on.
 * NOTE(review): presumably mirrors struct bf_ctx from <crypto/blowfish.h>
 * -- confirm against that header.
 */
#define p	0
#define s0	((16 + 2) * 4)
#define s1	((16 + 2 + (1 * 256)) * 4)
#define s2	((16 + 2 + (2 * 256)) * 4)
#define s3	((16 + 2 + (3 * 256)) * 4)

/*
 * register macros
 *
 * RX0..RX3 each hold one 64-bit block; the d/bl/bh suffixes name the 32-bit,
 * lowest-byte and second-lowest-byte views of the same register.  RT0..RT3
 * are scratch.  RIO and RT1 are both %rsi, so expanding F()/F4() overwrites
 * the I/O pointer; RT0 is %rbp, which the entry points save and restore.
 */
#define CTX %rdi
#define RIO %rsi

#define RX0 %rax
#define RX1 %rbx
#define RX2 %rcx
#define RX3 %rdx

#define RX0d %eax
#define RX1d %ebx
#define RX2d %ecx
#define RX3d %edx

#define RX0bl %al
#define RX1bl %bl
#define RX2bl %cl
#define RX3bl %dl

#define RX0bh %ah
#define RX1bh %bh
#define RX2bh %ch
#define RX3bh %dh

#define RT0 %rbp
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9

#define RT0d %ebp
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d

/* Holds the preloaded 64-bit round-key pair in the 4-way code. */
#define RKEY %r10
|
||||
|
||||
/***********************************************************************
|
||||
* 1-way blowfish
|
||||
***********************************************************************/
|
||||
/*
 * F(): one round-function application on the block held in RX0.
 *
 * The four bytes of the low 32 bits of RX0 index the tables at s0..s3
 * (bits 31..24 -> s0, 23..16 -> s1, 15..8 -> s2, 7..0 -> s3); the entries are
 * combined in RT0d as ((s0 + s1) ^ s2) + s3.  RX0 is rotated right by 16,
 * back left by 16 and finally left by 32, so its halves end up swapped, and
 * the 32-bit result is then xored into the new low half (the 32-bit ops on
 * RT0d leave the upper half of RT0 zero).
 *
 * Overwrites RT0, RT1 (which is also RIO) and RT2.
 */
#define F() \
	rorq $16, RX0; \
	movzbl RX0bh, RT0d; \
	movzbl RX0bl, RT1d; \
	rolq $16, RX0; \
	movl s0(CTX,RT0,4), RT0d; \
	addl s1(CTX,RT1,4), RT0d; \
	movzbl RX0bh, RT1d; \
	movzbl RX0bl, RT2d; \
	rolq $32, RX0; \
	xorl s2(CTX,RT1,4), RT0d; \
	addl s3(CTX,RT2,4), RT0d; \
	xorq RT0, RX0;
|
||||
|
||||
/*
 * add_roundkey_enc(n): xor the 64 bits stored at p + 4*n (32-bit words n and
 * n+1 of p) into RX0 in a single operation.
 */
#define add_roundkey_enc(n) \
	xorq p+4*(n)(CTX), RX0;

/*
 * round_enc(n): mix in key words n and n+1, then apply F() twice.
 */
#define round_enc(n) \
	add_roundkey_enc(n); \
	\
	F(); \
	F();
|
||||
|
||||
/*
 * add_roundkey_dec(n): load the 64 bits stored at p + 4*(n-1) (32-bit words
 * n-1 and n of p), swap the two halves so that word n lands in the low half,
 * and xor the result into RX0.  Overwrites RT0.
 *
 * The argument is parenthesized, matching preload_roundkey_dec(), so that an
 * expression argument cannot change the computed offset.
 */
#define add_roundkey_dec(n) \
	movq p+4*((n)-1)(CTX), RT0; \
	rorq $32, RT0; \
	xorq RT0, RX0;

/*
 * round_dec(n): mix in key words n and n-1, then apply F() twice.
 *
 * No line continuation after the last F(): a trailing backslash would splice
 * whatever follows the macro into its body.
 */
#define round_dec(n) \
	add_roundkey_dec(n); \
	\
	F(); \
	F();
|
||||
|
||||
/*
 * read_block(): load 8 bytes from (RIO) into RX0, swap the 32-bit halves and
 * byte-swap the whole register.
 */
#define read_block() \
	movq (RIO), RX0; \
	rorq $32, RX0; \
	bswapq RX0;

/*
 * write_block(): byte-swap RX0 and store it to (RIO).
 */
#define write_block() \
	bswapq RX0; \
	movq RX0, (RIO);

/*
 * xor_block(): byte-swap RX0 and xor it into the 8 bytes at (RIO).
 */
#define xor_block() \
	bswapq RX0; \
	xorq RX0, (RIO);
|
||||
|
||||
ENTRY(__blowfish_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool, if true: xor output
	 */
	/* F() uses %rbp (RT0) as scratch; park the caller's value in %r11. */
	movq %rbp, %r11;

	/* F() also overwrites %rsi (RT1 == RIO): keep dst in %r10, read from src. */
	movq %rsi, %r10;
	movq %rdx, RIO;

	read_block();

	/* Each round_enc(n) mixes in p words n, n+1 and applies F() twice. */
	round_enc(0);
	round_enc(2);
	round_enc(4);
	round_enc(6);
	round_enc(8);
	round_enc(10);
	round_enc(12);
	round_enc(14);
	add_roundkey_enc(16);

	movq %r11, %rbp;

	/* Point RIO at dst; %cl is not touched by the 1-way macros. */
	movq %r10, RIO;
	test %cl, %cl;
	jnz .L__enc_xor;

	write_block();
	ret;
.L__enc_xor:
	/* xor flag set: combine the result with what is already at dst. */
	xor_block();
	ret;
ENDPROC(__blowfish_enc_blk)
|
||||
|
||||
ENTRY(blowfish_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	/* F() uses %rbp (RT0) as scratch; park the caller's value in %r11. */
	movq %rbp, %r11;

	/* F() also overwrites %rsi (RT1 == RIO): keep dst in %r10, read from src. */
	movq %rsi, %r10;
	movq %rdx, RIO;

	read_block();

	/* Walk the p words downwards: round_dec(n) mixes in words n, n-1. */
	round_dec(17);
	round_dec(15);
	round_dec(13);
	round_dec(11);
	round_dec(9);
	round_dec(7);
	round_dec(5);
	round_dec(3);
	add_roundkey_dec(1);

	movq %r10, RIO;
	write_block();

	movq %r11, %rbp;

	ret;
ENDPROC(blowfish_dec_blk)
|
||||
|
||||
/**********************************************************************
|
||||
4-way blowfish, four blocks parallel
|
||||
**********************************************************************/
|
||||
|
||||
/* F() for 4-way. Slower when used alone/1-way, but faster when used
 * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
 *
 * x is one of RX0..RX3.  All four index bytes of the low 32 bits of x are
 * extracted first (bits 15..8 -> RT1, 7..0 -> RT3, 31..24 -> RT0,
 * 23..16 -> RT2) while x is rotated right by 16 twice, which swaps its
 * halves.  The table entries are then combined as ((s0 + s1) ^ s2) + s3 in
 * RT0d and xored into the new low half of x.
 *
 * Overwrites RT0..RT3 (RT1 is also RIO).
 */
#define F4(x) \
	movzbl x ## bh, RT1d; \
	movzbl x ## bl, RT3d; \
	rorq $16, x; \
	movzbl x ## bh, RT0d; \
	movzbl x ## bl, RT2d; \
	rorq $16, x; \
	movl s0(CTX,RT0,4), RT0d; \
	addl s1(CTX,RT2,4), RT0d; \
	xorl s2(CTX,RT1,4), RT0d; \
	addl s3(CTX,RT3,4), RT0d; \
	xorq RT0, x;
|
||||
|
||||
/* Xor the key pair currently held in RKEY into all four blocks. */
#define add_preloaded_roundkey4() \
	xorq RKEY, RX0; \
	xorq RKEY, RX1; \
	xorq RKEY, RX2; \
	xorq RKEY, RX3;

/* Load the 64 bits at p + 4*n (p words n and n+1) into RKEY. */
#define preload_roundkey_enc(n) \
	movq p+4*(n)(CTX), RKEY;

/*
 * Apply the key pair already in RKEY, then fetch the pair for the next
 * round_enc4() (words n+2 and n+3).  The argument n itself is only used to
 * compute that next offset.
 */
#define add_roundkey_enc4(n) \
	add_preloaded_roundkey4(); \
	preload_roundkey_enc(n + 2);

/*
 * One double round on four blocks: key addition followed by two passes of
 * F4() over RX0..RX3.
 */
#define round_enc4(n) \
	add_roundkey_enc4(n); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3);

/*
 * Load the 64 bits at p + 4*(n-1) (p words n-1 and n) into RKEY and swap
 * the halves so that word n ends up in the low half.
 */
#define preload_roundkey_dec(n) \
	movq p+4*((n)-1)(CTX), RKEY; \
	rorq $32, RKEY;

/*
 * Apply the key pair already in RKEY, then fetch the pair for the next
 * round_dec4() (based on n-2).
 */
#define add_roundkey_dec4(n) \
	add_preloaded_roundkey4(); \
	preload_roundkey_dec(n - 2);

/*
 * Decryption counterpart of round_enc4(): key addition followed by two
 * passes of F4() over RX0..RX3.
 */
#define round_dec4(n) \
	add_roundkey_dec4(n); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3);
|
||||
|
||||
/*
 * read_block4(): load four consecutive 8-byte blocks from (RIO) into
 * RX0..RX3, swapping the 32-bit halves and byte-swapping each one.
 */
#define read_block4() \
	movq (RIO), RX0; \
	rorq $32, RX0; \
	bswapq RX0; \
	\
	movq 8(RIO), RX1; \
	rorq $32, RX1; \
	bswapq RX1; \
	\
	movq 16(RIO), RX2; \
	rorq $32, RX2; \
	bswapq RX2; \
	\
	movq 24(RIO), RX3; \
	rorq $32, RX3; \
	bswapq RX3;

/*
 * write_block4(): byte-swap RX0..RX3 and store them to the four consecutive
 * 8-byte slots at (RIO).
 */
#define write_block4() \
	bswapq RX0; \
	movq RX0, (RIO); \
	\
	bswapq RX1; \
	movq RX1, 8(RIO); \
	\
	bswapq RX2; \
	movq RX2, 16(RIO); \
	\
	bswapq RX3; \
	movq RX3, 24(RIO);

/*
 * xor_block4(): byte-swap RX0..RX3 and xor them into the four consecutive
 * 8-byte slots at (RIO).
 */
#define xor_block4() \
	bswapq RX0; \
	xorq RX0, (RIO); \
	\
	bswapq RX1; \
	xorq RX1, 8(RIO); \
	\
	bswapq RX2; \
	xorq RX2, 16(RIO); \
	\
	bswapq RX3; \
	xorq RX3, 24(RIO);
|
||||
|
||||
ENTRY(__blowfish_enc_blk_4way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool, if true: xor output
	 */
	/* %rbp (RT0) and %rbx (RX1) are overwritten below; save them. */
	pushq %rbp;
	pushq %rbx;
	/* %rcx doubles as RX2, so the xor flag is kept on the stack. */
	pushq %rcx;

	preload_roundkey_enc(0);

	/* F4() overwrites %rsi (RT1 == RIO): keep dst in %r11, read from src. */
	movq %rsi, %r11;
	movq %rdx, RIO;

	read_block4();

	round_enc4(0);
	round_enc4(2);
	round_enc4(4);
	round_enc4(6);
	round_enc4(8);
	round_enc4(10);
	round_enc4(12);
	round_enc4(14);
	/* The last round_enc4() preloaded p words 16 and 17. */
	add_preloaded_roundkey4();

	/* Pop the saved xor flag into %rbp, which is free scratch here. */
	popq %rbp;
	movq %r11, RIO;

	test %bpl, %bpl;
	jnz .L__enc_xor4;

	write_block4();

	popq %rbx;
	popq %rbp;
	ret;

.L__enc_xor4:
	/* xor flag set: combine the results with what is already at dst. */
	xor_block4();

	popq %rbx;
	popq %rbp;
	ret;
ENDPROC(__blowfish_enc_blk_4way)
|
||||
|
||||
ENTRY(blowfish_dec_blk_4way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	/* %rbp (RT0) and %rbx (RX1) are overwritten below; save them. */
	pushq %rbp;
	pushq %rbx;
	preload_roundkey_dec(17);

	/* F4() overwrites %rsi (RT1 == RIO): keep dst in %r11, read from src. */
	movq %rsi, %r11;
	movq %rdx, RIO;

	read_block4();

	round_dec4(17);
	round_dec4(15);
	round_dec4(13);
	round_dec4(11);
	round_dec4(9);
	round_dec4(7);
	round_dec4(5);
	round_dec4(3);
	/* The last round_dec4() preloaded the pair based on index 1. */
	add_preloaded_roundkey4();

	movq %r11, RIO;
	write_block4();

	popq %rbx;
	popq %rbp;

	ret;
ENDPROC(blowfish_dec_blk_4way)
|
||||
482
arch/x86/crypto/blowfish_glue.c
Normal file
482
arch/x86/crypto/blowfish_glue.c
Normal file
|
|
@ -0,0 +1,482 @@
|
|||
/*
|
||||
* Glue Code for assembler optimized version of Blowfish
|
||||
*
|
||||
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
|
||||
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
|
||||
* CTR part based on code (crypto/ctr.c) by:
|
||||
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <crypto/blowfish.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <crypto/algapi.h>
|
||||
|
||||
/*
 * regular block cipher functions
 *
 * Implemented in assembler.  Each call processes one BF_BLOCK_SIZE block from
 * src; __blowfish_enc_blk() xors the result into dst instead of storing it
 * when xor is true.
 */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
				   bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);

/*
 * 4-way parallel cipher functions
 *
 * Same contract as above, but src and dst each cover four consecutive blocks.
 */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
				      const u8 *src);
|
||||
|
||||
/* Encrypt one block from src and store the result at dst. */
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
	const bool xor_into_dst = false;

	__blowfish_enc_blk(ctx, dst, src, xor_into_dst);
}
|
||||
|
||||
/* Encrypt one block from src and xor the result into dst. */
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
					const u8 *src)
{
	const bool xor_into_dst = true;

	__blowfish_enc_blk(ctx, dst, src, xor_into_dst);
}
|
||||
|
||||
/* Encrypt four consecutive blocks from src and store the results at dst. */
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					 const u8 *src)
{
	const bool xor_into_dst = false;

	__blowfish_enc_blk_4way(ctx, dst, src, xor_into_dst);
}
|
||||
|
||||
/* Encrypt four consecutive blocks from src and xor the results into dst. */
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
					     const u8 *src)
{
	const bool xor_into_dst = true;

	__blowfish_enc_blk_4way(ctx, dst, src, xor_into_dst);
}
|
||||
|
||||
/* Single-block encrypt hook (.cia_encrypt) for the "blowfish" cipher. */
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);

	blowfish_enc_blk(ctx, dst, src);
}
|
||||
|
||||
/* Single-block decrypt hook (.cia_decrypt) for the "blowfish" cipher. */
static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);

	blowfish_dec_blk(ctx, dst, src);
}
|
||||
|
||||
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
|
||||
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
|
||||
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = blkcipher_walk_virt(desc, walk);
|
||||
|
||||
while ((nbytes = walk->nbytes)) {
|
||||
u8 *wsrc = walk->src.virt.addr;
|
||||
u8 *wdst = walk->dst.virt.addr;
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * 4) {
|
||||
do {
|
||||
fn_4way(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize * 4;
|
||||
wdst += bsize * 4;
|
||||
nbytes -= bsize * 4;
|
||||
} while (nbytes >= bsize * 4);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
fn(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize;
|
||||
wdst += bsize;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
done:
|
||||
err = blkcipher_walk_done(desc, walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
|
||||
}
|
||||
|
||||
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 *iv = (u64 *)walk->iv;
|
||||
|
||||
do {
|
||||
*dst = *src ^ *iv;
|
||||
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
|
||||
iv = dst;
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
*(u64 *)walk->iv = *iv;
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
nbytes = __cbc_encrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 ivs[4 - 1];
|
||||
u64 last_iv;
|
||||
|
||||
/* Start of the last block. */
|
||||
src += nbytes / bsize - 1;
|
||||
dst += nbytes / bsize - 1;
|
||||
|
||||
last_iv = *src;
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * 4) {
|
||||
do {
|
||||
nbytes -= bsize * 4 - bsize;
|
||||
src -= 4 - 1;
|
||||
dst -= 4 - 1;
|
||||
|
||||
ivs[0] = src[0];
|
||||
ivs[1] = src[1];
|
||||
ivs[2] = src[2];
|
||||
|
||||
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
dst[1] ^= ivs[0];
|
||||
dst[2] ^= ivs[1];
|
||||
dst[3] ^= ivs[2];
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
} while (nbytes >= bsize * 4);
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
for (;;) {
|
||||
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
break;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
}
|
||||
|
||||
done:
|
||||
*dst ^= *(u64 *)walk->iv;
|
||||
*(u64 *)walk->iv = last_iv;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
nbytes = __cbc_decrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
|
||||
{
|
||||
u8 *ctrblk = walk->iv;
|
||||
u8 keystream[BF_BLOCK_SIZE];
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
|
||||
blowfish_enc_blk(ctx, keystream, ctrblk);
|
||||
crypto_xor(keystream, src, nbytes);
|
||||
memcpy(dst, keystream, nbytes);
|
||||
|
||||
crypto_inc(ctrblk, BF_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = BF_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
|
||||
__be64 ctrblocks[4];
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * 4) {
|
||||
do {
|
||||
if (dst != src) {
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
}
|
||||
|
||||
/* create ctrblks for parallel encrypt */
|
||||
ctrblocks[0] = cpu_to_be64(ctrblk++);
|
||||
ctrblocks[1] = cpu_to_be64(ctrblk++);
|
||||
ctrblocks[2] = cpu_to_be64(ctrblk++);
|
||||
ctrblocks[3] = cpu_to_be64(ctrblk++);
|
||||
|
||||
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
|
||||
(u8 *)ctrblocks);
|
||||
|
||||
src += 4;
|
||||
dst += 4;
|
||||
} while ((nbytes -= bsize * 4) >= bsize * 4);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
if (dst != src)
|
||||
*dst = *src;
|
||||
|
||||
ctrblocks[0] = cpu_to_be64(ctrblk++);
|
||||
|
||||
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
} while ((nbytes -= bsize) >= bsize);
|
||||
|
||||
done:
|
||||
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
|
||||
|
||||
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
|
||||
nbytes = __ctr_crypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
if (walk.nbytes) {
|
||||
ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Algorithms registered by this module:
 *   [0] "blowfish"       single-block cipher, priority 200
 *   [1] "ecb(blowfish)"  blkcipher, priority 300
 *   [2] "cbc(blowfish)"  blkcipher, priority 300, IV of one block
 *   [3] "ctr(blowfish)"  blkcipher, priority 300, IV of one block,
 *                        block size 1
 * All entries share blowfish_setkey and a struct bf_ctx sized context.
 */
static struct crypto_alg bf_algs[4] = { {
	.cra_name		= "blowfish",
	.cra_driver_name	= "blowfish-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.cipher = {
			.cia_min_keysize = BF_MIN_KEY_SIZE,
			.cia_max_keysize = BF_MAX_KEY_SIZE,
			.cia_setkey	 = blowfish_setkey,
			.cia_encrypt	 = blowfish_encrypt,
			.cia_decrypt	 = blowfish_decrypt,
		}
	}
}, {
	.cra_name		= "ecb(blowfish)",
	.cra_driver_name	= "ecb-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(blowfish)",
	.cra_driver_name	= "cbc-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.ivsize		= BF_BLOCK_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(blowfish)",
	.cra_driver_name	= "ctr-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	/* Block size 1: requests need not be a multiple of BF_BLOCK_SIZE. */
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.ivsize		= BF_BLOCK_SIZE,
			.setkey		= blowfish_setkey,
			/* The same routine serves both directions. */
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
} };
|
||||
|
||||
static bool is_blacklisted_cpu(void)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return false;
|
||||
|
||||
if (boot_cpu_data.x86 == 0x0f) {
|
||||
/*
|
||||
* On Pentium 4, blowfish-x86_64 is slower than generic C
|
||||
* implementation because use of 64bit rotates (which are really
|
||||
* slow on P4). Therefore blacklist P4s.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Non-zero makes init() skip the CPU blacklist check. */
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
|
||||
|
||||
/*
 * Module init: register all bf_algs entries, unless the CPU is blacklisted
 * and the "force" parameter is not set, in which case -ENODEV is returned.
 */
static int __init init(void)
{
	if (force || !is_blacklisted_cpu())
		return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));

	printk(KERN_INFO
		"blowfish-x86_64: performance on this CPU "
		"would be suboptimal: disabling "
		"blowfish-x86_64.\n");
	return -ENODEV;
}
|
||||
|
||||
/* Module exit: unregister everything init() registered. */
static void __exit fini(void)
{
	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
|
||||
|
||||
module_init(init);
|
||||
module_exit(fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
|
||||
MODULE_ALIAS_CRYPTO("blowfish");
|
||||
MODULE_ALIAS_CRYPTO("blowfish-asm");
|
||||
1270
arch/x86/crypto/camellia-aesni-avx-asm_64.S
Normal file
1270
arch/x86/crypto/camellia-aesni-avx-asm_64.S
Normal file
File diff suppressed because it is too large
Load diff
1386
arch/x86/crypto/camellia-aesni-avx2-asm_64.S
Normal file
1386
arch/x86/crypto/camellia-aesni-avx2-asm_64.S
Normal file
File diff suppressed because it is too large
Load diff
514
arch/x86/crypto/camellia-x86_64-asm_64.S
Normal file
514
arch/x86/crypto/camellia-x86_64-asm_64.S
Normal file
|
|
@ -0,0 +1,514 @@
|
|||
/*
|
||||
* Camellia Cipher Algorithm (x86_64)
|
||||
*
|
||||
* Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "camellia-x86_64-asm_64.S"
|
||||
.text
|
||||
|
||||
/*
 * Lookup tables defined outside this file.  xor2ror16() indexes them with a
 * byte value and a scale of 8, i.e. as tables of 64-bit entries.
 */
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;

/* Short aliases used by the round macros below. */
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110
|
||||
|
||||
#define CAMELLIA_TABLE_BYTE_LEN 272

/*
 * struct camellia_ctx:
 *
 * Byte offsets relative to CTX: the key table starts at 0 and the key length
 * field follows it at CAMELLIA_TABLE_BYTE_LEN.
 * NOTE(review): must match the C definition of struct camellia_ctx, which is
 * not visible here -- confirm.
 */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN

/*
 * register macros
 *
 * RAB0/RCD0 hold the two 64-bit halves of the first block, RAB1/RCD1 those
 * of the second block in the 2-way code; the d/bl/bh suffixes name the
 * 32-bit, lowest-byte and second-lowest-byte views.  RT0..RT2 are scratch.
 * RT0 and RIO are both %rsi, so the round macros overwrite the I/O pointer.
 */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi

#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx

#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx

#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl

#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh

#define RT0 %rsi
#define RT1 %rbp
#define RT2 %r8

#define RT0d %esi
#define RT1d %ebp
#define RT2d %r8d

#define RT2bl %r8b

/* RXOR: xor-output flag; RRBP: saved %rbp; RDST: saved dst pointer. */
#define RXOR %r9
#define RRBP %r10
#define RDST %r11

#define RXORd %r9d
#define RXORbl %r9b
|
||||
|
||||
/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 * Takes the two lowest bytes of ab, rotates ab right by 16, and xors into
 * dst the 64-bit entry of T0 selected by the lowest byte and the 64-bit
 * entry of T1 selected by the second-lowest byte.  Overwrites tmp1 and tmp2.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	movzbl ab ## bl, tmp2 ## d; \
	movzbl ab ## bh, tmp1 ## d; \
	rorq $16, ab; \
	xorq T0(, tmp2, 8), dst; \
	xorq T1(, tmp1, 8), dst;
|
||||
|
||||
/**********************************************************************
|
||||
1-way camellia
|
||||
**********************************************************************/
|
||||
/*
 * roundsm(ab, subkey, cd): one round on the first block.
 *
 * Loads the 64-bit subkey at key_table + subkey * 8 into RT2, then xors the
 * eight table entries selected by the eight bytes of ab ## 0 alternately
 * into cd ## 0 and RT2, and finally folds RT2 into cd ## 0.  ab ## 0 is
 * rotated by 16 four times and is therefore unchanged on exit.
 * Overwrites RT0 (also RIO), RT1 and RT2.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2, cd ## 0;
|
||||
|
||||
/*
 * fls(l, r, kl, kr): key-dependent mixing of the two halves of the first
 * block, using the 64-bit subkeys at indices kl and kr.  In order:
 *
 *   high32(l) ^= rol32(low32(l) & low32(kl), 1)
 *   low32(r)  ^= high32(r) | high32(kr)
 *   low32(l)  ^= high32(l) | high32(kl)
 *   high32(r) ^= rol32(low32(r) & low32(kr), 1)
 *
 * Each step sees the result of the earlier ones.
 * NOTE(review): presumably Camellia's FL / FL^-1 layer -- confirm against
 * the specification.
 * Overwrites RT0 (also RIO), RT1 and RT2.
 */
#define fls(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
	orq l ## 0, RT2; \
	shrq $32, RT2; \
	xorq RT2, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
	andl r ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, r ## 0;
|
||||
|
||||
/* Six rounds using subkeys i+2 .. i+7, alternating the roles of RAB/RCD. */
#define enc_rounds(i) \
	roundsm(RAB, i + 2, RCD); \
	roundsm(RCD, i + 3, RAB); \
	roundsm(RAB, i + 4, RCD); \
	roundsm(RCD, i + 5, RAB); \
	roundsm(RAB, i + 6, RCD); \
	roundsm(RCD, i + 7, RAB);

/* fls() with subkey i for the RAB half and i+1 for the RCD half. */
#define enc_fls(i) \
	fls(RAB, RCD, i + 0, i + 1);

/*
 * Load 16 bytes from (RIO) into RAB0/RCD0, byte-swapping and rotating each
 * 64-bit word by 32, then xor the first key table entry into RAB0.
 */
#define enc_inpack() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX), RAB0;

/*
 * Xor key table entry number max (a register, scaled by 8) into RCD0, undo
 * the input transformation on both words, and apply op (mov or xor) to
 * write RCD0 to (RIO) and RAB0 to 8(RIO).
 */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO);
|
||||
|
||||
/* Six rounds using subkeys i+7 down to i+2, i.e. enc_rounds() reversed. */
#define dec_rounds(i) \
	roundsm(RAB, i + 7, RCD); \
	roundsm(RCD, i + 6, RAB); \
	roundsm(RAB, i + 5, RCD); \
	roundsm(RCD, i + 4, RAB); \
	roundsm(RAB, i + 3, RCD); \
	roundsm(RCD, i + 2, RAB);

/* fls() with subkey i+1 for the RAB half and i for the RCD half. */
#define dec_fls(i) \
	fls(RAB, RCD, i + 1, i + 0);

/*
 * Same load as enc_inpack(), but xors key table entry number max (a
 * register, scaled by 8) into RAB0.
 */
#define dec_inpack(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0;

/*
 * Xor the first key table entry into RCD0, undo the input transformation on
 * both words, and store RCD0 to (RIO) and RAB0 to 8(RIO).
 */
#define dec_outunpack() \
	xorq key_table(CTX), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO);
|
||||
|
||||
ENTRY(__camellia_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 */
	/* The round macros use %rbp (RT1) as scratch; park it in RRBP. */
	movq %rbp, RRBP;

	/* %rcx is RCD0 and %rsi is RT0/RIO: move the flag and dst out of the way. */
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack();

	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	/* A key length of 16 stops here; other lengths run one more group. */
	cmpb $16, key_length(CTX);
	je .L__enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

.L__enc_done:
	/* movq leaves the flags set by testb intact for the jnz below. */
	testb RXORbl, RXORbl;
	movq RDST, RIO;

	jnz .L__enc_xor;

	enc_outunpack(mov, RT1);

	movq RRBP, %rbp;
	ret;

.L__enc_xor:
	/* xor flag set: combine the result with what is already at dst. */
	enc_outunpack(xor, RT1);

	movq RRBP, %rbp;
	ret;
ENDPROC(__camellia_enc_blk)
|
||||
|
||||
ENTRY(camellia_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	/* max = 24 when the key length is 16, otherwise 32. */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	/* The round macros use %rbp (RT1) and %rsi (RT0) as scratch. */
	movq %rbp, RRBP;
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack(RT2);

	/* With max == 24 the extra group of rounds is skipped. */
	cmpb $24, RT2bl;
	je .L__dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

.L__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq RDST, RIO;

	dec_outunpack();

	movq RRBP, %rbp;
	ret;
ENDPROC(camellia_dec_blk)
|
||||
|
||||
/**********************************************************************
|
||||
2-way camellia
|
||||
**********************************************************************/
|
||||
/*
 * roundsm2(ab, subkey, cd): one round on both blocks.
 *
 * The 64-bit subkey at key_table + subkey * 8 is loaded into RT2 and xored
 * straight into cd ## 1.  For the first block RT2 then serves as a second
 * accumulator next to cd ## 0, exactly as in roundsm(), and is folded into
 * cd ## 0 once the second block's lookups have started.  All of the second
 * block's table entries are xored directly into cd ## 1.
 * Overwrites RT0 (also RIO), RT1 and RT2.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	xorq RT2, cd ## 1; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
	xorq RT2, cd ## 0; \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
|
||||
|
||||
/*
 * fls2(l, r, kl, kr): the same four mixing steps as fls(), applied to both
 * blocks (register suffix 0 and 1).  The first two steps are done for
 * block 0 and then block 1, followed by the last two steps for block 0 and
 * then block 1, with RT0/RT1/RT2 taking turns as the temporary.
 */
#define fls2(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
	movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
	andl l ## 1d, RT2d; \
	roll $1, RT2d; \
	shlq $32, RT2; \
	xorq RT2, l ## 1; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
	orq r ## 1, RT0; \
	shrq $32, RT0; \
	xorq RT0, r ## 1; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
	orq l ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
	andl r ## 0d, RT2d; \
	roll $1, RT2d; \
	shlq $32, RT2; \
	xorq RT2, r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
	orq l ## 1, RT0; \
	shrq $32, RT0; \
	xorq RT0, l ## 1; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
	andl r ## 1d, RT1d; \
	roll $1, RT1d; \
	shlq $32, RT1; \
	xorq RT1, r ## 1;
|
||||
|
||||
/*
 * enc_rounds2(i) - six roundsm2 steps using subkeys i + 2 .. i + 7 in
 * ascending order, swapping the roles of RAB and RCD on every step.
 */
#define enc_rounds2(i) \
	roundsm2(RAB, i + 2, RCD); \
	roundsm2(RCD, i + 3, RAB); \
	roundsm2(RAB, i + 4, RCD); \
	roundsm2(RCD, i + 5, RAB); \
	roundsm2(RAB, i + 6, RCD); \
	roundsm2(RCD, i + 7, RAB);
|
||||
|
||||
/*
 * enc_fls2(i) - fls2 on RAB/RCD for encryption: subkey i is used for the
 * left half (kl) and subkey i + 1 for the right half (kr).
 */
#define enc_fls2(i) \
	fls2(RAB, RCD, i + 0, i + 1);
|
||||
|
||||
/*
 * enc_inpack2() - load two 16-byte blocks from RIO for encryption.
 *
 * Block 0 is read from byte offsets 0 and 8, block 1 from 16 and 24.
 * Each 64-bit half is byte-swapped and rotated by 32 bits (right for the
 * RAB half, left for the RCD half); the first key_table entry is then
 * XORed into RAB0 and RAB1.
 */
#define enc_inpack2() \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rorq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rolq $32,					RCD0; \
	xorq key_table(CTX),				RAB0; \
	\
	movq 8*2(RIO),					RAB1; \
	bswapq						RAB1; \
	rorq $32,					RAB1; \
	movq 12*2(RIO),					RCD1; \
	bswapq						RCD1; \
	rolq $32,					RCD1; \
	xorq key_table(CTX),				RAB1;
|
||||
|
||||
/*
 * enc_outunpack2(op, max) - finish encryption and write two blocks to RIO.
 *
 * The key_table entry at index 'max' (a register) is XORed into RCD0 and
 * RCD1.  Every half is rotated by 32 bits and byte-swapped, then written
 * with 'op'q: the RCD half goes to the first 8 bytes of a block and the
 * RAB half to the second 8 bytes.  The callers in this file pass 'mov'
 * (overwrite the destination) or 'xor' (XOR into the destination).
 */
#define enc_outunpack2(op, max) \
	xorq key_table(CTX, max, 8),			RCD0; \
	rolq $32,					RCD0; \
	bswapq						RCD0; \
	op ## q RCD0,					(RIO); \
	rorq $32,					RAB0; \
	bswapq						RAB0; \
	op ## q RAB0,					4*2(RIO); \
	\
	xorq key_table(CTX, max, 8),			RCD1; \
	rolq $32,					RCD1; \
	bswapq						RCD1; \
	op ## q RCD1,					8*2(RIO); \
	rorq $32,					RAB1; \
	bswapq						RAB1; \
	op ## q RAB1,					12*2(RIO);
|
||||
|
||||
/*
 * dec_rounds2(i) - six roundsm2 steps using subkeys i + 7 .. i + 2 in
 * descending order (the reverse of enc_rounds2), swapping the roles of
 * RAB and RCD on every step.
 */
#define dec_rounds2(i) \
	roundsm2(RAB, i + 7, RCD); \
	roundsm2(RCD, i + 6, RAB); \
	roundsm2(RAB, i + 5, RCD); \
	roundsm2(RCD, i + 4, RAB); \
	roundsm2(RAB, i + 3, RCD); \
	roundsm2(RCD, i + 2, RAB);
|
||||
|
||||
/*
 * dec_fls2(i) - fls2 on RAB/RCD for decryption: the two subkeys are
 * swapped relative to enc_fls2 (kl = i + 1, kr = i).
 */
#define dec_fls2(i) \
	fls2(RAB, RCD, i + 1, i + 0);
|
||||
|
||||
/*
 * dec_inpack2(max) - load two 16-byte blocks from RIO for decryption.
 *
 * Same load/byte-swap/rotate sequence as enc_inpack2, but the value XORed
 * into RAB0 and RAB1 is the key_table entry at index 'max' (a register)
 * instead of the first entry.
 */
#define dec_inpack2(max) \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rorq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rolq $32,					RCD0; \
	xorq key_table(CTX, max, 8),			RAB0; \
	\
	movq 8*2(RIO),					RAB1; \
	bswapq						RAB1; \
	rorq $32,					RAB1; \
	movq 12*2(RIO),					RCD1; \
	bswapq						RCD1; \
	rolq $32,					RCD1; \
	xorq key_table(CTX, max, 8),			RAB1;
|
||||
|
||||
/*
 * dec_outunpack2() - finish decryption and store two blocks to RIO.
 *
 * The first key_table entry is XORed into RCD0 and RCD1.  Every half is
 * rotated by 32 bits and byte-swapped, then stored with movq: the RCD
 * half to the first 8 bytes of a block, the RAB half to the second.
 */
#define dec_outunpack2() \
	xorq key_table(CTX),				RCD0; \
	rolq $32,					RCD0; \
	bswapq						RCD0; \
	movq RCD0,					(RIO); \
	rorq $32,					RAB0; \
	bswapq						RAB0; \
	movq RAB0,					4*2(RIO); \
	\
	xorq key_table(CTX),				RCD1; \
	rolq $32,					RCD1; \
	bswapq						RCD1; \
	movq RCD1,					8*2(RIO); \
	rorq $32,					RAB1; \
	bswapq						RAB1; \
	movq RAB1,					12*2(RIO);
|
||||
|
||||
/*
 * __camellia_enc_blk_2way - encrypt two 16-byte blocks.
 *
 * Runs three round groups unconditionally and a fourth one unless
 * key_length(CTX) is 16.  The result is either stored to dst or XORed
 * into dst, depending on the low byte of the xor argument.
 */
ENTRY(__camellia_enc_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 */
	pushq %rbx;

	/* %rbp is parked in RRBP; the arguments move to working registers. */
	movq %rbp, RRBP;
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack2();

	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	/* A key_length of 16 skips the fourth round group. */
	cmpb $16, key_length(CTX);
	je .L__enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

.L__enc2_done:
	/* The flags from 'test' must survive until jnz; movq leaves them. */
	test RXORbl, RXORbl;
	movq RDST, RIO;
	jnz .L__enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RRBP, %rbp;
	popq %rbx;
	ret;

.L__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RRBP, %rbp;
	popq %rbx;
	ret;
ENDPROC(__camellia_enc_blk_2way)
|
||||
|
||||
/*
 * camellia_dec_blk_2way - decrypt two 16-byte blocks from src into dst.
 *
 * 'max' (RT2) is 24 when key_length(CTX) is 16 and 32 otherwise; in the
 * latter case one extra round group is executed first.
 */
ENTRY(camellia_dec_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	/* RXORd is only a scratch register for the conditional move here. */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	/* No xor flag on this path, so RXOR is reused to preserve %rbx. */
	movq %rbx, RXOR;
	movq %rbp, RRBP;
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack2(RT2);

	cmpb $24, RT2bl;
	je .L__dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

.L__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq RDST, RIO;

	dec_outunpack2();

	movq RRBP, %rbp;
	movq RXOR, %rbx;
	ret;
ENDPROC(camellia_dec_blk_2way)
|
||||
586
arch/x86/crypto/camellia_aesni_avx2_glue.c
Normal file
586
arch/x86/crypto/camellia_aesni_avx2_glue.c
Normal file
|
|
@ -0,0 +1,586 @@
|
|||
/*
|
||||
* Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
|
||||
*
|
||||
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/lrw.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/xsave.h>
|
||||
#include <asm/crypto/camellia.h>
|
||||
#include <asm/crypto/glue_helper.h>
|
||||
|
||||
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
|
||||
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
|
||||
|
||||
/* 32-way AVX2/AES-NI parallel cipher functions */
|
||||
asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
|
||||
asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
|
||||
asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
|
||||
static const struct common_glue_ctx camellia_enc = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
|
||||
}, {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_ctr = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
|
||||
}, {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_enc_xts = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
|
||||
}, {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_dec = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
|
||||
}, {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_dec_cbc = {
|
||||
.num_funcs = 4,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
|
||||
}, {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_dec_xts = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
|
||||
}, {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
|
||||
} }
|
||||
};
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
|
||||
dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
|
||||
nbytes);
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper around glue_fpu_begin() with the Camellia block size and
 * the 16-block limit filled in; no descriptor is passed (NULL).
 *
 * Returns the value of glue_fpu_begin(), which callers store back as
 * their fpu_enabled state.
 */
static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	bool enabled;

	enabled = glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
				 CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL,
				 fpu_enabled, nbytes);
	return enabled;
}
|
||||
|
||||
/* Counterpart of camellia_fpu_begin(): forwards the state to glue_fpu_end(). */
static inline void camellia_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
|
||||
|
||||
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
|
||||
&tfm->crt_flags);
|
||||
}
|
||||
|
||||
struct crypt_priv {
|
||||
struct camellia_ctx *ctx;
|
||||
bool fpu_enabled;
|
||||
};
|
||||
|
||||
/*
 * LRW encryption callback: encrypts nbytes of srcdst in place.
 *
 * At most one 32-block chunk and then at most one 16-block chunk are
 * processed with the wide routines; what is left goes through the 2-way
 * routine and finally block by block.  A trailing partial block is left
 * untouched.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	const unsigned int len32 = bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
	const unsigned int len16 = bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
	const unsigned int len2 = bsize * CAMELLIA_PARALLEL_BLOCKS;
	struct crypt_priv *state = priv;

	state->fpu_enabled = camellia_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes >= len32) {
		camellia_ecb_enc_32way(state->ctx, srcdst, srcdst);
		srcdst += len32;
		nbytes -= len32;
	}

	if (nbytes >= len16) {
		camellia_ecb_enc_16way(state->ctx, srcdst, srcdst);
		srcdst += len16;
		nbytes -= len16;
	}

	while (nbytes >= len2) {
		camellia_enc_blk_2way(state->ctx, srcdst, srcdst);
		srcdst += len2;
		nbytes -= len2;
	}

	while (nbytes >= bsize) {
		camellia_enc_blk(state->ctx, srcdst, srcdst);
		srcdst += bsize;
		nbytes -= bsize;
	}
}
|
||||
|
||||
/*
 * LRW decryption callback: decrypts nbytes of srcdst in place.
 *
 * At most one 32-block chunk and then at most one 16-block chunk are
 * processed with the wide routines; what is left goes through the 2-way
 * routine and finally block by block.  A trailing partial block is left
 * untouched.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
	const unsigned int len32 = bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
	const unsigned int len16 = bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
	const unsigned int len2 = bsize * CAMELLIA_PARALLEL_BLOCKS;
	struct crypt_priv *state = priv;

	state->fpu_enabled = camellia_fpu_begin(state->fpu_enabled, nbytes);

	if (nbytes >= len32) {
		camellia_ecb_dec_32way(state->ctx, srcdst, srcdst);
		srcdst += len32;
		nbytes -= len32;
	}

	if (nbytes >= len16) {
		camellia_ecb_dec_16way(state->ctx, srcdst, srcdst);
		srcdst += len16;
		nbytes -= len16;
	}

	while (nbytes >= len2) {
		camellia_dec_blk_2way(state->ctx, srcdst, srcdst);
		srcdst += len2;
		nbytes -= len2;
	}

	while (nbytes >= bsize) {
		camellia_dec_blk(state->ctx, srcdst, srcdst);
		srcdst += bsize;
		nbytes -= bsize;
	}
}
|
||||
|
||||
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->camellia_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = encrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
camellia_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->camellia_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = decrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
camellia_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static struct crypto_alg cmll_algs[10] = { {
|
||||
.cra_name = "__ecb-camellia-aesni-avx2",
|
||||
.cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.setkey = camellia_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-camellia-aesni-avx2",
|
||||
.cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.setkey = camellia_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-camellia-aesni-avx2",
|
||||
.cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct camellia_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = camellia_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__lrw-camellia-aesni-avx2",
|
||||
.cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_exit = lrw_camellia_exit_tfm,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = lrw_camellia_setkey,
|
||||
.encrypt = lrw_encrypt,
|
||||
.decrypt = lrw_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__xts-camellia-aesni-avx2",
|
||||
.cra_driver_name = "__driver-xts-camellia-aesni-avx2",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = xts_camellia_setkey,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ecb(camellia)",
|
||||
.cra_driver_name = "ecb-camellia-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(camellia)",
|
||||
.cra_driver_name = "cbc-camellia-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ctr(camellia)",
|
||||
.cra_driver_name = "ctr-camellia-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_encrypt,
|
||||
.geniv = "chainiv",
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "lrw(camellia)",
|
||||
.cra_driver_name = "lrw-camellia-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "xts(camellia)",
|
||||
.cra_driver_name = "xts-camellia-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
/*
 * Module init.  Registers cmll_algs only if the CPU reports AVX2, AVX,
 * AES and OSXSAVE and XCR0 has both the SSE and YMM state bits set.
 *
 * Returns -ENODEV when either check fails, otherwise the result of
 * crypto_register_algs().
 */
static int __init camellia_aesni_init(void)
{
	const u64 ymm_state = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx2 && cpu_has_avx && cpu_has_aes && cpu_has_osxsave)) {
		pr_info("AVX2 or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	/* The instructions exist; check the required state bits in XCR0. */
	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & ymm_state) != ymm_state) {
		pr_info("AVX2 detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
|
||||
|
||||
/* Module exit: unregister everything camellia_aesni_init() registered. */
static void __exit camellia_aesni_fini(void)
{
	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
|
||||
|
||||
module_init(camellia_aesni_init);
|
||||
module_exit(camellia_aesni_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
|
||||
MODULE_ALIAS_CRYPTO("camellia");
|
||||
MODULE_ALIAS_CRYPTO("camellia-asm");
|
||||
578
arch/x86/crypto/camellia_aesni_avx_glue.c
Normal file
578
arch/x86/crypto/camellia_aesni_avx_glue.c
Normal file
|
|
@ -0,0 +1,578 @@
|
|||
/*
|
||||
* Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/lrw.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/xsave.h>
|
||||
#include <asm/crypto/camellia.h>
|
||||
#include <asm/crypto/glue_helper.h>
|
||||
|
||||
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
|
||||
|
||||
/* 16-way parallel cipher functions (avx/aes-ni) */
|
||||
asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
|
||||
|
||||
asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
|
||||
|
||||
asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
|
||||
|
||||
asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
EXPORT_SYMBOL_GPL(camellia_ctr_16way);
|
||||
|
||||
asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
|
||||
|
||||
asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
|
||||
|
||||
/*
 * Single-block XTS encryption: delegates to glue_xts_crypt_128bit_one()
 * with camellia_enc_blk as the block function.  Exported (GPL-only).
 */
void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
				  GLUE_FUNC_CAST(camellia_enc_blk));
}
EXPORT_SYMBOL_GPL(camellia_xts_enc);
|
||||
|
||||
/*
 * Single-block XTS decryption: delegates to glue_xts_crypt_128bit_one()
 * with camellia_dec_blk as the block function.  Exported (GPL-only).
 */
void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
				  GLUE_FUNC_CAST(camellia_dec_blk));
}
EXPORT_SYMBOL_GPL(camellia_xts_dec);
|
||||
|
||||
static const struct common_glue_ctx camellia_enc = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_ctr = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_enc_xts = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_dec = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_dec_cbc = {
|
||||
.num_funcs = 3,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 2,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx camellia_dec_xts = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
|
||||
} }
|
||||
};
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
|
||||
dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
|
||||
nbytes);
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper around glue_fpu_begin() with the Camellia block size and
 * the 16-block limit filled in; no descriptor is passed (NULL).
 *
 * Returns the value of glue_fpu_begin(), which callers store back as
 * their fpu_enabled state.
 */
static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	bool enabled;

	enabled = glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
				 CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL,
				 fpu_enabled, nbytes);
	return enabled;
}
|
||||
|
||||
/* Counterpart of camellia_fpu_begin(): forwards the state to glue_fpu_end(). */
static inline void camellia_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
|
||||
|
||||
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
|
||||
&tfm->crt_flags);
|
||||
}
|
||||
|
||||
/*
 * State shared between lrw_encrypt()/lrw_decrypt() and their per-chunk
 * callbacks; it is handed over through lrw_crypt_req.crypt_ctx.
 */
struct crypt_priv {
|
||||
/* Camellia key context passed to the cipher routines */
struct camellia_ctx *ctx;
|
||||
/* last value returned by camellia_fpu_begin(); given to camellia_fpu_end() */
bool fpu_enabled;
|
||||
};
|
||||
|
||||
/*
 * LRW per-chunk encryption callback.
 *
 * @priv:   struct crypt_priv carrying the key context and FPU state
 * @srcdst: buffer encrypted in place
 * @nbytes: number of bytes in @srcdst
 *
 * Takes at most one 16-block chunk through the AES-NI/AVX routine, then
 * as many 2-block chunks as fit, then single blocks. Bytes beyond the last
 * whole block are left untouched.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int blk = CAMELLIA_BLOCK_SIZE;
	const unsigned int wide = CAMELLIA_AESNI_PARALLEL_BLOCKS * blk;
	const unsigned int pair = CAMELLIA_PARALLEL_BLOCKS * blk;
	struct crypt_priv *state = priv;

	state->fpu_enabled = camellia_fpu_begin(state->fpu_enabled, nbytes);

	/* 16-way path: a single shot, not a loop */
	if (nbytes >= wide) {
		camellia_ecb_enc_16way(state->ctx, srcdst, srcdst);
		srcdst += wide;
		nbytes -= wide;
	}

	/* 2-way path */
	for (; nbytes >= pair; srcdst += pair, nbytes -= pair)
		camellia_enc_blk_2way(state->ctx, srcdst, srcdst);

	/* remaining whole blocks, one at a time */
	while (nbytes >= blk) {
		camellia_enc_blk(state->ctx, srcdst, srcdst);
		srcdst += blk;
		nbytes -= blk;
	}
}
|
||||
|
||||
/*
 * LRW per-chunk decryption callback.
 *
 * @priv:   struct crypt_priv carrying the key context and FPU state
 * @srcdst: buffer decrypted in place
 * @nbytes: number of bytes in @srcdst
 *
 * Takes at most one 16-block chunk through the AES-NI/AVX routine, then
 * as many 2-block chunks as fit, then single blocks. Bytes beyond the last
 * whole block are left untouched.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int blk = CAMELLIA_BLOCK_SIZE;
	const unsigned int wide = CAMELLIA_AESNI_PARALLEL_BLOCKS * blk;
	const unsigned int pair = CAMELLIA_PARALLEL_BLOCKS * blk;
	struct crypt_priv *state = priv;

	state->fpu_enabled = camellia_fpu_begin(state->fpu_enabled, nbytes);

	/* 16-way path: a single shot, not a loop */
	if (nbytes >= wide) {
		camellia_ecb_dec_16way(state->ctx, srcdst, srcdst);
		srcdst += wide;
		nbytes -= wide;
	}

	/* 2-way path */
	for (; nbytes >= pair; srcdst += pair, nbytes -= pair)
		camellia_dec_blk_2way(state->ctx, srcdst, srcdst);

	/* remaining whole blocks, one at a time */
	while (nbytes >= blk) {
		camellia_dec_blk(state->ctx, srcdst, srcdst);
		srcdst += blk;
		nbytes -= blk;
	}
}
|
||||
|
||||
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->camellia_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = encrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
camellia_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->camellia_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = decrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
camellia_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static struct crypto_alg cmll_algs[10] = { {
|
||||
.cra_name = "__ecb-camellia-aesni",
|
||||
.cra_driver_name = "__driver-ecb-camellia-aesni",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.setkey = camellia_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-camellia-aesni",
|
||||
.cra_driver_name = "__driver-cbc-camellia-aesni",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.setkey = camellia_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-camellia-aesni",
|
||||
.cra_driver_name = "__driver-ctr-camellia-aesni",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct camellia_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = camellia_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__lrw-camellia-aesni",
|
||||
.cra_driver_name = "__driver-lrw-camellia-aesni",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_exit = lrw_camellia_exit_tfm,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = lrw_camellia_setkey,
|
||||
.encrypt = lrw_encrypt,
|
||||
.decrypt = lrw_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__xts-camellia-aesni",
|
||||
.cra_driver_name = "__driver-xts-camellia-aesni",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = xts_camellia_setkey,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ecb(camellia)",
|
||||
.cra_driver_name = "ecb-camellia-aesni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(camellia)",
|
||||
.cra_driver_name = "cbc-camellia-aesni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ctr(camellia)",
|
||||
.cra_driver_name = "ctr-camellia-aesni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_encrypt,
|
||||
.geniv = "chainiv",
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "lrw(camellia)",
|
||||
.cra_driver_name = "lrw-camellia-aesni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE +
|
||||
CAMELLIA_BLOCK_SIZE,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "xts(camellia)",
|
||||
.cra_driver_name = "xts-camellia-aesni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
|
||||
.ivsize = CAMELLIA_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
/*
 * Module init: register the algorithm table only when the CPU reports AVX,
 * AES-NI and OSXSAVE and XCR0 has both the SSE and YMM state bits set.
 *
 * Returns -ENODEV when either check fails, otherwise the result of
 * crypto_register_algs().
 */
static int __init camellia_aesni_init(void)
{
	const u64 needed = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx && cpu_has_aes && cpu_has_osxsave)) {
		pr_info("AVX or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	/* the CPU has AVX; make sure the SSE and YMM state bits are enabled */
	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & needed) != needed) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
}
|
||||
|
||||
/* Module exit: unregister every entry of cmll_algs. */
static void __exit camellia_aesni_fini(void)
|
||||
{
|
||||
crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
|
||||
}
|
||||
|
||||
module_init(camellia_aesni_init);
|
||||
module_exit(camellia_aesni_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized");
|
||||
MODULE_ALIAS_CRYPTO("camellia");
|
||||
MODULE_ALIAS_CRYPTO("camellia-asm");
|
||||
1729
arch/x86/crypto/camellia_glue.c
Normal file
1729
arch/x86/crypto/camellia_glue.c
Normal file
File diff suppressed because it is too large
Load diff
546
arch/x86/crypto/cast5-avx-x86_64-asm_64.S
Normal file
546
arch/x86/crypto/cast5-avx-x86_64-asm_64.S
Normal file
|
|
@ -0,0 +1,546 @@
|
|||
/*
|
||||
* Cast5 Cipher 16-way parallel algorithm (AVX/x86_64)
|
||||
*
|
||||
* Copyright (C) 2012 Johannes Goetzfried
|
||||
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
|
||||
*
|
||||
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "cast5-avx-x86_64-asm_64.S"
|
||||
|
||||
.extern cast_s1
|
||||
.extern cast_s2
|
||||
.extern cast_s3
|
||||
.extern cast_s4
|
||||
|
||||
/* structure of crypto context */
|
||||
#define km 0
|
||||
#define kr (16*4)
|
||||
#define rr ((16*4)+16)
|
||||
|
||||
/* s-boxes */
|
||||
#define s1 cast_s1
|
||||
#define s2 cast_s2
|
||||
#define s3 cast_s3
|
||||
#define s4 cast_s4
|
||||
|
||||
/**********************************************************************
|
||||
16-way AVX cast5
|
||||
**********************************************************************/
|
||||
#define CTX %rdi
|
||||
|
||||
#define RL1 %xmm0
|
||||
#define RR1 %xmm1
|
||||
#define RL2 %xmm2
|
||||
#define RR2 %xmm3
|
||||
#define RL3 %xmm4
|
||||
#define RR3 %xmm5
|
||||
#define RL4 %xmm6
|
||||
#define RR4 %xmm7
|
||||
|
||||
#define RX %xmm8
|
||||
|
||||
#define RKM %xmm9
|
||||
#define RKR %xmm10
|
||||
#define RKRF %xmm11
|
||||
#define RKRR %xmm12
|
||||
|
||||
#define R32 %xmm13
|
||||
#define R1ST %xmm14
|
||||
|
||||
#define RTMP %xmm15
|
||||
|
||||
#define RID1 %rbp
|
||||
#define RID1d %ebp
|
||||
#define RID2 %rsi
|
||||
#define RID2d %esi
|
||||
|
||||
#define RGI1 %rdx
|
||||
#define RGI1bl %dl
|
||||
#define RGI1bh %dh
|
||||
#define RGI2 %rcx
|
||||
#define RGI2bl %cl
|
||||
#define RGI2bh %ch
|
||||
|
||||
#define RGI3 %rax
|
||||
#define RGI3bl %al
|
||||
#define RGI3bh %ah
|
||||
#define RGI4 %rbx
|
||||
#define RGI4bl %bl
|
||||
#define RGI4bh %bh
|
||||
|
||||
#define RFS1 %r8
|
||||
#define RFS1d %r8d
|
||||
#define RFS2 %r9
|
||||
#define RFS2d %r9d
|
||||
#define RFS3 %r10
|
||||
#define RFS3d %r10d
|
||||
|
||||
|
||||
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
|
||||
movzbl src ## bh, RID1d; \
|
||||
movzbl src ## bl, RID2d; \
|
||||
shrq $16, src; \
|
||||
movl s1(, RID1, 4), dst ## d; \
|
||||
op1 s2(, RID2, 4), dst ## d; \
|
||||
movzbl src ## bh, RID1d; \
|
||||
movzbl src ## bl, RID2d; \
|
||||
interleave_op(il_reg); \
|
||||
op2 s3(, RID1, 4), dst ## d; \
|
||||
op3 s4(, RID2, 4), dst ## d;
|
||||
|
||||
#define dummy(d) /* do nothing */
|
||||
|
||||
#define shr_next(reg) \
|
||||
shrq $16, reg;
|
||||
|
||||
#define F_head(a, x, gi1, gi2, op0) \
|
||||
op0 a, RKM, x; \
|
||||
vpslld RKRF, x, RTMP; \
|
||||
vpsrld RKRR, x, x; \
|
||||
vpor RTMP, x, x; \
|
||||
\
|
||||
vmovq x, gi1; \
|
||||
vpextrq $1, x, gi2;
|
||||
|
||||
#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
|
||||
lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
|
||||
lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
|
||||
\
|
||||
lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \
|
||||
shlq $32, RFS2; \
|
||||
orq RFS1, RFS2; \
|
||||
lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \
|
||||
shlq $32, RFS1; \
|
||||
orq RFS1, RFS3; \
|
||||
\
|
||||
vmovq RFS2, x; \
|
||||
vpinsrq $1, RFS3, x, x;
|
||||
|
||||
#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
|
||||
F_head(b1, RX, RGI1, RGI2, op0); \
|
||||
F_head(b2, RX, RGI3, RGI4, op0); \
|
||||
\
|
||||
F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \
|
||||
F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \
|
||||
\
|
||||
vpxor a1, RX, a1; \
|
||||
vpxor a2, RTMP, a2;
|
||||
|
||||
#define F1_2(a1, b1, a2, b2) \
|
||||
F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
|
||||
#define F2_2(a1, b1, a2, b2) \
|
||||
F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
|
||||
#define F3_2(a1, b1, a2, b2) \
|
||||
F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
|
||||
|
||||
#define subround(a1, b1, a2, b2, f) \
|
||||
F ## f ## _2(a1, b1, a2, b2);
|
||||
|
||||
#define round(l, r, n, f) \
|
||||
vbroadcastss (km+(4*n))(CTX), RKM; \
|
||||
vpand R1ST, RKR, RKRF; \
|
||||
vpsubq RKRF, R32, RKRR; \
|
||||
vpsrldq $1, RKR, RKR; \
|
||||
subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \
|
||||
subround(l ## 3, r ## 3, l ## 4, r ## 4, f);
|
||||
|
||||
#define enc_preload_rkr() \
|
||||
vbroadcastss .L16_mask, RKR; \
|
||||
/* add 16-bit rotation to key rotations (mod 32) */ \
|
||||
vpxor kr(CTX), RKR, RKR;
|
||||
|
||||
#define dec_preload_rkr() \
|
||||
vbroadcastss .L16_mask, RKR; \
|
||||
/* add 16-bit rotation to key rotations (mod 32) */ \
|
||||
vpxor kr(CTX), RKR, RKR; \
|
||||
vpshufb .Lbswap128_mask, RKR, RKR;
|
||||
|
||||
#define transpose_2x4(x0, x1, t0, t1) \
|
||||
vpunpckldq x1, x0, t0; \
|
||||
vpunpckhdq x1, x0, t1; \
|
||||
\
|
||||
vpunpcklqdq t1, t0, x0; \
|
||||
vpunpckhqdq t1, t0, x1;
|
||||
|
||||
#define inpack_blocks(x0, x1, t0, t1, rmask) \
|
||||
vpshufb rmask, x0, x0; \
|
||||
vpshufb rmask, x1, x1; \
|
||||
\
|
||||
transpose_2x4(x0, x1, t0, t1)
|
||||
|
||||
#define outunpack_blocks(x0, x1, t0, t1, rmask) \
|
||||
transpose_2x4(x0, x1, t0, t1) \
|
||||
\
|
||||
vpshufb rmask, x0, x0; \
|
||||
vpshufb rmask, x1, x1;
|
||||
|
||||
.data
|
||||
|
||||
.align 16
|
||||
.Lbswap_mask:
|
||||
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.Lbswap_iv_mask:
|
||||
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.L16_mask:
|
||||
.byte 16, 16, 16, 16
|
||||
.L32_mask:
|
||||
.byte 32, 0, 0, 0
|
||||
.Lfirst_mask:
|
||||
.byte 0x1f, 0, 0, 0
|
||||
|
||||
.text
|
||||
|
||||
.align 16
|
||||
/*
 * Internal helper: encrypts the 16 blocks held in RL1..RR4.
 * Rounds 0-11 always run; rounds 12-15 are skipped when the byte at offset
 * rr of the context is non-zero.
 * Note that the result comes back with each RRn/RLn pair swapped relative to
 * the input (see "output" below); callers store RRn before RLn.
 */
__cast5_enc_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* RL1: blocks 1 and 2
|
||||
* RR1: blocks 3 and 4
|
||||
* RL2: blocks 5 and 6
|
||||
* RR2: blocks 7 and 8
|
||||
* RL3: blocks 9 and 10
|
||||
* RR3: blocks 11 and 12
|
||||
* RL4: blocks 13 and 14
|
||||
* RR4: blocks 15 and 16
|
||||
* output:
|
||||
* RR1: encrypted blocks 1 and 2
|
||||
* RL1: encrypted blocks 3 and 4
|
||||
* RR2: encrypted blocks 5 and 6
|
||||
* RL2: encrypted blocks 7 and 8
|
||||
* RR3: encrypted blocks 9 and 10
|
||||
* RL3: encrypted blocks 11 and 12
|
||||
* RR4: encrypted blocks 13 and 14
|
||||
* RL4: encrypted blocks 15 and 16
|
||||
*/
|
||||
|
||||
/* %rbp (RID1) and %rbx (RGI4) are scratch registers of the round macros */
pushq %rbp;
|
||||
pushq %rbx;
|
||||
|
||||
vmovdqa .Lbswap_mask, RKM;
|
||||
vmovd .Lfirst_mask, R1ST;
|
||||
vmovd .L32_mask, R32;
|
||||
enc_preload_rkr();
|
||||
|
||||
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
|
||||
inpack_blocks(RL2, RR2, RTMP, RX, RKM);
|
||||
inpack_blocks(RL3, RR3, RTMP, RX, RKM);
|
||||
inpack_blocks(RL4, RR4, RTMP, RX, RKM);
|
||||
|
||||
round(RL, RR, 0, 1);
|
||||
round(RR, RL, 1, 2);
|
||||
round(RL, RR, 2, 3);
|
||||
round(RR, RL, 3, 1);
|
||||
round(RL, RR, 4, 2);
|
||||
round(RR, RL, 5, 3);
|
||||
round(RL, RR, 6, 1);
|
||||
round(RR, RL, 7, 2);
|
||||
round(RL, RR, 8, 3);
|
||||
round(RR, RL, 9, 1);
|
||||
round(RL, RR, 10, 2);
|
||||
round(RR, RL, 11, 3);
|
||||
|
||||
/* non-zero rr byte in the context: stop after 12 rounds */
movzbl rr(CTX), %eax;
|
||||
testl %eax, %eax;
|
||||
jnz .L__skip_enc;
|
||||
|
||||
round(RL, RR, 12, 1);
|
||||
round(RR, RL, 13, 2);
|
||||
round(RL, RR, 14, 3);
|
||||
round(RR, RL, 15, 1);
|
||||
|
||||
.L__skip_enc:
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
|
||||
/* RKM was overwritten with round keys by round(); reload the byte-swap mask */
vmovdqa .Lbswap_mask, RKM;
|
||||
|
||||
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
|
||||
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
|
||||
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
|
||||
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
|
||||
|
||||
ret;
|
||||
ENDPROC(__cast5_enc_blk16)
|
||||
|
||||
.align 16
|
||||
/*
 * Internal helper: decrypts the 16 blocks held in RL1..RR4.
 * Rounds run in reverse order; rounds 15-12 are skipped when the byte at
 * offset rr of the context is non-zero.
 * Note that the result comes back with each RRn/RLn pair swapped relative to
 * the input (see "output" below); callers store RRn before RLn.
 */
__cast5_dec_blk16:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* RL1: encrypted blocks 1 and 2
|
||||
* RR1: encrypted blocks 3 and 4
|
||||
* RL2: encrypted blocks 5 and 6
|
||||
* RR2: encrypted blocks 7 and 8
|
||||
* RL3: encrypted blocks 9 and 10
|
||||
* RR3: encrypted blocks 11 and 12
|
||||
* RL4: encrypted blocks 13 and 14
|
||||
* RR4: encrypted blocks 15 and 16
|
||||
* output:
|
||||
* RR1: decrypted blocks 1 and 2
|
||||
* RL1: decrypted blocks 3 and 4
|
||||
* RR2: decrypted blocks 5 and 6
|
||||
* RL2: decrypted blocks 7 and 8
|
||||
* RR3: decrypted blocks 9 and 10
|
||||
* RL3: decrypted blocks 11 and 12
|
||||
* RR4: decrypted blocks 13 and 14
|
||||
* RL4: decrypted blocks 15 and 16
|
||||
*/
|
||||
|
||||
/* %rbp (RID1) and %rbx (RGI4) are scratch registers of the round macros */
pushq %rbp;
|
||||
pushq %rbx;
|
||||
|
||||
vmovdqa .Lbswap_mask, RKM;
|
||||
vmovd .Lfirst_mask, R1ST;
|
||||
vmovd .L32_mask, R32;
|
||||
dec_preload_rkr();
|
||||
|
||||
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
|
||||
inpack_blocks(RL2, RR2, RTMP, RX, RKM);
|
||||
inpack_blocks(RL3, RR3, RTMP, RX, RKM);
|
||||
inpack_blocks(RL4, RR4, RTMP, RX, RKM);
|
||||
|
||||
/* non-zero rr byte in the context: skip rounds 15-12 */
movzbl rr(CTX), %eax;
|
||||
testl %eax, %eax;
|
||||
jnz .L__skip_dec;
|
||||
|
||||
round(RL, RR, 15, 1);
|
||||
round(RR, RL, 14, 3);
|
||||
round(RL, RR, 13, 2);
|
||||
round(RR, RL, 12, 1);
|
||||
|
||||
.L__dec_tail:
|
||||
round(RL, RR, 11, 3);
|
||||
round(RR, RL, 10, 2);
|
||||
round(RL, RR, 9, 1);
|
||||
round(RR, RL, 8, 3);
|
||||
round(RL, RR, 7, 2);
|
||||
round(RR, RL, 6, 1);
|
||||
round(RL, RR, 5, 3);
|
||||
round(RR, RL, 4, 2);
|
||||
round(RL, RR, 3, 1);
|
||||
round(RR, RL, 2, 3);
|
||||
round(RL, RR, 1, 2);
|
||||
round(RR, RL, 0, 1);
|
||||
|
||||
/* RKM was overwritten with round keys by round(); reload the byte-swap mask */
vmovdqa .Lbswap_mask, RKM;
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
|
||||
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
|
||||
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
|
||||
outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
|
||||
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
|
||||
|
||||
ret;
|
||||
|
||||
.L__skip_dec:
|
||||
/*
 * Each round() consumes one byte of RKR. dec_preload_rkr() reversed the
 * byte order, so the four bytes of the skipped rounds come first: drop them.
 */
vpsrldq $4, RKR, RKR;
|
||||
jmp .L__dec_tail;
|
||||
ENDPROC(__cast5_dec_blk16)
|
||||
|
||||
/*
 * ECB-encrypt 16 blocks (8 x 16 bytes) from src to dst.
 * C prototype: void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
 *                                       const u8 *src)
 */
ENTRY(cast5_ecb_enc_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
/* %rsi doubles as RID2 inside the block function, so keep dst in %r11 */
movq %rsi, %r11;
|
||||
|
||||
vmovdqu (0*4*4)(%rdx), RL1;
|
||||
vmovdqu (1*4*4)(%rdx), RR1;
|
||||
vmovdqu (2*4*4)(%rdx), RL2;
|
||||
vmovdqu (3*4*4)(%rdx), RR2;
|
||||
vmovdqu (4*4*4)(%rdx), RL3;
|
||||
vmovdqu (5*4*4)(%rdx), RR3;
|
||||
vmovdqu (6*4*4)(%rdx), RL4;
|
||||
vmovdqu (7*4*4)(%rdx), RR4;
|
||||
|
||||
call __cast5_enc_blk16;
|
||||
|
||||
/* results are stored RRn first, then RLn */
vmovdqu RR1, (0*4*4)(%r11);
|
||||
vmovdqu RL1, (1*4*4)(%r11);
|
||||
vmovdqu RR2, (2*4*4)(%r11);
|
||||
vmovdqu RL2, (3*4*4)(%r11);
|
||||
vmovdqu RR3, (4*4*4)(%r11);
|
||||
vmovdqu RL3, (5*4*4)(%r11);
|
||||
vmovdqu RR4, (6*4*4)(%r11);
|
||||
vmovdqu RL4, (7*4*4)(%r11);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_ecb_enc_16way)
|
||||
|
||||
/*
 * ECB-decrypt 16 blocks (8 x 16 bytes) from src to dst.
 * C prototype: void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
 *                                       const u8 *src)
 */
ENTRY(cast5_ecb_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
/* %rsi doubles as RID2 inside the block function, so keep dst in %r11 */
movq %rsi, %r11;
|
||||
|
||||
vmovdqu (0*4*4)(%rdx), RL1;
|
||||
vmovdqu (1*4*4)(%rdx), RR1;
|
||||
vmovdqu (2*4*4)(%rdx), RL2;
|
||||
vmovdqu (3*4*4)(%rdx), RR2;
|
||||
vmovdqu (4*4*4)(%rdx), RL3;
|
||||
vmovdqu (5*4*4)(%rdx), RR3;
|
||||
vmovdqu (6*4*4)(%rdx), RL4;
|
||||
vmovdqu (7*4*4)(%rdx), RR4;
|
||||
|
||||
call __cast5_dec_blk16;
|
||||
|
||||
/* results are stored RRn first, then RLn */
vmovdqu RR1, (0*4*4)(%r11);
|
||||
vmovdqu RL1, (1*4*4)(%r11);
|
||||
vmovdqu RR2, (2*4*4)(%r11);
|
||||
vmovdqu RL2, (3*4*4)(%r11);
|
||||
vmovdqu RR3, (4*4*4)(%r11);
|
||||
vmovdqu RL3, (5*4*4)(%r11);
|
||||
vmovdqu RR4, (6*4*4)(%r11);
|
||||
vmovdqu RL4, (7*4*4)(%r11);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_ecb_dec_16way)
|
||||
|
||||
/*
 * CBC-decrypt 16 blocks from src to dst.
 * Blocks 2..16 are xored with the preceding ciphertext block read from src;
 * the first block is only decrypted here (NOTE(review): presumably the
 * caller xors it with the IV - confirm in the glue code).
 * C prototype: void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
 *                                       const u8 *src)
 */
ENTRY(cast5_cbc_dec_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
pushq %r12;
|
||||
|
||||
/* %rsi (RID2) and %rdx (RGI1) are reused by the block function */
movq %rsi, %r11;
|
||||
movq %rdx, %r12;
|
||||
|
||||
vmovdqu (0*16)(%rdx), RL1;
|
||||
vmovdqu (1*16)(%rdx), RR1;
|
||||
vmovdqu (2*16)(%rdx), RL2;
|
||||
vmovdqu (3*16)(%rdx), RR2;
|
||||
vmovdqu (4*16)(%rdx), RL3;
|
||||
vmovdqu (5*16)(%rdx), RR3;
|
||||
vmovdqu (6*16)(%rdx), RL4;
|
||||
vmovdqu (7*16)(%rdx), RR4;
|
||||
|
||||
call __cast5_dec_blk16;
|
||||
|
||||
/* xor with src */
|
||||
/* RX = { 0, first ciphertext block }: block 1 stays as is, block 2 ^= block 1 */
vmovq (%r12), RX;
|
||||
vpshufd $0x4f, RX, RX;
|
||||
vpxor RX, RR1, RR1;
|
||||
/* remaining pairs: xor with the ciphertext shifted back by one 8-byte block */
vpxor 0*16+8(%r12), RL1, RL1;
|
||||
vpxor 1*16+8(%r12), RR2, RR2;
|
||||
vpxor 2*16+8(%r12), RL2, RL2;
|
||||
vpxor 3*16+8(%r12), RR3, RR3;
|
||||
vpxor 4*16+8(%r12), RL3, RL3;
|
||||
vpxor 5*16+8(%r12), RR4, RR4;
|
||||
vpxor 6*16+8(%r12), RL4, RL4;
|
||||
|
||||
vmovdqu RR1, (0*16)(%r11);
|
||||
vmovdqu RL1, (1*16)(%r11);
|
||||
vmovdqu RR2, (2*16)(%r11);
|
||||
vmovdqu RL2, (3*16)(%r11);
|
||||
vmovdqu RR3, (4*16)(%r11);
|
||||
vmovdqu RL3, (5*16)(%r11);
|
||||
vmovdqu RR4, (6*16)(%r11);
|
||||
vmovdqu RL4, (7*16)(%r11);
|
||||
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_cbc_dec_16way)
|
||||
|
||||
/*
 * CTR mode over 16 blocks: builds 16 consecutive counter values from the
 * 64-bit big-endian IV, writes IV + 16 back to *iv, encrypts the counters and
 * xors them with src into dst.
 * C prototype: void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst,
 *                                   const u8 *src, __be64 *iv)
 */
ENTRY(cast5_ctr_16way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
* %rcx: iv (big endian, 64bit)
|
||||
*/
|
||||
|
||||
pushq %r12;
|
||||
|
||||
/* %rsi (RID2) and %rdx (RGI1) are reused by the block function */
movq %rsi, %r11;
|
||||
movq %rdx, %r12;
|
||||
|
||||
vpcmpeqd RTMP, RTMP, RTMP;
|
||||
vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
|
||||
|
||||
vpcmpeqd RKR, RKR, RKR;
|
||||
vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
|
||||
vmovdqa .Lbswap_iv_mask, R1ST;
|
||||
vmovdqa .Lbswap128_mask, RKM;
|
||||
|
||||
/* load IV and byteswap */
|
||||
vmovq (%rcx), RX;
|
||||
vpshufb R1ST, RX, RX;
|
||||
|
||||
/* construct IVs */
|
||||
/* subtracting -1 / -2 increments the counters */
vpsubq RTMP, RX, RX; /* le: IV1, IV0 */
|
||||
vpshufb RKM, RX, RL1; /* be: IV0, IV1 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RR1; /* be: IV2, IV3 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RL2; /* be: IV4, IV5 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RR2; /* be: IV6, IV7 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RL3; /* be: IV8, IV9 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RR3; /* be: IV10, IV11 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RL4; /* be: IV12, IV13 */
|
||||
vpsubq RKR, RX, RX;
|
||||
vpshufb RKM, RX, RR4; /* be: IV14, IV15 */
|
||||
|
||||
/* store last IV */
|
||||
/* done before the call: %rcx is RGI2 and gets clobbered by the rounds */
vpsubq RTMP, RX, RX; /* le: IV16, IV14 */
|
||||
vpshufb R1ST, RX, RX; /* be: IV16, IV16 */
|
||||
vmovq RX, (%rcx);
|
||||
|
||||
call __cast5_enc_blk16;
|
||||
|
||||
/* dst = src ^ iv */
|
||||
vpxor (0*16)(%r12), RR1, RR1;
|
||||
vpxor (1*16)(%r12), RL1, RL1;
|
||||
vpxor (2*16)(%r12), RR2, RR2;
|
||||
vpxor (3*16)(%r12), RL2, RL2;
|
||||
vpxor (4*16)(%r12), RR3, RR3;
|
||||
vpxor (5*16)(%r12), RL3, RL3;
|
||||
vpxor (6*16)(%r12), RR4, RR4;
|
||||
vpxor (7*16)(%r12), RL4, RL4;
|
||||
vmovdqu RR1, (0*16)(%r11);
|
||||
vmovdqu RL1, (1*16)(%r11);
|
||||
vmovdqu RR2, (2*16)(%r11);
|
||||
vmovdqu RL2, (3*16)(%r11);
|
||||
vmovdqu RR3, (4*16)(%r11);
|
||||
vmovdqu RL3, (5*16)(%r11);
|
||||
vmovdqu RR4, (6*16)(%r11);
|
||||
vmovdqu RL4, (7*16)(%r11);
|
||||
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast5_ctr_16way)
|
||||
494
arch/x86/crypto/cast5_avx_glue.c
Normal file
494
arch/x86/crypto/cast5_avx_glue.c
Normal file
|
|
@ -0,0 +1,494 @@
|
|||
/*
|
||||
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
|
||||
*
|
||||
* Copyright (C) 2012 Johannes Goetzfried
|
||||
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/cast5.h>
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/xsave.h>
|
||||
#include <asm/crypto/glue_helper.h>
|
||||
|
||||
#define CAST5_PARALLEL_BLOCKS 16
|
||||
|
||||
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
|
||||
__be64 *iv);
|
||||
|
||||
/*
 * Thin wrapper around glue_fpu_begin() that fills in the CAST5 block size
 * and the 16-way parallel block count.
 *
 * @fpu_enabled: state returned by a previous call (false on the first call)
 * @nbytes:      number of bytes about to be processed
 *
 * Returns whatever glue_fpu_begin() reports; callers store it and later pass
 * it to cast5_fpu_end().
 */
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	bool state;

	state = glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			       NULL, fpu_enabled, nbytes);

	return state;
}
|
||||
|
||||
/*
 * Counterpart of cast5_fpu_begin(): hands the state value obtained from the
 * last cast5_fpu_begin() call to glue_fpu_end().
 *
 * This is a void function, so glue_fpu_end() is called as a plain statement;
 * "return <expression>;" is not allowed here by the C standard.
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
|
||||
|
||||
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
|
||||
bool enc)
|
||||
{
|
||||
bool fpu_enabled = false;
|
||||
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
const unsigned int bsize = CAST5_BLOCK_SIZE;
|
||||
unsigned int nbytes;
|
||||
void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
|
||||
int err;
|
||||
|
||||
fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
|
||||
|
||||
err = blkcipher_walk_virt(desc, walk);
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
|
||||
while ((nbytes = walk->nbytes)) {
|
||||
u8 *wsrc = walk->src.virt.addr;
|
||||
u8 *wdst = walk->dst.virt.addr;
|
||||
|
||||
fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
|
||||
|
||||
/* Process multi-block batch */
|
||||
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
fn(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize * CAST5_PARALLEL_BLOCKS;
|
||||
wdst += bsize * CAST5_PARALLEL_BLOCKS;
|
||||
nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
|
||||
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
fn(ctx, wdst, wsrc);
|
||||
|
||||
wsrc += bsize;
|
||||
wdst += bsize;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
done:
|
||||
err = blkcipher_walk_done(desc, walk, nbytes);
|
||||
}
|
||||
|
||||
cast5_fpu_end(fpu_enabled);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, true);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, false);
|
||||
}
|
||||
|
||||
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
const unsigned int bsize = CAST5_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 *iv = (u64 *)walk->iv;
|
||||
|
||||
do {
|
||||
*dst = *src ^ *iv;
|
||||
__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
|
||||
iv = dst;
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
*(u64 *)walk->iv = *iv;
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
nbytes = __cbc_encrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
const unsigned int bsize = CAST5_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 last_iv;
|
||||
|
||||
/* Start of the last block. */
|
||||
src += nbytes / bsize - 1;
|
||||
dst += nbytes / bsize - 1;
|
||||
|
||||
last_iv = *src;
|
||||
|
||||
/* Process multi-block batch */
|
||||
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
|
||||
src -= CAST5_PARALLEL_BLOCKS - 1;
|
||||
dst -= CAST5_PARALLEL_BLOCKS - 1;
|
||||
|
||||
cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
for (;;) {
|
||||
__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
break;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
}
|
||||
|
||||
done:
|
||||
*dst ^= *(u64 *)walk->iv;
|
||||
*(u64 *)walk->iv = last_iv;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
bool fpu_enabled = false;
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
|
||||
nbytes = __cbc_decrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
cast5_fpu_end(fpu_enabled);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ctr_crypt_final(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
u8 *ctrblk = walk->iv;
|
||||
u8 keystream[CAST5_BLOCK_SIZE];
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
|
||||
__cast5_encrypt(ctx, keystream, ctrblk);
|
||||
crypto_xor(keystream, src, nbytes);
|
||||
memcpy(dst, keystream, nbytes);
|
||||
|
||||
crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
const unsigned int bsize = CAST5_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
|
||||
/* Process multi-block batch */
|
||||
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
|
||||
do {
|
||||
cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
|
||||
(__be64 *)walk->iv);
|
||||
|
||||
src += CAST5_PARALLEL_BLOCKS;
|
||||
dst += CAST5_PARALLEL_BLOCKS;
|
||||
nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
|
||||
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
u64 ctrblk;
|
||||
|
||||
if (dst != src)
|
||||
*dst = *src;
|
||||
|
||||
ctrblk = *(u64 *)walk->iv;
|
||||
be64_add_cpu((__be64 *)walk->iv, 1);
|
||||
|
||||
__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
|
||||
*dst ^= ctrblk;
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
done:
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
bool fpu_enabled = false;
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
|
||||
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
|
||||
fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
|
||||
nbytes = __ctr_crypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
cast5_fpu_end(fpu_enabled);
|
||||
|
||||
if (walk.nbytes) {
|
||||
ctr_crypt_final(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
static struct crypto_alg cast5_algs[6] = { {
|
||||
.cra_name = "__ecb-cast5-avx",
|
||||
.cra_driver_name = "__driver-ecb-cast5-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAST5_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct cast5_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST5_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST5_MAX_KEY_SIZE,
|
||||
.setkey = cast5_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-cast5-avx",
|
||||
.cra_driver_name = "__driver-cbc-cast5-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAST5_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct cast5_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST5_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST5_MAX_KEY_SIZE,
|
||||
.setkey = cast5_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-cast5-avx",
|
||||
.cra_driver_name = "__driver-ctr-cast5-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct cast5_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST5_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST5_MAX_KEY_SIZE,
|
||||
.ivsize = CAST5_BLOCK_SIZE,
|
||||
.setkey = cast5_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ecb(cast5)",
|
||||
.cra_driver_name = "ecb-cast5-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAST5_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST5_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST5_MAX_KEY_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(cast5)",
|
||||
.cra_driver_name = "cbc-cast5-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAST5_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST5_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST5_MAX_KEY_SIZE,
|
||||
.ivsize = CAST5_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ctr(cast5)",
|
||||
.cra_driver_name = "ctr-cast5-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST5_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST5_MAX_KEY_SIZE,
|
||||
.ivsize = CAST5_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_encrypt,
|
||||
.geniv = "chainiv",
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
/*
 * Module init: register cast5_algs only when AVX is usable.
 *
 * Returns -ENODEV when the CPU lacks AVX or OSXSAVE, or when the SSE and YMM
 * state bits are not both set in the XCR read via xgetbv(); otherwise returns
 * the result of crypto_register_algs().
 */
static int __init cast5_init(void)
{
	const u64 required_xstate = XSTATE_SSE | XSTATE_YMM;
	u64 enabled_xstate;

	if (!(cpu_has_avx && cpu_has_osxsave)) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	enabled_xstate = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((enabled_xstate & required_xstate) != required_xstate) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}
|
||||
|
||||
/* Module exit: unregister every entry of cast5_algs. */
static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs,
			       ARRAY_SIZE(cast5_algs));
}
|
||||
|
||||
module_init(cast5_init);
|
||||
module_exit(cast5_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_CRYPTO("cast5");
|
||||
472
arch/x86/crypto/cast6-avx-x86_64-asm_64.S
Normal file
472
arch/x86/crypto/cast6-avx-x86_64-asm_64.S
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
/*
|
||||
* Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
|
||||
*
|
||||
* Copyright (C) 2012 Johannes Goetzfried
|
||||
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "glue_helper-asm-avx.S"
|
||||
|
||||
.file "cast6-avx-x86_64-asm_64.S"
|
||||
|
||||
.extern cast_s1
|
||||
.extern cast_s2
|
||||
.extern cast_s3
|
||||
.extern cast_s4
|
||||
|
||||
/* structure of crypto context */
|
||||
#define km 0
|
||||
#define kr (12*4*4)
|
||||
|
||||
/* s-boxes */
|
||||
#define s1 cast_s1
|
||||
#define s2 cast_s2
|
||||
#define s3 cast_s3
|
||||
#define s4 cast_s4
|
||||
|
||||
/**********************************************************************
|
||||
8-way AVX cast6
|
||||
**********************************************************************/
|
||||
#define CTX %rdi
|
||||
|
||||
#define RA1 %xmm0
|
||||
#define RB1 %xmm1
|
||||
#define RC1 %xmm2
|
||||
#define RD1 %xmm3
|
||||
|
||||
#define RA2 %xmm4
|
||||
#define RB2 %xmm5
|
||||
#define RC2 %xmm6
|
||||
#define RD2 %xmm7
|
||||
|
||||
#define RX %xmm8
|
||||
|
||||
#define RKM %xmm9
|
||||
#define RKR %xmm10
|
||||
#define RKRF %xmm11
|
||||
#define RKRR %xmm12
|
||||
#define R32 %xmm13
|
||||
#define R1ST %xmm14
|
||||
|
||||
#define RTMP %xmm15
|
||||
|
||||
#define RID1 %rbp
|
||||
#define RID1d %ebp
|
||||
#define RID2 %rsi
|
||||
#define RID2d %esi
|
||||
|
||||
#define RGI1 %rdx
|
||||
#define RGI1bl %dl
|
||||
#define RGI1bh %dh
|
||||
#define RGI2 %rcx
|
||||
#define RGI2bl %cl
|
||||
#define RGI2bh %ch
|
||||
|
||||
#define RGI3 %rax
|
||||
#define RGI3bl %al
|
||||
#define RGI3bh %ah
|
||||
#define RGI4 %rbx
|
||||
#define RGI4bl %bl
|
||||
#define RGI4bh %bh
|
||||
|
||||
#define RFS1 %r8
|
||||
#define RFS1d %r8d
|
||||
#define RFS2 %r9
|
||||
#define RFS2d %r9d
|
||||
#define RFS3 %r10
|
||||
#define RFS3d %r10d
|
||||
|
||||
|
||||
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
|
||||
movzbl src ## bh, RID1d; \
|
||||
movzbl src ## bl, RID2d; \
|
||||
shrq $16, src; \
|
||||
movl s1(, RID1, 4), dst ## d; \
|
||||
op1 s2(, RID2, 4), dst ## d; \
|
||||
movzbl src ## bh, RID1d; \
|
||||
movzbl src ## bl, RID2d; \
|
||||
interleave_op(il_reg); \
|
||||
op2 s3(, RID1, 4), dst ## d; \
|
||||
op3 s4(, RID2, 4), dst ## d;
|
||||
|
||||
#define dummy(d) /* do nothing */
|
||||
|
||||
#define shr_next(reg) \
|
||||
shrq $16, reg;
|
||||
|
||||
#define F_head(a, x, gi1, gi2, op0) \
|
||||
op0 a, RKM, x; \
|
||||
vpslld RKRF, x, RTMP; \
|
||||
vpsrld RKRR, x, x; \
|
||||
vpor RTMP, x, x; \
|
||||
\
|
||||
vmovq x, gi1; \
|
||||
vpextrq $1, x, gi2;
|
||||
|
||||
#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
|
||||
lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
|
||||
lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
|
||||
\
|
||||
lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \
|
||||
shlq $32, RFS2; \
|
||||
orq RFS1, RFS2; \
|
||||
lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \
|
||||
shlq $32, RFS1; \
|
||||
orq RFS1, RFS3; \
|
||||
\
|
||||
vmovq RFS2, x; \
|
||||
vpinsrq $1, RFS3, x, x;
|
||||
|
||||
#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
|
||||
F_head(b1, RX, RGI1, RGI2, op0); \
|
||||
F_head(b2, RX, RGI3, RGI4, op0); \
|
||||
\
|
||||
F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \
|
||||
F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \
|
||||
\
|
||||
vpxor a1, RX, a1; \
|
||||
vpxor a2, RTMP, a2;
|
||||
|
||||
#define F1_2(a1, b1, a2, b2) \
|
||||
F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
|
||||
#define F2_2(a1, b1, a2, b2) \
|
||||
F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
|
||||
#define F3_2(a1, b1, a2, b2) \
|
||||
F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
|
||||
|
||||
#define qop(in, out, f) \
|
||||
F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);
|
||||
|
||||
#define get_round_keys(nn) \
|
||||
vbroadcastss (km+(4*(nn)))(CTX), RKM; \
|
||||
vpand R1ST, RKR, RKRF; \
|
||||
vpsubq RKRF, R32, RKRR; \
|
||||
vpsrldq $1, RKR, RKR;
|
||||
|
||||
#define Q(n) \
|
||||
get_round_keys(4*n+0); \
|
||||
qop(RD, RC, 1); \
|
||||
\
|
||||
get_round_keys(4*n+1); \
|
||||
qop(RC, RB, 2); \
|
||||
\
|
||||
get_round_keys(4*n+2); \
|
||||
qop(RB, RA, 3); \
|
||||
\
|
||||
get_round_keys(4*n+3); \
|
||||
qop(RA, RD, 1);
|
||||
|
||||
#define QBAR(n) \
|
||||
get_round_keys(4*n+3); \
|
||||
qop(RA, RD, 1); \
|
||||
\
|
||||
get_round_keys(4*n+2); \
|
||||
qop(RB, RA, 3); \
|
||||
\
|
||||
get_round_keys(4*n+1); \
|
||||
qop(RC, RB, 2); \
|
||||
\
|
||||
get_round_keys(4*n+0); \
|
||||
qop(RD, RC, 1);
|
||||
|
||||
#define shuffle(mask) \
|
||||
vpshufb mask, RKR, RKR;
|
||||
|
||||
#define preload_rkr(n, do_mask, mask) \
|
||||
vbroadcastss .L16_mask, RKR; \
|
||||
/* add 16-bit rotation to key rotations (mod 32) */ \
|
||||
vpxor (kr+n*16)(CTX), RKR, RKR; \
|
||||
do_mask(mask);
|
||||
|
||||
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
|
||||
vpunpckldq x1, x0, t0; \
|
||||
vpunpckhdq x1, x0, t2; \
|
||||
vpunpckldq x3, x2, t1; \
|
||||
vpunpckhdq x3, x2, x3; \
|
||||
\
|
||||
vpunpcklqdq t1, t0, x0; \
|
||||
vpunpckhqdq t1, t0, x1; \
|
||||
vpunpcklqdq x3, t2, x2; \
|
||||
vpunpckhqdq x3, t2, x3;
|
||||
|
||||
#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
|
||||
vpshufb rmask, x0, x0; \
|
||||
vpshufb rmask, x1, x1; \
|
||||
vpshufb rmask, x2, x2; \
|
||||
vpshufb rmask, x3, x3; \
|
||||
\
|
||||
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
|
||||
|
||||
#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
|
||||
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
|
||||
\
|
||||
vpshufb rmask, x0, x0; \
|
||||
vpshufb rmask, x1, x1; \
|
||||
vpshufb rmask, x2, x2; \
|
||||
vpshufb rmask, x3, x3;
|
||||
|
||||
.data
|
||||
|
||||
.align 16
|
||||
.Lxts_gf128mul_and_shl1_mask:
|
||||
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
.Lbswap_mask:
|
||||
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.Lrkr_enc_Q_Q_QBAR_QBAR:
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
|
||||
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
.Lrkr_dec_Q_Q_Q_Q:
|
||||
.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
|
||||
.Lrkr_dec_Q_Q_QBAR_QBAR:
|
||||
.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.L16_mask:
|
||||
.byte 16, 16, 16, 16
|
||||
.L32_mask:
|
||||
.byte 32, 0, 0, 0
|
||||
.Lfirst_mask:
|
||||
.byte 0x1f, 0, 0, 0
|
||||
|
||||
.text
|
||||
|
||||
.align 8
|
||||
__cast6_enc_blk8:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
|
||||
* output:
|
||||
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
|
||||
*/
|
||||
|
||||
pushq %rbp;
|
||||
pushq %rbx;
|
||||
|
||||
vmovdqa .Lbswap_mask, RKM;
|
||||
vmovd .Lfirst_mask, R1ST;
|
||||
vmovd .L32_mask, R32;
|
||||
|
||||
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
|
||||
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
|
||||
|
||||
preload_rkr(0, dummy, none);
|
||||
Q(0);
|
||||
Q(1);
|
||||
Q(2);
|
||||
Q(3);
|
||||
preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
|
||||
Q(4);
|
||||
Q(5);
|
||||
QBAR(6);
|
||||
QBAR(7);
|
||||
preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
|
||||
QBAR(8);
|
||||
QBAR(9);
|
||||
QBAR(10);
|
||||
QBAR(11);
|
||||
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
|
||||
vmovdqa .Lbswap_mask, RKM;
|
||||
|
||||
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
|
||||
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
|
||||
|
||||
ret;
|
||||
ENDPROC(__cast6_enc_blk8)
|
||||
|
||||
.align 8
|
||||
__cast6_dec_blk8:
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
|
||||
* output:
|
||||
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
|
||||
*/
|
||||
|
||||
pushq %rbp;
|
||||
pushq %rbx;
|
||||
|
||||
vmovdqa .Lbswap_mask, RKM;
|
||||
vmovd .Lfirst_mask, R1ST;
|
||||
vmovd .L32_mask, R32;
|
||||
|
||||
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
|
||||
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
|
||||
|
||||
preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
|
||||
Q(11);
|
||||
Q(10);
|
||||
Q(9);
|
||||
Q(8);
|
||||
preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
|
||||
Q(7);
|
||||
Q(6);
|
||||
QBAR(5);
|
||||
QBAR(4);
|
||||
preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
|
||||
QBAR(3);
|
||||
QBAR(2);
|
||||
QBAR(1);
|
||||
QBAR(0);
|
||||
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
|
||||
vmovdqa .Lbswap_mask, RKM;
|
||||
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
|
||||
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
|
||||
|
||||
ret;
|
||||
ENDPROC(__cast6_dec_blk8)
|
||||
|
||||
ENTRY(cast6_ecb_enc_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
movq %rsi, %r11;
|
||||
|
||||
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
call __cast6_enc_blk8;
|
||||
|
||||
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_ecb_enc_8way)
|
||||
|
||||
ENTRY(cast6_ecb_dec_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
movq %rsi, %r11;
|
||||
|
||||
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
call __cast6_dec_blk8;
|
||||
|
||||
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_ecb_dec_8way)
|
||||
|
||||
ENTRY(cast6_cbc_dec_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
*/
|
||||
|
||||
pushq %r12;
|
||||
|
||||
movq %rsi, %r11;
|
||||
movq %rdx, %r12;
|
||||
|
||||
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
call __cast6_dec_blk8;
|
||||
|
||||
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_cbc_dec_8way)
|
||||
|
||||
ENTRY(cast6_ctr_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
* %rcx: iv (little endian, 128bit)
|
||||
*/
|
||||
|
||||
pushq %r12;
|
||||
|
||||
movq %rsi, %r11;
|
||||
movq %rdx, %r12;
|
||||
|
||||
load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
|
||||
RD2, RX, RKR, RKM);
|
||||
|
||||
call __cast6_enc_blk8;
|
||||
|
||||
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
popq %r12;
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_ctr_8way)
|
||||
|
||||
ENTRY(cast6_xts_enc_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
||||
*/
|
||||
|
||||
movq %rsi, %r11;
|
||||
|
||||
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
|
||||
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
|
||||
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
|
||||
|
||||
call __cast6_enc_blk8;
|
||||
|
||||
/* dst <= regs xor IVs(in dst) */
|
||||
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_xts_enc_8way)
|
||||
|
||||
ENTRY(cast6_xts_dec_8way)
|
||||
/* input:
|
||||
* %rdi: ctx, CTX
|
||||
* %rsi: dst
|
||||
* %rdx: src
|
||||
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
||||
*/
|
||||
|
||||
movq %rsi, %r11;
|
||||
|
||||
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
|
||||
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
|
||||
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
|
||||
|
||||
call __cast6_dec_blk8;
|
||||
|
||||
/* dst <= regs xor IVs(in dst) */
|
||||
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
||||
|
||||
ret;
|
||||
ENDPROC(cast6_xts_dec_8way)
|
||||
614
arch/x86/crypto/cast6_avx_glue.c
Normal file
614
arch/x86/crypto/cast6_avx_glue.c
Normal file
|
|
@ -0,0 +1,614 @@
|
|||
/*
|
||||
* Glue Code for the AVX assembler implementation of the Cast6 Cipher
|
||||
*
|
||||
* Copyright (C) 2012 Johannes Goetzfried
|
||||
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
|
||||
*
|
||||
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/cast6.h>
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/b128ops.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/lrw.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/xsave.h>
|
||||
#include <asm/crypto/glue_helper.h>
|
||||
|
||||
#define CAST6_PARALLEL_BLOCKS 8
|
||||
|
||||
asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
|
||||
asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
|
||||
le128 *iv);
|
||||
|
||||
asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
|
||||
const u8 *src, le128 *iv);
|
||||
|
||||
/*
 * Single-block XTS encryption step: delegates to
 * glue_xts_crypt_128bit_one() with __cast6_encrypt as the block function.
 */
static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	glue_xts_crypt_128bit_one(ctx,
				  dst, src, iv,
				  GLUE_FUNC_CAST(__cast6_encrypt));
}
|
||||
|
||||
/*
 * Single-block XTS decryption step: delegates to
 * glue_xts_crypt_128bit_one() with __cast6_decrypt as the block function.
 */
static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	glue_xts_crypt_128bit_one(ctx,
				  dst, src, iv,
				  GLUE_FUNC_CAST(__cast6_decrypt));
}
|
||||
|
||||
/*
 * Single-block CTR step.
 *
 * Converts the little-endian counter *iv to big-endian form in a local
 * block, increments *iv, encrypts the local block in place and writes
 * src XOR that block to dst.
 */
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	be128 stream;
	u8 *stream_bytes = (u8 *)&stream;

	le128_to_be128(&stream, iv);
	le128_inc(iv);

	__cast6_encrypt(ctx, stream_bytes, stream_bytes);

	u128_xor(dst, src, (u128 *)&stream);
}
|
||||
|
||||
static const struct common_glue_ctx cast6_enc = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAST6_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx cast6_ctr = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAST6_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx cast6_enc_xts = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAST6_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx cast6_dec = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAST6_PARALLEL_BLOCKS,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx cast6_dec_cbc = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAST6_PARALLEL_BLOCKS,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
|
||||
} }
|
||||
};
|
||||
|
||||
static const struct common_glue_ctx cast6_dec_xts = {
|
||||
.num_funcs = 2,
|
||||
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
|
||||
|
||||
.funcs = { {
|
||||
.num_blocks = CAST6_PARALLEL_BLOCKS,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
|
||||
}, {
|
||||
.num_blocks = 1,
|
||||
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
|
||||
} }
|
||||
};
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
|
||||
dst, src, nbytes);
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
|
||||
nbytes);
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
|
||||
}
|
||||
|
||||
static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
|
||||
{
|
||||
return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
|
||||
NULL, fpu_enabled, nbytes);
|
||||
}
|
||||
|
||||
/*
 * Thin CAST6 front-end for glue_fpu_end().
 *
 * @fpu_enabled: the flag most recently returned by cast6_fpu_begin().
 */
static inline void cast6_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(
		fpu_enabled);
}
|
||||
|
||||
/*
 * Private state passed (as void *) to encrypt_callback()/decrypt_callback()
 * through lrw_crypt_req.crypt_ctx.
 */
struct crypt_priv {
|
||||
/* cipher key context used for the block operations */
struct cast6_ctx *ctx;
|
||||
/* updated by the callbacks via cast6_fpu_begin(); read by cast6_fpu_end() */
bool fpu_enabled;
|
||||
};
|
||||
|
||||
/*
 * In-place ECB encryption callback used by lrw_encrypt().
 *
 * @priv:   struct crypt_priv carrying the cipher context and FPU flag
 * @srcdst: buffer encrypted in place
 * @nbytes: number of bytes in @srcdst
 *
 * A buffer of exactly CAST6_PARALLEL_BLOCKS blocks goes through the 8-way
 * routine; any other size is processed one block at a time. Trailing bytes
 * that do not fill a whole block are left untouched.
 */
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = CAST6_BLOCK_SIZE;
	struct crypt_priv *ctx = priv;
	/* unsigned to match the unsigned block count it is compared with */
	unsigned int i;

	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);

	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
		cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
		return;
	}

	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
}
|
||||
|
||||
/*
 * In-place ECB decryption callback used by lrw_decrypt().
 *
 * @priv:   struct crypt_priv carrying the cipher context and FPU flag
 * @srcdst: buffer decrypted in place
 * @nbytes: number of bytes in @srcdst
 *
 * A buffer of exactly CAST6_PARALLEL_BLOCKS blocks goes through the 8-way
 * routine; any other size is processed one block at a time. Trailing bytes
 * that do not fill a whole block are left untouched.
 */
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
	const unsigned int bsize = CAST6_BLOCK_SIZE;
	struct crypt_priv *ctx = priv;
	/* unsigned to match the unsigned block count it is compared with */
	unsigned int i;

	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);

	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
		cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
		return;
	}

	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
}
|
||||
|
||||
/* Transform context for the "__lrw-cast6-avx" algorithm. */
struct cast6_lrw_ctx {
|
||||
/* initialised by lrw_init_table() in setkey, released by lrw_free_table() */
struct lrw_table_ctx lrw_table;
|
||||
/* cipher key schedule, filled by __cast6_setkey() */
struct cast6_ctx cast6_ctx;
|
||||
};
|
||||
|
||||
static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
|
||||
&tfm->crt_flags);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[CAST6_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->cast6_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = encrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
cast6_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
be128 buf[CAST6_PARALLEL_BLOCKS];
|
||||
struct crypt_priv crypt_ctx = {
|
||||
.ctx = &ctx->cast6_ctx,
|
||||
.fpu_enabled = false,
|
||||
};
|
||||
struct lrw_crypt_req req = {
|
||||
.tbuf = buf,
|
||||
.tbuflen = sizeof(buf),
|
||||
|
||||
.table_ctx = &ctx->lrw_table,
|
||||
.crypt_ctx = &crypt_ctx,
|
||||
.crypt_fn = decrypt_callback,
|
||||
};
|
||||
int ret;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ret = lrw_crypt(desc, dst, src, nbytes, &req);
|
||||
cast6_fpu_end(crypt_ctx.fpu_enabled);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void lrw_exit_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
lrw_free_table(&ctx->lrw_table);
|
||||
}
|
||||
|
||||
/* Transform context for the "__xts-cast6-avx" algorithm. */
struct cast6_xts_ctx {
|
||||
/* keyed from the second half of the XTS key (see xts_cast6_setkey) */
struct cast6_ctx tweak_ctx;
|
||||
/* keyed from the first half of the XTS key (see xts_cast6_setkey) */
struct cast6_ctx crypt_ctx;
|
||||
};
|
||||
|
||||
static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
u32 *flags = &tfm->crt_flags;
|
||||
int err;
|
||||
|
||||
/* key consists of keys of equal size concatenated, therefore
|
||||
* the length must be even
|
||||
*/
|
||||
if (keylen % 2) {
|
||||
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* first half of xts-key is for crypt */
|
||||
err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* second half of xts-key is for tweak */
|
||||
return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
|
||||
flags);
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(__cast6_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
|
||||
return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
|
||||
XTS_TWEAK_CAST(__cast6_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
}
|
||||
|
||||
/*
 * Algorithms registered by cast6_init().
 *
 * Entries 0-4 ("__*-cast6-avx", priority 0, CRYPTO_ALG_TYPE_BLKCIPHER) use
 * the synchronous handlers defined in this file.
 * Entries 5-9 ("ecb(cast6)" ... "xts(cast6)", priority 200, async
 * ablkcipher) use the ablk_* helpers and a struct async_helper_ctx context.
 * NOTE(review): presumably ablk_init binds each of them to the matching
 * "__driver-*" entry -- the helper is not visible here, confirm.
 */
static struct crypto_alg cast6_algs[10] = { {
|
||||
.cra_name = "__ecb-cast6-avx",
|
||||
.cra_driver_name = "__driver-ecb-cast6-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct cast6_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE,
|
||||
.setkey = cast6_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-cast6-avx",
|
||||
.cra_driver_name = "__driver-cbc-cast6-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct cast6_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE,
|
||||
.setkey = cast6_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
/* CTR: block size 1, and the same handler serves both directions */
.cra_name = "__ctr-cast6-avx",
|
||||
.cra_driver_name = "__driver-ctr-cast6-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct cast6_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = cast6_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
/* LRW: key sizes include the CAST6_BLOCK_SIZE tweak key split off by lrw_cast6_setkey() */
.cra_name = "__lrw-cast6-avx",
|
||||
.cra_driver_name = "__driver-lrw-cast6-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct cast6_lrw_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_exit = lrw_exit_tfm,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE +
|
||||
CAST6_BLOCK_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE +
|
||||
CAST6_BLOCK_SIZE,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = lrw_cast6_setkey,
|
||||
.encrypt = lrw_encrypt,
|
||||
.decrypt = lrw_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
/* XTS: key sizes are doubled because xts_cast6_setkey() splits the key in two halves */
.cra_name = "__xts-cast6-avx",
|
||||
.cra_driver_name = "__driver-xts-cast6-avx",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct cast6_xts_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE * 2,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = xts_cast6_setkey,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
/* Public async algorithms start here */
.cra_name = "ecb(cast6)",
|
||||
.cra_driver_name = "ecb-cast6-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
/*
 * CBC is the only async entry whose .encrypt is __ablk_encrypt.
 * NOTE(review): presumably because serial CBC encryption gains nothing
 * from the parallel path -- confirm against the ablk helper.
 */
.cra_name = "cbc(cast6)",
|
||||
.cra_driver_name = "cbc-cast6-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = __ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
/* CTR: .decrypt deliberately reuses ablk_encrypt, mirroring ctr_crypt above */
.cra_name = "ctr(cast6)",
|
||||
.cra_driver_name = "ctr-cast6-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_encrypt,
|
||||
.geniv = "chainiv",
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "lrw(cast6)",
|
||||
.cra_driver_name = "lrw-cast6-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE +
|
||||
CAST6_BLOCK_SIZE,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE +
|
||||
CAST6_BLOCK_SIZE,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "xts(cast6)",
|
||||
.cra_driver_name = "xts-cast6-avx",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = CAST6_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = CAST6_MIN_KEY_SIZE * 2,
|
||||
.max_keysize = CAST6_MAX_KEY_SIZE * 2,
|
||||
.ivsize = CAST6_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
/*
 * Module init: register the CAST6 algorithms only when AVX is usable.
 *
 * Two checks are made, in this order: the CPU must report AVX and OSXSAVE,
 * and XCR0 must have both the SSE and YMM state bits set. XCR0 is read only
 * after the OSXSAVE check has passed. Returns -ENODEV when either check
 * fails, otherwise the result of crypto_register_algs().
 */
static int __init cast6_init(void)
{
	const u64 required = XSTATE_SSE | XSTATE_YMM;
	u64 xcr0;

	if (!(cpu_has_avx && cpu_has_osxsave)) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & required) == required)
		return crypto_register_algs(cast6_algs,
					    ARRAY_SIZE(cast6_algs));

	pr_info("AVX detected but unusable.\n");
	return -ENODEV;
}
|
||||
|
||||
/* Module exit: unregister every entry of cast6_algs registered by cast6_init(). */
static void __exit cast6_exit(void)
|
||||
{
|
||||
crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
|
||||
}
|
||||
|
||||
module_init(cast6_init);
|
||||
module_exit(cast6_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_CRYPTO("cast6");
|
||||
246
arch/x86/crypto/crc32-pclmul_asm.S
Normal file
246
arch/x86/crypto/crc32-pclmul_asm.S
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
/* GPL HEADER START
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 only,
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License version 2 for more details (a copy is included
|
||||
* in the LICENSE file that accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; If not, see http://www.gnu.org/licenses
|
||||
*
|
||||
* Please visit http://www.xyratex.com/contact if you need additional
|
||||
* information or have any questions.
|
||||
*
|
||||
* GPL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2012 Xyratex Technology Limited
|
||||
*
|
||||
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
|
||||
* calculation.
|
||||
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
|
||||
* PCLMULQDQ is a carry-less multiplication instruction with its own CPUID feature flag (it is not part of SSE4.2); the reference can be found
|
||||
* at:
|
||||
* http://www.intel.com/products/processor/manuals/
|
||||
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
|
||||
* Volume 2B: Instruction Set Reference, N-Z
|
||||
*
|
||||
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
|
||||
* Alexander Boyko <Alexander_Boyko@xyratex.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/inst.h>
|
||||
|
||||
|
||||
.align 16
|
||||
/*
|
||||
* [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
|
||||
* #define CONSTANT_R1 0x154442bd4LL
|
||||
*
|
||||
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
|
||||
* #define CONSTANT_R2 0x1c6e41596LL
|
||||
*/
|
||||
.Lconstant_R2R1:
|
||||
.octa 0x00000001c6e415960000000154442bd4
|
||||
/*
|
||||
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
|
||||
* #define CONSTANT_R3 0x1751997d0LL
|
||||
*
|
||||
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
|
||||
* #define CONSTANT_R4 0x0ccaa009eLL
|
||||
*/
|
||||
.Lconstant_R4R3:
|
||||
.octa 0x00000000ccaa009e00000001751997d0
|
||||
/*
|
||||
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
|
||||
* #define CONSTANT_R5 0x163cd6124LL
|
||||
*/
|
||||
.Lconstant_R5:
|
||||
.octa 0x00000000000000000000000163cd6124
|
||||
.Lconstant_mask32:
|
||||
.octa 0x000000000000000000000000FFFFFFFF
|
||||
/*
|
||||
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
|
||||
*
|
||||
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
|
||||
* #define CONSTANT_RU 0x1F7011641LL
|
||||
*/
|
||||
.Lconstant_RUpoly:
|
||||
.octa 0x00000001F701164100000001DB710641
|
||||
|
||||
#define CONSTANT %xmm0
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define BUF %rdi
|
||||
#define LEN %rsi
|
||||
#define CRC %edx
|
||||
#else
|
||||
#define BUF %eax
|
||||
#define LEN %edx
|
||||
#define CRC %ecx
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
.text
|
||||
/**
|
||||
* Calculate crc32
|
||||
* BUF - buffer (16 bytes aligned)
|
||||
* LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
|
||||
* CRC - initial crc32
|
||||
* return %eax crc32
|
||||
* uint crc32_pclmul_le_16(unsigned char const *buffer,
|
||||
* size_t len, uint crc32)
|
||||
*/
|
||||
|
||||
ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
|
||||
/* Load the first 64 bytes into xmm1..xmm4 and xor the initial CRC into xmm1 */
movdqa (BUF), %xmm1
|
||||
movdqa 0x10(BUF), %xmm2
|
||||
movdqa 0x20(BUF), %xmm3
|
||||
movdqa 0x30(BUF), %xmm4
|
||||
movd CRC, CONSTANT
|
||||
pxor CONSTANT, %xmm1
|
||||
sub $0x40, LEN
|
||||
add $0x40, BUF
|
||||
#ifndef __x86_64__
|
||||
/* This is for position independent code(-fPIC) support for 32bit */
|
||||
call delta
|
||||
delta:
|
||||
pop %ecx
|
||||
/* %ecx (CRC on 32-bit) was already consumed by the movd above; it now holds the address of delta */
#endif
|
||||
/* Fewer than 64 bytes left after the initial load: skip the 64-byte loop */
cmp $0x40, LEN
|
||||
jb less_64
|
||||
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_R2R1(%rip), CONSTANT
|
||||
#else
|
||||
movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
|
||||
#endif
|
||||
|
||||
loop_64:/* 64 bytes Full cache line folding */
|
||||
prefetchnta 0x40(BUF)
|
||||
movdqa %xmm1, %xmm5
|
||||
movdqa %xmm2, %xmm6
|
||||
movdqa %xmm3, %xmm7
|
||||
#ifdef __x86_64__
|
||||
movdqa %xmm4, %xmm8
|
||||
#endif
|
||||
PCLMULQDQ 00, CONSTANT, %xmm1
|
||||
PCLMULQDQ 00, CONSTANT, %xmm2
|
||||
PCLMULQDQ 00, CONSTANT, %xmm3
|
||||
#ifdef __x86_64__
|
||||
PCLMULQDQ 00, CONSTANT, %xmm4
|
||||
#endif
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm5
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm6
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm7
|
||||
#ifdef __x86_64__
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm8
|
||||
#endif
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm6, %xmm2
|
||||
pxor %xmm7, %xmm3
|
||||
#ifdef __x86_64__
|
||||
pxor %xmm8, %xmm4
|
||||
#else
|
||||
/* xmm8 unsupported for x32 */
|
||||
movdqa %xmm4, %xmm5
|
||||
PCLMULQDQ 00, CONSTANT, %xmm4
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm4
|
||||
#endif
|
||||
|
||||
/* xor the next 64 input bytes into the four folded registers */
pxor (BUF), %xmm1
|
||||
pxor 0x10(BUF), %xmm2
|
||||
pxor 0x20(BUF), %xmm3
|
||||
pxor 0x30(BUF), %xmm4
|
||||
|
||||
sub $0x40, LEN
|
||||
add $0x40, BUF
|
||||
cmp $0x40, LEN
|
||||
/* NOTE(review): signed jge here, whereas the entry check above uses unsigned jb */
jge loop_64
|
||||
less_64:/* Folding cache line into 128bit */
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_R4R3(%rip), CONSTANT
|
||||
#else
|
||||
movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
|
||||
#endif
|
||||
prefetchnta (BUF)
|
||||
|
||||
movdqa %xmm1, %xmm5
|
||||
PCLMULQDQ 0x00, CONSTANT, %xmm1
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm2, %xmm1
|
||||
|
||||
movdqa %xmm1, %xmm5
|
||||
PCLMULQDQ 0x00, CONSTANT, %xmm1
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm3, %xmm1
|
||||
|
||||
movdqa %xmm1, %xmm5
|
||||
PCLMULQDQ 0x00, CONSTANT, %xmm1
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm4, %xmm1
|
||||
|
||||
cmp $0x10, LEN
|
||||
jb fold_64
|
||||
loop_16:/* Folding rest buffer into 128bit */
|
||||
movdqa %xmm1, %xmm5
|
||||
PCLMULQDQ 0x00, CONSTANT, %xmm1
|
||||
PCLMULQDQ 0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor (BUF), %xmm1
|
||||
sub $0x10, LEN
|
||||
add $0x10, BUF
|
||||
cmp $0x10, LEN
|
||||
jge loop_16
|
||||
|
||||
fold_64:
|
||||
/* perform the last 64 bit fold, also adds 32 zeroes
|
||||
* to the input stream */
|
||||
PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
|
||||
psrldq $0x08, %xmm1
|
||||
pxor CONSTANT, %xmm1
|
||||
|
||||
/* final 32-bit fold */
|
||||
movdqa %xmm1, %xmm2
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_R5(%rip), CONSTANT
|
||||
movdqa .Lconstant_mask32(%rip), %xmm3
|
||||
#else
|
||||
movdqa .Lconstant_R5 - delta(%ecx), CONSTANT
|
||||
movdqa .Lconstant_mask32 - delta(%ecx), %xmm3
|
||||
#endif
|
||||
psrldq $0x04, %xmm2
|
||||
pand %xmm3, %xmm1
|
||||
PCLMULQDQ 0x00, CONSTANT, %xmm1
|
||||
pxor %xmm2, %xmm1
|
||||
|
||||
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_RUpoly(%rip), CONSTANT
|
||||
#else
|
||||
movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT
|
||||
#endif
|
||||
movdqa %xmm1, %xmm2
|
||||
pand %xmm3, %xmm1
|
||||
PCLMULQDQ 0x10, CONSTANT, %xmm1
|
||||
pand %xmm3, %xmm1
|
||||
PCLMULQDQ 0x00, CONSTANT, %xmm1
|
||||
pxor %xmm2, %xmm1
|
||||
/* The result is dword 1 of xmm1; return it in %eax */
PEXTRD 0x01, %xmm1, %eax
|
||||
|
||||
ret
|
||||
ENDPROC(crc32_pclmul_le_16)
|
||||
201
arch/x86/crypto/crc32-pclmul_glue.c
Normal file
201
arch/x86/crypto/crc32-pclmul_glue.c
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
/* GPL HEADER START
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 only,
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License version 2 for more details (a copy is included
|
||||
* in the LICENSE file that accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; If not, see http://www.gnu.org/licenses
|
||||
*
|
||||
* Please visit http://www.xyratex.com/contact if you need additional
|
||||
* information or have any questions.
|
||||
*
|
||||
* GPL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2012 Xyratex Technology Limited
|
||||
*
|
||||
* Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/i387.h>
|
||||
|
||||
#define CHKSUM_BLOCK_SIZE 1
|
||||
#define CHKSUM_DIGEST_SIZE 4
|
||||
|
||||
#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
|
||||
* for crc32_pclmul_le_16 */
|
||||
#define SCALE_F 16L /* size of xmm register */
|
||||
#define SCALE_F_MASK (SCALE_F - 1)
|
||||
|
||||
u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
|
||||
|
||||
static u32 __attribute__((pure))
|
||||
crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
|
||||
{
|
||||
unsigned int iquotient;
|
||||
unsigned int iremainder;
|
||||
unsigned int prealign;
|
||||
|
||||
if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
|
||||
return crc32_le(crc, p, len);
|
||||
|
||||
if ((long)p & SCALE_F_MASK) {
|
||||
/* align p to 16 byte */
|
||||
prealign = SCALE_F - ((long)p & SCALE_F_MASK);
|
||||
|
||||
crc = crc32_le(crc, p, prealign);
|
||||
len -= prealign;
|
||||
p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
|
||||
~SCALE_F_MASK);
|
||||
}
|
||||
iquotient = len & (~SCALE_F_MASK);
|
||||
iremainder = len & SCALE_F_MASK;
|
||||
|
||||
kernel_fpu_begin();
|
||||
crc = crc32_pclmul_le_16(p, iquotient, crc);
|
||||
kernel_fpu_end();
|
||||
|
||||
if (iremainder)
|
||||
crc = crc32_le(crc, p + iquotient, iremainder);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
u32 *key = crypto_tfm_ctx(tfm);
|
||||
|
||||
*key = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(hash);
|
||||
|
||||
if (keylen != sizeof(u32)) {
|
||||
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
*mctx = le32_to_cpup((__le32 *)key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_init(struct shash_desc *desc)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(desc->tfm);
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = *mctx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = crc32_pclmul_le(*crcp, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* No final XOR 0xFFFFFFFF, like crc32_le */
|
||||
static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
|
||||
u8 *out)
|
||||
{
|
||||
*(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * finup: process the last chunk of data starting from the per-request CRC
 * state and write the digest to @out.
 */
static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32_pclmul_finup(state, data, len, out);
}
|
||||
|
||||
/*
 * final: write the current per-request CRC state to @out in little-endian
 * byte order. Always returns 0.
 */
static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);
	__le32 *digest = (__le32 *)out;

	*digest = cpu_to_le32p(state);
	return 0;
}
|
||||
|
||||
/*
 * One-shot digest: start from the transform's seed (not from the
 * per-request state) and write the CRC of @data to @out.
 */
static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32_pclmul_finup(seed, data, len, out);
}
|
||||
|
||||
/*
 * shash descriptor for "crc32" (driver name "crc32-pclmul", priority 200).
 * Both the per-request state (descsize) and the transform context
 * (cra_ctxsize) are a single u32; the digest is CHKSUM_DIGEST_SIZE bytes.
 */
static struct shash_alg alg = {
|
||||
.setkey = crc32_pclmul_setkey,
|
||||
.init = crc32_pclmul_init,
|
||||
.update = crc32_pclmul_update,
|
||||
.final = crc32_pclmul_final,
|
||||
.finup = crc32_pclmul_finup,
|
||||
.digest = crc32_pclmul_digest,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = CHKSUM_DIGEST_SIZE,
|
||||
.base = {
|
||||
.cra_name = "crc32",
|
||||
.cra_driver_name = "crc32-pclmul",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = CHKSUM_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(u32),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = crc32_pclmul_cra_init,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * CPU match table: a single entry for the PCLMULQDQ feature, followed by
 * the empty terminator. Checked by crc32_pclmul_mod_init() and exported
 * through MODULE_DEVICE_TABLE.
 */
static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
|
||||
|
||||
|
||||
/*
 * Module init: register the shash only if the CPU matches
 * crc32pclmul_cpu_id; otherwise log a message and return -ENODEV.
 */
static int __init crc32_pclmul_mod_init(void)
{
	if (x86_match_cpu(crc32pclmul_cpu_id))
		return crypto_register_shash(&alg);

	pr_info("PCLMULQDQ-NI instructions are not detected.\n");
	return -ENODEV;
}
|
||||
|
||||
/* Module exit: unregister the shash registered by crc32_pclmul_mod_init(). */
static void __exit crc32_pclmul_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
module_init(crc32_pclmul_mod_init);
|
||||
module_exit(crc32_pclmul_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
MODULE_ALIAS_CRYPTO("crc32");
|
||||
MODULE_ALIAS_CRYPTO("crc32-pclmul");
|
||||
284
arch/x86/crypto/crc32c-intel_glue.c
Normal file
284
arch/x86/crypto/crc32c-intel_glue.c
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
/*
|
||||
* Using hardware provided CRC32 instruction to accelerate the CRC32C calculation.
|
||||
* CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
|
||||
* CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
|
||||
* http://www.intel.com/products/processor/manuals/
|
||||
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
|
||||
* Volume 2A: Instruction Set Reference, A-M
|
||||
*
|
||||
* Copyright (C) 2008 Intel Corporation
|
||||
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
|
||||
* Kent Liu <kent.liu@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/fpu-internal.h>
|
||||
|
||||
#define CHKSUM_BLOCK_SIZE 1
|
||||
#define CHKSUM_DIGEST_SIZE 4
|
||||
|
||||
#define SCALE_F sizeof(unsigned long)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define REX_PRE "0x48, "
|
||||
#else
|
||||
#define REX_PRE
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* use carryless multiply version of crc32c when buffer
|
||||
* size is >= 512 (when eager fpu is enabled) or
|
||||
* >= 1024 (when eager fpu is disabled) to account
|
||||
* for fpu state save/restore overhead.
|
||||
*/
|
||||
#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
|
||||
#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
|
||||
|
||||
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
|
||||
unsigned int crc_init);
|
||||
static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
|
||||
#if defined(X86_FEATURE_EAGER_FPU)
|
||||
#define set_pcl_breakeven_point() \
|
||||
do { \
|
||||
if (!use_eager_fpu()) \
|
||||
crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
|
||||
} while (0)
|
||||
#else
|
||||
#define set_pcl_breakeven_point() \
|
||||
(crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
|
||||
#endif
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
 * Feed @length bytes at @data into the CRC one byte at a time and return
 * the updated CRC.
 *
 * The instruction is emitted as raw opcode bytes. The constraints pin the
 * accumulator to %esi ("S") and the input byte to %ecx ("c"), which is what
 * the hard-coded ModRM byte 0xf1 expects.
 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
	unsigned char const *const end = data + length;

	for (; data != end; data++) {
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(*data)
		);
	}

	return crc;
}
|
||||
|
||||
/*
 * Compute CRC32C over @len bytes at @p starting from @crc.
 *
 * The buffer is consumed one unsigned long (SCALE_F bytes) at a time; the
 * remaining len % SCALE_F bytes are handed to crc32c_intel_le_hw_byte().
 * REX_PRE adds the 0x48 prefix on 64-bit builds so the same opcode bytes
 * operate on a 64-bit source there. The accumulator is pinned to %esi and
 * the data word to %ecx/%rcx by the constraints.
 *
 * Returns the updated CRC.
 */
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
	/*
	 * size_t rather than unsigned int: len is a size_t, so a narrower
	 * word count could silently truncate for very large buffers.
	 */
	size_t iquotient = len / SCALE_F;
	size_t iremainder = len % SCALE_F;
	/* keep the const qualifier: the buffer is only read */
	const unsigned long *ptmp = (const unsigned long *)p;

	while (iquotient--) {
		__asm__ __volatile__(
			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
			:"=S"(crc)
			:"0"(crc), "c"(*ptmp)
		);
		ptmp++;
	}

	if (iremainder)
		crc = crc32c_intel_le_hw_byte(crc, (const unsigned char *)ptmp,
				 iremainder);

	return crc;
}
|
||||
|
||||
/*
|
||||
* Setting the seed allows arbitrary accumulators and flexible XOR policy
|
||||
* If your algorithm starts with ~0, then XOR with ~0 before you set
|
||||
* the seed.
|
||||
*/
|
||||
static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(hash);
|
||||
|
||||
if (keylen != sizeof(u32)) {
|
||||
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
*mctx = le32_to_cpup((__le32 *)key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_intel_init(struct shash_desc *desc)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(desc->tfm);
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = *mctx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = crc32c_intel_le_hw(*crcp, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Fold @len bytes of @data into the CRC read from @crcp, then store the
 * bitwise complement of the result at @out as a little-endian 32-bit
 * value.  *@crcp itself is not modified.  Always returns 0.
 */
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	u32 crc = crc32c_intel_le_hw(*crcp, data, len);
	__le32 *digest = (__le32 *)out;

	*digest = ~cpu_to_le32(crc);
	return 0;
}
|
||||
|
||||
/*
 * shash .finup: absorb the last @len bytes of @data on top of the
 * per-request state and write the digest to @out.
 */
static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32c_intel_finup(state, data, len, out);
}
|
||||
|
||||
/*
 * shash .final: write the complement of the accumulated per-request CRC
 * to @out as a little-endian 32-bit value.  Always returns 0.
 */
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);
	__le32 *digest = (__le32 *)out;

	*digest = ~cpu_to_le32p(state);
	return 0;
}
|
||||
|
||||
/*
 * shash .digest: one-shot CRC of @data.  Starts directly from the
 * transform's seed instead of the per-request state and writes the
 * digest to @out.
 */
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32c_intel_finup(seed, data, len, out);
}
|
||||
|
||||
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
u32 *key = crypto_tfm_ctx(tfm);
|
||||
|
||||
*key = ~0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
/*
|
||||
* use faster PCL version if datasize is large enough to
|
||||
* overcome kernel fpu state save/restore overhead
|
||||
*/
|
||||
if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
|
||||
kernel_fpu_begin();
|
||||
*crcp = crc_pcl(data, len, *crcp);
|
||||
kernel_fpu_end();
|
||||
} else
|
||||
*crcp = crc32c_intel_le_hw(*crcp, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Fold @len bytes of @data into the CRC read from @crcp and store the
 * complemented result at @out as a little-endian 32-bit value.
 *
 * Uses crc_pcl() inside a kernel_fpu_begin()/kernel_fpu_end() section
 * when @len reaches crc32c_pcl_breakeven and the FPU is usable, and the
 * plain CRC32-instruction path otherwise.  *@crcp is not modified.
 * Always returns 0.
 */
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				    u8 *out)
{
	u32 crc;

	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
		kernel_fpu_begin();
		crc = crc_pcl(data, len, *crcp);
		kernel_fpu_end();
	} else {
		crc = crc32c_intel_le_hw(*crcp, data, len);
	}

	*(__le32 *)out = ~cpu_to_le32(crc);
	return 0;
}
|
||||
|
||||
/*
 * shash .finup (PCLMULQDQ variant): absorb the last @len bytes of @data
 * on top of the per-request state and write the digest to @out.
 */
static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
				  unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32c_pcl_intel_finup(state, data, len, out);
}
|
||||
|
||||
/*
 * shash .digest (PCLMULQDQ variant): one-shot CRC of @data starting from
 * the transform's seed; the digest is written to @out.
 */
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
				   unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32c_pcl_intel_finup(seed, data, len, out);
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.setkey = crc32c_intel_setkey,
|
||||
.init = crc32c_intel_init,
|
||||
.update = crc32c_intel_update,
|
||||
.final = crc32c_intel_final,
|
||||
.finup = crc32c_intel_finup,
|
||||
.digest = crc32c_intel_digest,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = CHKSUM_DIGEST_SIZE,
|
||||
.base = {
|
||||
.cra_name = "crc32c",
|
||||
.cra_driver_name = "crc32c-intel",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = CHKSUM_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(u32),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = crc32c_intel_cra_init,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id crc32c_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
|
||||
|
||||
/*
 * Module entry point.
 *
 * Returns -ENODEV when the CPU does not match crc32c_cpu_id.  On x86-64
 * with PCLMULQDQ the update/finup/digest callbacks are switched to the
 * PCL variants and the breakeven point is set before the algorithm is
 * registered.  Otherwise returns the result of crypto_register_shash().
 */
static int __init crc32c_intel_mod_init(void)
{
	int err;

	if (x86_match_cpu(crc32c_cpu_id) == NULL)
		return -ENODEV;

#ifdef CONFIG_X86_64
	if (cpu_has_pclmulqdq) {
		alg.update = crc32c_pcl_intel_update;
		alg.finup = crc32c_pcl_intel_finup;
		alg.digest = crc32c_pcl_intel_digest;
		set_pcl_breakeven_point();
	}
#endif

	err = crypto_register_shash(&alg);
	return err;
}
|
||||
|
||||
static void __exit crc32c_intel_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
module_init(crc32c_intel_mod_init);
|
||||
module_exit(crc32c_intel_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
|
||||
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
MODULE_ALIAS_CRYPTO("crc32c");
|
||||
MODULE_ALIAS_CRYPTO("crc32c-intel");
|
||||
463
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
Normal file
463
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
Normal file
|
|
@ -0,0 +1,463 @@
|
|||
/*
|
||||
* Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
|
||||
*
|
||||
* The white papers on CRC32C calculations with PCLMULQDQ instruction can be
|
||||
* downloaded from:
|
||||
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
|
||||
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
|
||||
*
|
||||
* Copyright (C) 2012 Intel Corporation.
|
||||
*
|
||||
* Authors:
|
||||
* Wajdi Feghali <wajdi.k.feghali@intel.com>
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* David Cote <david.m.cote@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the
|
||||
* OpenIB.org BSD license below:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <asm/inst.h>
|
||||
#include <linux/linkage.h>
|
||||
|
||||
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction

## LABEL prefix n: emit the label <prefix><n>: (used with .altmacro so that
## %i / %j expand to the current numeric value).
.macro LABEL prefix n
\prefix\n\():
.endm

## JMPTBL_ENTRY i: emit one 16-bit jump table entry holding the offset of
## crc_<i> from crc_array.
.macro JMPTBL_ENTRY i
.word crc_\i - crc_array
.endm

## JNC_LESS_THAN j: jump to less_than_<j> when the carry flag is clear.
.macro JNC_LESS_THAN j
	jnc less_than_\j
.endm

# Define threshold where buffers are considered "small" and routed to more
# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
# SMALL_SIZE can be no larger than 255.

#define SMALL_SIZE 200

.if (SMALL_SIZE > 255)
.error "SMALL_SIZE must be < 256"
.endif

# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
#
## Arguments arrive in %rdi (buffer), %esi (len) and %edx (crc_init); the
## result is returned in %eax.  The routine works in the numbered stages
## marked below: 1) consume bytes up to an 8-byte boundary, 2) split the
## data into three equal runs of 8-byte words that are CRCed in parallel,
## 3) the unrolled crc32q array, 4) merge the three partial CRCs with
## PCLMULQDQ and the K_table constants, 5) loop while at least 24 bytes
## remain, 6) finish the tail with straight-line crc32 instructions.

.text
ENTRY(crc_pcl)
#define    bufp		%rdi
#define    bufp_dw	%edi
#define    bufp_w	%di
#define    bufp_b	%dil
#define    bufptmp	%rcx
#define    block_0	%rcx
#define    block_1	%rdx
#define    block_2	%r11
#define    len		%rsi
#define    len_dw	%esi
#define    len_w	%si
#define    len_b	%sil
#define    crc_init_arg %rdx
#define    tmp		%rbx
#define    crc_init	%r8
#define    crc_init_dw	%r8d
#define    crc1		%r9
#define    crc2		%r10

	pushq   %rbx
	pushq   %rdi
	pushq   %rsi

	## len is a 32-bit argument, so the upper half of its 64-bit register
	## is not guaranteed to be zero on entry.  The code below compares and
	## subtracts the full 64-bit register, so zero-extend it once here.
	mov     len_dw, len_dw

	## Move crc_init out of its argument register: that register is
	## reused below as block_1 and is clobbered by the mul.
	mov     crc_init_arg, crc_init

	################################################################
	## 1) ALIGN:
	################################################################

	mov     bufp, bufptmp		# rdi = *buf
	neg     bufp
	and     $7, bufp		# calculate the unalignment amount of
					# the address
	je      proc_block		# Skip if aligned

	## If len is less than 8 and we're unaligned, we need to jump
	## to special code to avoid reading beyond the end of the buffer
	cmp     $8, len
	jae     do_align
	# less_than_8 expects length in upper 3 bits of len_dw
	# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
	shl     $32-3+1, len_dw
	jmp     less_than_8_post_shl1

do_align:
	#### Calculate CRC of unaligned bytes of the buffer (if any)
	movq    (bufptmp), tmp		# load a quadword from the buffer
	add     bufp, bufptmp		# align buffer pointer for quadword
					# processing
	sub     bufp, len		# update buffer length
align_loop:
	crc32b  %bl, crc_init_dw	# compute crc32 of 1-byte
	shr     $8, tmp			# get next byte
	dec     bufp
	jne     align_loop

proc_block:

	################################################################
	## 2) PROCESS  BLOCKS:
	################################################################

	## compute num of bytes to be processed
	movq    len, tmp		# save num bytes in tmp

	cmpq    $128*24, len
	jae     full_block

continue_block:
	cmpq    $SMALL_SIZE, len
	jb      small

	## len < 128*24
	movq    $2731, %rax		# 2731 = ceil(2^16 / 24)
	mul     len_dw			# edx:eax = eax * len_dw
	shrq    $16, %rax

	## eax contains floor(bytes / 24) = num 24-byte chunks to do

	## process rax 24-byte chunks (128 >= rax >= 0)

	## compute end address of each block
	## block 0 (base addr + RAX * 8)
	## block 1 (base addr + RAX * 16)
	## block 2 (base addr + RAX * 24)
	lea     (bufptmp, %rax, 8), block_0
	lea     (block_0, %rax, 8), block_1
	lea     (block_1, %rax, 8), block_2

	xor     crc1, crc1
	xor     crc2, crc2

	## branch into array
	lea	jump_table(%rip), bufp
	movzxw  (bufp, %rax, 2), len
	offset=crc_array-jump_table
	lea     offset(bufp, len, 1), bufp
	jmp     *bufp

	################################################################
	## 2a) PROCESS FULL BLOCKS:
	################################################################
full_block:
	movq    $128,%rax
	lea     128*8*2(block_0), block_1
	lea     128*8*3(block_0), block_2
	add     $128*8*1, block_0

	xor     crc1,crc1
	xor     crc2,crc2

	# Fall through into top of crc array (crc_128)

	################################################################
	## 3) CRC Array:
	################################################################

crc_array:
	i=128
.rept 128-1
.altmacro
LABEL crc_ %i
.noaltmacro
	crc32q   -i*8(block_0), crc_init
	crc32q   -i*8(block_1), crc1
	crc32q   -i*8(block_2), crc2
	i=(i-1)
.endr

.altmacro
LABEL crc_ %i
.noaltmacro
	crc32q   -i*8(block_0), crc_init
	crc32q   -i*8(block_1), crc1
# SKIP  crc32  -i*8(block_2), crc2 ; Don't do this one yet

	mov     block_2, block_0

	################################################################
	## 4) Combine three results:
	################################################################

	lea	(K_table-8)(%rip), bufp		# first entry is for idx 1
	shlq    $3, %rax			# rax *= 8
	pmovzxdq (bufp,%rax), %xmm0		# 2 consts: K1:K2
	leal	(%eax,%eax,2), %eax		# rax *= 3 (total *24)
	subq    %rax, tmp			# tmp -= rax*24

	movq    crc_init, %xmm1			# CRC for block 1
	PCLMULQDQ 0x00,%xmm0,%xmm1		# Multiply by K2

	movq    crc1, %xmm2			# CRC for block 2
	PCLMULQDQ 0x10, %xmm0, %xmm2		# Multiply by K1

	pxor    %xmm2,%xmm1
	movq    %xmm1, %rax
	xor     -i*8(block_2), %rax		# fold in the word skipped above
	mov     crc2, crc_init
	crc32   %rax, crc_init

	################################################################
	## 5) Check for end:
	################################################################

LABEL crc_ 0
	mov     tmp, len
	cmp     $128*24, tmp
	jae     full_block
	cmp     $24, tmp
	jae     continue_block

less_than_24:
	shl     $32-4, len_dw			# less_than_16 expects length
						# in upper 4 bits of len_dw
	jnc     less_than_16
	crc32q  (bufptmp), crc_init
	crc32q  8(bufptmp), crc_init
	jz      do_return
	add     $16, bufptmp
	# len is less than 8 if we got here
	# less_than_8 expects length in upper 3 bits of len_dw
	# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
	shl     $2, len_dw
	jmp     less_than_8_post_shl1

	#######################################################################
	## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
	#######################################################################
small:
	shl $32-8, len_dw		# Prepare len_dw for less_than_256
	j=256
.rept 5					# j = {256, 128, 64, 32, 16}
.altmacro
LABEL less_than_ %j			# less_than_j: Length should be in
					# upper lg(j) bits of len_dw
	j=(j/2)
	shl     $1, len_dw		# Get next MSB
	JNC_LESS_THAN %j
.noaltmacro
	i=0
.rept (j/8)
	crc32q  i(bufptmp), crc_init	# Compute crc32 of 8-byte data
	i=i+8
.endr
	jz      do_return		# Return if remaining length is zero
	add     $j, bufptmp		# Advance buf
.endr

less_than_8:				# Length should be stored in
					# upper 3 bits of len_dw
	shl     $1, len_dw
less_than_8_post_shl1:
	jnc     less_than_4
	crc32l  (bufptmp), crc_init_dw	# CRC of 4 bytes
	jz      do_return		# return if remaining data is zero
	add     $4, bufptmp
less_than_4:				# Length should be stored in
					# upper 2 bits of len_dw
	shl     $1, len_dw
	jnc     less_than_2
	crc32w  (bufptmp), crc_init_dw	# CRC of 2 bytes
	jz      do_return		# return if remaining data is zero
	add     $2, bufptmp
less_than_2:				# Length should be stored in the MSB
					# of len_dw
	shl     $1, len_dw
	jnc     less_than_1
	crc32b  (bufptmp), crc_init_dw	# CRC of 1 byte
less_than_1:				# Length should be zero
do_return:
	movq    crc_init, %rax
	popq    %rsi
	popq    %rdi
	popq    %rbx
	ret

	################################################################
	## jump table        Table is 129 entries x 2 bytes each
	################################################################
.align 4
jump_table:
	i=0
.rept 129
.altmacro
JMPTBL_ENTRY %i
.noaltmacro
	i=i+1
.endr

ENDPROC(crc_pcl)
|
||||
|
||||
################################################################
|
||||
## PCLMULQDQ tables
|
||||
## Table is 128 entries x 2 words (8 bytes) each
|
||||
################################################################
|
||||
.section .rotata, "a", %progbits
|
||||
.align 8
|
||||
K_table:
|
||||
.long 0x493c7d27, 0x00000001
|
||||
.long 0xba4fc28e, 0x493c7d27
|
||||
.long 0xddc0152b, 0xf20c0dfe
|
||||
.long 0x9e4addf8, 0xba4fc28e
|
||||
.long 0x39d3b296, 0x3da6d0cb
|
||||
.long 0x0715ce53, 0xddc0152b
|
||||
.long 0x47db8317, 0x1c291d04
|
||||
.long 0x0d3b6092, 0x9e4addf8
|
||||
.long 0xc96cfdc0, 0x740eef02
|
||||
.long 0x878a92a7, 0x39d3b296
|
||||
.long 0xdaece73e, 0x083a6eec
|
||||
.long 0xab7aff2a, 0x0715ce53
|
||||
.long 0x2162d385, 0xc49f4f67
|
||||
.long 0x83348832, 0x47db8317
|
||||
.long 0x299847d5, 0x2ad91c30
|
||||
.long 0xb9e02b86, 0x0d3b6092
|
||||
.long 0x18b33a4e, 0x6992cea2
|
||||
.long 0xb6dd949b, 0xc96cfdc0
|
||||
.long 0x78d9ccb7, 0x7e908048
|
||||
.long 0xbac2fd7b, 0x878a92a7
|
||||
.long 0xa60ce07b, 0x1b3d8f29
|
||||
.long 0xce7f39f4, 0xdaece73e
|
||||
.long 0x61d82e56, 0xf1d0f55e
|
||||
.long 0xd270f1a2, 0xab7aff2a
|
||||
.long 0xc619809d, 0xa87ab8a8
|
||||
.long 0x2b3cac5d, 0x2162d385
|
||||
.long 0x65863b64, 0x8462d800
|
||||
.long 0x1b03397f, 0x83348832
|
||||
.long 0xebb883bd, 0x71d111a8
|
||||
.long 0xb3e32c28, 0x299847d5
|
||||
.long 0x064f7f26, 0xffd852c6
|
||||
.long 0xdd7e3b0c, 0xb9e02b86
|
||||
.long 0xf285651c, 0xdcb17aa4
|
||||
.long 0x10746f3c, 0x18b33a4e
|
||||
.long 0xc7a68855, 0xf37c5aee
|
||||
.long 0x271d9844, 0xb6dd949b
|
||||
.long 0x8e766a0c, 0x6051d5a2
|
||||
.long 0x93a5f730, 0x78d9ccb7
|
||||
.long 0x6cb08e5c, 0x18b0d4ff
|
||||
.long 0x6b749fb2, 0xbac2fd7b
|
||||
.long 0x1393e203, 0x21f3d99c
|
||||
.long 0xcec3662e, 0xa60ce07b
|
||||
.long 0x96c515bb, 0x8f158014
|
||||
.long 0xe6fc4e6a, 0xce7f39f4
|
||||
.long 0x8227bb8a, 0xa00457f7
|
||||
.long 0xb0cd4768, 0x61d82e56
|
||||
.long 0x39c7ff35, 0x8d6d2c43
|
||||
.long 0xd7a4825c, 0xd270f1a2
|
||||
.long 0x0ab3844b, 0x00ac29cf
|
||||
.long 0x0167d312, 0xc619809d
|
||||
.long 0xf6076544, 0xe9adf796
|
||||
.long 0x26f6a60a, 0x2b3cac5d
|
||||
.long 0xa741c1bf, 0x96638b34
|
||||
.long 0x98d8d9cb, 0x65863b64
|
||||
.long 0x49c3cc9c, 0xe0e9f351
|
||||
.long 0x68bce87a, 0x1b03397f
|
||||
.long 0x57a3d037, 0x9af01f2d
|
||||
.long 0x6956fc3b, 0xebb883bd
|
||||
.long 0x42d98888, 0x2cff42cf
|
||||
.long 0x3771e98f, 0xb3e32c28
|
||||
.long 0xb42ae3d9, 0x88f25a3a
|
||||
.long 0x2178513a, 0x064f7f26
|
||||
.long 0xe0ac139e, 0x4e36f0b0
|
||||
.long 0x170076fa, 0xdd7e3b0c
|
||||
.long 0x444dd413, 0xbd6f81f8
|
||||
.long 0x6f345e45, 0xf285651c
|
||||
.long 0x41d17b64, 0x91c9bd4b
|
||||
.long 0xff0dba97, 0x10746f3c
|
||||
.long 0xa2b73df1, 0x885f087b
|
||||
.long 0xf872e54c, 0xc7a68855
|
||||
.long 0x1e41e9fc, 0x4c144932
|
||||
.long 0x86d8e4d2, 0x271d9844
|
||||
.long 0x651bd98b, 0x52148f02
|
||||
.long 0x5bb8f1bc, 0x8e766a0c
|
||||
.long 0xa90fd27a, 0xa3c6f37a
|
||||
.long 0xb3af077a, 0x93a5f730
|
||||
.long 0x4984d782, 0xd7c0557f
|
||||
.long 0xca6ef3ac, 0x6cb08e5c
|
||||
.long 0x234e0b26, 0x63ded06a
|
||||
.long 0xdd66cbbb, 0x6b749fb2
|
||||
.long 0x4597456a, 0x4d56973c
|
||||
.long 0xe9e28eb4, 0x1393e203
|
||||
.long 0x7b3ff57a, 0x9669c9df
|
||||
.long 0xc9c8b782, 0xcec3662e
|
||||
.long 0x3f70cc6f, 0xe417f38a
|
||||
.long 0x93e106a4, 0x96c515bb
|
||||
.long 0x62ec6c6d, 0x4b9e0f71
|
||||
.long 0xd813b325, 0xe6fc4e6a
|
||||
.long 0x0df04680, 0xd104b8fc
|
||||
.long 0x2342001e, 0x8227bb8a
|
||||
.long 0x0a2a8d7e, 0x5b397730
|
||||
.long 0x6d9a4957, 0xb0cd4768
|
||||
.long 0xe8b6368b, 0xe78eb416
|
||||
.long 0xd2c3ed1a, 0x39c7ff35
|
||||
.long 0x995a5724, 0x61ff0e01
|
||||
.long 0x9ef68d35, 0xd7a4825c
|
||||
.long 0x0c139b31, 0x8d96551c
|
||||
.long 0xf2271e60, 0x0ab3844b
|
||||
.long 0x0b0bf8ca, 0x0bf80dd2
|
||||
.long 0x2664fd8b, 0x0167d312
|
||||
.long 0xed64812d, 0x8821abed
|
||||
.long 0x02ee03b2, 0xf6076544
|
||||
.long 0x8604ae0f, 0x6a45d2b2
|
||||
.long 0x363bd6b3, 0x26f6a60a
|
||||
.long 0x135c83fd, 0xd8d26619
|
||||
.long 0x5fabe670, 0xa741c1bf
|
||||
.long 0x35ec3279, 0xde87806c
|
||||
.long 0x00bcf5f6, 0x98d8d9cb
|
||||
.long 0x8ae00689, 0x14338754
|
||||
.long 0x17f27698, 0x49c3cc9c
|
||||
.long 0x58ca5f00, 0x5bd2011f
|
||||
.long 0xaa7c7ad5, 0x68bce87a
|
||||
.long 0xb5cfca28, 0xdd07448e
|
||||
.long 0xded288f8, 0x57a3d037
|
||||
.long 0x59f229bc, 0xdde8f5b9
|
||||
.long 0x6d390dec, 0x6956fc3b
|
||||
.long 0x37170390, 0xa3e3e02c
|
||||
.long 0x6353c1cc, 0x42d98888
|
||||
.long 0xc4584f5c, 0xd73c7bea
|
||||
.long 0xf48642e9, 0x3771e98f
|
||||
.long 0x531377e2, 0x80ff0093
|
||||
.long 0xdd35bc8d, 0xb42ae3d9
|
||||
.long 0xb25b29f2, 0x8fe4c34d
|
||||
.long 0x9a5ede41, 0x2178513a
|
||||
.long 0xa563905d, 0xdf99fc11
|
||||
.long 0x45cddf4e, 0xe0ac139e
|
||||
.long 0xacfa3103, 0x6c23e841
|
||||
.long 0xa51b6135, 0x170076fa
|
||||
643
arch/x86/crypto/crct10dif-pcl-asm_64.S
Normal file
643
arch/x86/crypto/crct10dif-pcl-asm_64.S
Normal file
|
|
@ -0,0 +1,643 @@
|
|||
########################################################################
|
||||
# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
|
||||
#
|
||||
# Copyright (c) 2013, Intel Corporation
|
||||
#
|
||||
# Authors:
|
||||
# Erdinc Ozturk <erdinc.ozturk@intel.com>
|
||||
# Vinodh Gopal <vinodh.gopal@intel.com>
|
||||
# James Guilford <james.guilford@intel.com>
|
||||
# Tim Chen <tim.c.chen@linux.intel.com>
|
||||
#
|
||||
# This software is available to you under a choice of one of two
|
||||
# licenses. You may choose to be licensed under the terms of the GNU
|
||||
# General Public License (GPL) Version 2, available from the file
|
||||
# COPYING in the main directory of this source tree, or the
|
||||
# OpenIB.org BSD license below:
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
#
|
||||
# * Neither the name of the Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
# Function API:
|
||||
# UINT16 crc_t10dif_pcl(
|
||||
# UINT16 init_crc, //initial CRC value, 16 bits
|
||||
# const unsigned char *buf, //buffer pointer to calculate CRC on
|
||||
# UINT64 len //buffer length in bytes (64-bit data)
|
||||
# );
|
||||
#
|
||||
# Reference paper titled "Fast CRC Computation for Generic
|
||||
# Polynomials Using PCLMULQDQ Instruction"
|
||||
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
|
||||
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
|
||||
#
|
||||
#
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
#define arg3 %rdx
|
||||
|
||||
#define arg1_low32 %edi
|
||||
|
||||
ENTRY(crc_t10dif_pcl)
|
||||
.align 16
|
||||
|
||||
# adjust the 16-bit initial_crc value, scale it to 32 bits
|
||||
shl $16, arg1_low32
|
||||
|
||||
# Allocate Stack Space
|
||||
mov %rsp, %rcx
|
||||
sub $16*2, %rsp
|
||||
# align stack to 16 byte boundary
|
||||
and $~(0x10 - 1), %rsp
|
||||
|
||||
# check if smaller than 256
|
||||
cmp $256, arg3
|
||||
|
||||
# for sizes less than 256, we can't fold 128B at a time...
|
||||
jl _less_than_128
|
||||
|
||||
|
||||
# load the initial crc value
|
||||
movd arg1_low32, %xmm10 # initial crc
|
||||
|
||||
# crc value does not need to be byte-reflected, but it needs
|
||||
# to be moved to the high part of the register.
|
||||
# because data will be byte-reflected and will align with
|
||||
# initial crc at correct place.
|
||||
pslldq $12, %xmm10
|
||||
|
||||
movdqa SHUF_MASK(%rip), %xmm11
|
||||
# receive the initial 64B data, xor the initial crc value
|
||||
movdqu 16*0(arg2), %xmm0
|
||||
movdqu 16*1(arg2), %xmm1
|
||||
movdqu 16*2(arg2), %xmm2
|
||||
movdqu 16*3(arg2), %xmm3
|
||||
movdqu 16*4(arg2), %xmm4
|
||||
movdqu 16*5(arg2), %xmm5
|
||||
movdqu 16*6(arg2), %xmm6
|
||||
movdqu 16*7(arg2), %xmm7
|
||||
|
||||
pshufb %xmm11, %xmm0
|
||||
# XOR the initial_crc value
|
||||
pxor %xmm10, %xmm0
|
||||
pshufb %xmm11, %xmm1
|
||||
pshufb %xmm11, %xmm2
|
||||
pshufb %xmm11, %xmm3
|
||||
pshufb %xmm11, %xmm4
|
||||
pshufb %xmm11, %xmm5
|
||||
pshufb %xmm11, %xmm6
|
||||
pshufb %xmm11, %xmm7
|
||||
|
||||
movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
|
||||
#imm value of pclmulqdq instruction
|
||||
#will determine which constant to use
|
||||
|
||||
#################################################################
|
||||
# we subtract 256 instead of 128 to save one instruction from the loop
|
||||
sub $256, arg3
|
||||
|
||||
# at this section of the code, there is 64*x+y (0<=y<64) bytes of
|
||||
# buffer. The _fold_64_B_loop will fold 64B at a time
|
||||
# until we have 64+y Bytes of buffer
|
||||
|
||||
|
||||
# fold 64B at a time. This section of the code folds 4 xmm
|
||||
# registers in parallel
|
||||
_fold_64_B_loop:
|
||||
|
||||
# update the buffer pointer
|
||||
add $128, arg2 # buf += 64#
|
||||
|
||||
movdqu 16*0(arg2), %xmm9
|
||||
movdqu 16*1(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm0, %xmm8
|
||||
movdqa %xmm1, %xmm13
|
||||
pclmulqdq $0x0 , %xmm10, %xmm0
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0 , %xmm10, %xmm1
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm0
|
||||
xorps %xmm8 , %xmm0
|
||||
pxor %xmm12, %xmm1
|
||||
xorps %xmm13, %xmm1
|
||||
|
||||
movdqu 16*2(arg2), %xmm9
|
||||
movdqu 16*3(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm2, %xmm8
|
||||
movdqa %xmm3, %xmm13
|
||||
pclmulqdq $0x0, %xmm10, %xmm2
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0, %xmm10, %xmm3
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm2
|
||||
xorps %xmm8 , %xmm2
|
||||
pxor %xmm12, %xmm3
|
||||
xorps %xmm13, %xmm3
|
||||
|
||||
movdqu 16*4(arg2), %xmm9
|
||||
movdqu 16*5(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm4, %xmm8
|
||||
movdqa %xmm5, %xmm13
|
||||
pclmulqdq $0x0, %xmm10, %xmm4
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0, %xmm10, %xmm5
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm4
|
||||
xorps %xmm8 , %xmm4
|
||||
pxor %xmm12, %xmm5
|
||||
xorps %xmm13, %xmm5
|
||||
|
||||
movdqu 16*6(arg2), %xmm9
|
||||
movdqu 16*7(arg2), %xmm12
|
||||
pshufb %xmm11, %xmm9
|
||||
pshufb %xmm11, %xmm12
|
||||
movdqa %xmm6 , %xmm8
|
||||
movdqa %xmm7 , %xmm13
|
||||
pclmulqdq $0x0 , %xmm10, %xmm6
|
||||
pclmulqdq $0x11, %xmm10, %xmm8
|
||||
pclmulqdq $0x0 , %xmm10, %xmm7
|
||||
pclmulqdq $0x11, %xmm10, %xmm13
|
||||
pxor %xmm9 , %xmm6
|
||||
xorps %xmm8 , %xmm6
|
||||
pxor %xmm12, %xmm7
|
||||
xorps %xmm13, %xmm7
|
||||
|
||||
sub $128, arg3
|
||||
|
||||
# check if there is another 64B in the buffer to be able to fold
|
||||
jge _fold_64_B_loop
|
||||
##################################################################
|
||||
|
||||
|
||||
add $128, arg2
|
||||
# at this point, the buffer pointer is pointing at the last y Bytes
|
||||
# of the buffer the 64B of folded data is in 4 of the xmm
|
||||
# registers: xmm0, xmm1, xmm2, xmm3
|
||||
|
||||
|
||||
# fold the 8 xmm registers to 1 xmm register with different constants
|
||||
|
||||
movdqa rk9(%rip), %xmm10
|
||||
movdqa %xmm0, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm0
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm0, %xmm7
|
||||
|
||||
movdqa rk11(%rip), %xmm10
|
||||
movdqa %xmm1, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm1
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm1, %xmm7
|
||||
|
||||
movdqa rk13(%rip), %xmm10
|
||||
movdqa %xmm2, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm2
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm2, %xmm7
|
||||
|
||||
movdqa rk15(%rip), %xmm10
|
||||
movdqa %xmm3, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm3
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm3, %xmm7
|
||||
|
||||
movdqa rk17(%rip), %xmm10
|
||||
movdqa %xmm4, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm4
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm4, %xmm7
|
||||
|
||||
movdqa rk19(%rip), %xmm10
|
||||
movdqa %xmm5, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm5
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
xorps %xmm5, %xmm7
|
||||
|
||||
movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
|
||||
#imm value of pclmulqdq instruction
|
||||
#will determine which constant to use
|
||||
movdqa %xmm6, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm6
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm6, %xmm7
|
||||
|
||||
|
||||
# instead of 128, we add 112 (128-16) to the loop counter to save 1 instruction
|
||||
# from the loop instead of a cmp instruction, we use the negative
|
||||
# flag with the jl instruction
|
||||
add $128-16, arg3
|
||||
jl _final_reduction_for_128
|
||||
|
||||
# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
|
||||
# and the rest is in memory. We can fold 16 bytes at a time if y>=16
|
||||
# continue folding 16B at a time
|
||||
|
||||
_16B_reduction_loop:
|
||||
movdqa %xmm7, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm7
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
movdqu (arg2), %xmm0
|
||||
pshufb %xmm11, %xmm0
|
||||
pxor %xmm0 , %xmm7
|
||||
add $16, arg2
|
||||
sub $16, arg3
|
||||
# instead of a cmp instruction, we utilize the flags with the
|
||||
# jge instruction equivalent of: cmp arg3, 16-16
|
||||
# check if there is any more 16B in the buffer to be able to fold
|
||||
jge _16B_reduction_loop
|
||||
|
||||
#now we have 16+z bytes left to reduce, where 0<= z < 16.
|
||||
#first, we reduce the data in the xmm7 register
|
||||
|
||||
|
||||
_final_reduction_for_128:
|
||||
# check if any more data to fold. If not, compute the CRC of
|
||||
# the final 128 bits
|
||||
add $16, arg3
|
||||
je _128_done
|
||||
|
||||
# here we are getting data that is less than 16 bytes.
|
||||
# since we know that there was data before the pointer, we can
|
||||
# offset the input pointer before the actual point, to receive
|
||||
# exactly 16 bytes. after that the registers need to be adjusted.
|
||||
_get_last_two_xmms:
|
||||
movdqa %xmm7, %xmm2
|
||||
|
||||
movdqu -16(arg2, arg3), %xmm1
|
||||
pshufb %xmm11, %xmm1
|
||||
|
||||
# get rid of the extra data that was loaded before
|
||||
# load the shift constant
|
||||
lea pshufb_shf_table+16(%rip), %rax
|
||||
sub arg3, %rax
|
||||
movdqu (%rax), %xmm0
|
||||
|
||||
# shift xmm2 to the left by arg3 bytes
|
||||
pshufb %xmm0, %xmm2
|
||||
|
||||
# shift xmm7 to the right by 16-arg3 bytes
|
||||
pxor mask1(%rip), %xmm0
|
||||
pshufb %xmm0, %xmm7
|
||||
pblendvb %xmm2, %xmm1 #xmm0 is implicit
|
||||
|
||||
# fold 16 Bytes
|
||||
movdqa %xmm1, %xmm2
|
||||
movdqa %xmm7, %xmm8
|
||||
pclmulqdq $0x11, %xmm10, %xmm7
|
||||
pclmulqdq $0x0 , %xmm10, %xmm8
|
||||
pxor %xmm8, %xmm7
|
||||
pxor %xmm2, %xmm7
|
||||
|
||||
_128_done:
|
||||
# compute crc of a 128-bit value
|
||||
movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
|
||||
movdqa %xmm7, %xmm0
|
||||
|
||||
#64b fold
|
||||
pclmulqdq $0x1, %xmm10, %xmm7
|
||||
pslldq $8 , %xmm0
|
||||
pxor %xmm0, %xmm7
|
||||
|
||||
#32b fold
|
||||
movdqa %xmm7, %xmm0
|
||||
|
||||
pand mask2(%rip), %xmm0
|
||||
|
||||
psrldq $12, %xmm7
|
||||
pclmulqdq $0x10, %xmm10, %xmm7
|
||||
pxor %xmm0, %xmm7
|
||||
|
||||
#barrett reduction
|
||||
_barrett:
|
||||
movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
|
||||
movdqa %xmm7, %xmm0
|
||||
pclmulqdq $0x01, %xmm10, %xmm7
|
||||
pslldq $4, %xmm7
|
||||
pclmulqdq $0x11, %xmm10, %xmm7
|
||||
|
||||
pslldq $4, %xmm7
|
||||
pxor %xmm0, %xmm7
|
||||
pextrd $1, %xmm7, %eax
|
||||
|
||||
_cleanup:
|
||||
# scale the result back to 16 bits
|
||||
shr $16, %eax
|
||||
mov %rcx, %rsp
|
||||
ret
|
||||
|
||||
########################################################################
|
||||
|
||||
.align 16
|
||||
_less_than_128:
|
||||
|
||||
# check if there is enough buffer to be able to fold 16B at a time
|
||||
cmp $32, arg3
|
||||
jl _less_than_32
|
||||
movdqa SHUF_MASK(%rip), %xmm11
|
||||
|
||||
# now if there is, load the constants
|
||||
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
|
||||
|
||||
movd arg1_low32, %xmm0 # get the initial crc value
|
||||
pslldq $12, %xmm0 # align it to its correct place
|
||||
movdqu (arg2), %xmm7 # load the plaintext
|
||||
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
|
||||
pxor %xmm0, %xmm7
|
||||
|
||||
|
||||
# update the buffer pointer
|
||||
add $16, arg2
|
||||
|
||||
# update the counter. subtract 32 instead of 16 to save one
|
||||
# instruction from the loop
|
||||
sub $32, arg3
|
||||
|
||||
jmp _16B_reduction_loop
|
||||
|
||||
|
||||
.align 16
|
||||
_less_than_32:
|
||||
# mov initial crc to the return value. this is necessary for
|
||||
# zero-length buffers.
|
||||
mov arg1_low32, %eax
|
||||
test arg3, arg3
|
||||
je _cleanup
|
||||
|
||||
movdqa SHUF_MASK(%rip), %xmm11
|
||||
|
||||
movd arg1_low32, %xmm0 # get the initial crc value
|
||||
pslldq $12, %xmm0 # align it to its correct place
|
||||
|
||||
cmp $16, arg3
|
||||
je _exact_16_left
|
||||
jl _less_than_16_left
|
||||
|
||||
movdqu (arg2), %xmm7 # load the plaintext
|
||||
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
add $16, arg2
|
||||
sub $16, arg3
|
||||
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
|
||||
jmp _get_last_two_xmms
|
||||
|
||||
|
||||
.align 16
|
||||
_less_than_16_left:
|
||||
# use stack space to load data less than 16 bytes, zero-out
|
||||
# the 16B in memory first.
|
||||
|
||||
pxor %xmm1, %xmm1
|
||||
mov %rsp, %r11
|
||||
movdqa %xmm1, (%r11)
|
||||
|
||||
cmp $4, arg3
|
||||
jl _only_less_than_4
|
||||
|
||||
# backup the counter value
|
||||
mov arg3, %r9
|
||||
cmp $8, arg3
|
||||
jl _less_than_8_left
|
||||
|
||||
# load 8 Bytes
|
||||
mov (arg2), %rax
|
||||
mov %rax, (%r11)
|
||||
add $8, %r11
|
||||
sub $8, arg3
|
||||
add $8, arg2
|
||||
_less_than_8_left:
|
||||
|
||||
cmp $4, arg3
|
||||
jl _less_than_4_left
|
||||
|
||||
# load 4 Bytes
|
||||
mov (arg2), %eax
|
||||
mov %eax, (%r11)
|
||||
add $4, %r11
|
||||
sub $4, arg3
|
||||
add $4, arg2
|
||||
_less_than_4_left:
|
||||
|
||||
cmp $2, arg3
|
||||
jl _less_than_2_left
|
||||
|
||||
# load 2 Bytes
|
||||
mov (arg2), %ax
|
||||
mov %ax, (%r11)
|
||||
add $2, %r11
|
||||
sub $2, arg3
|
||||
add $2, arg2
|
||||
_less_than_2_left:
|
||||
cmp $1, arg3
|
||||
jl _zero_left
|
||||
|
||||
# load 1 Byte
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
_zero_left:
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
# shl r9, 4
|
||||
lea pshufb_shf_table+16(%rip), %rax
|
||||
sub %r9, %rax
|
||||
movdqu (%rax), %xmm0
|
||||
pxor mask1(%rip), %xmm0
|
||||
|
||||
pshufb %xmm0, %xmm7
|
||||
jmp _128_done
|
||||
|
||||
.align 16
|
||||
_exact_16_left:
|
||||
movdqu (arg2), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
jmp _128_done
|
||||
|
||||
_only_less_than_4:
|
||||
cmp $3, arg3
|
||||
jl _only_less_than_3
|
||||
|
||||
# load 3 Bytes
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
|
||||
mov 1(arg2), %al
|
||||
mov %al, 1(%r11)
|
||||
|
||||
mov 2(arg2), %al
|
||||
mov %al, 2(%r11)
|
||||
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
psrldq $5, %xmm7
|
||||
|
||||
jmp _barrett
|
||||
_only_less_than_3:
|
||||
cmp $2, arg3
|
||||
jl _only_less_than_2
|
||||
|
||||
# load 2 Bytes
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
|
||||
mov 1(arg2), %al
|
||||
mov %al, 1(%r11)
|
||||
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
psrldq $6, %xmm7
|
||||
|
||||
jmp _barrett
|
||||
_only_less_than_2:
|
||||
|
||||
# load 1 Byte
|
||||
mov (arg2), %al
|
||||
mov %al, (%r11)
|
||||
|
||||
movdqa (%rsp), %xmm7
|
||||
pshufb %xmm11, %xmm7
|
||||
pxor %xmm0 , %xmm7 # xor the initial crc value
|
||||
|
||||
psrldq $7, %xmm7
|
||||
|
||||
jmp _barrett
|
||||
|
||||
ENDPROC(crc_t10dif_pcl)
|
||||
|
||||
.data
|
||||
|
||||
# precomputed constants
|
||||
# these constants are precomputed from the poly:
|
||||
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
|
||||
.align 16
|
||||
# Q = 0x18BB70000
|
||||
# rk1 = 2^(32*3) mod Q << 32
|
||||
# rk2 = 2^(32*5) mod Q << 32
|
||||
# rk3 = 2^(32*15) mod Q << 32
|
||||
# rk4 = 2^(32*17) mod Q << 32
|
||||
# rk5 = 2^(32*3) mod Q << 32
|
||||
# rk6 = 2^(32*2) mod Q << 32
|
||||
# rk7 = floor(2^64/Q)
|
||||
# rk8 = Q
|
||||
rk1:
|
||||
.quad 0x2d56000000000000
|
||||
rk2:
|
||||
.quad 0x06df000000000000
|
||||
rk3:
|
||||
.quad 0x9d9d000000000000
|
||||
rk4:
|
||||
.quad 0x7cf5000000000000
|
||||
rk5:
|
||||
.quad 0x2d56000000000000
|
||||
rk6:
|
||||
.quad 0x1368000000000000
|
||||
rk7:
|
||||
.quad 0x00000001f65a57f8
|
||||
rk8:
|
||||
.quad 0x000000018bb70000
|
||||
|
||||
rk9:
|
||||
.quad 0xceae000000000000
|
||||
rk10:
|
||||
.quad 0xbfd6000000000000
|
||||
rk11:
|
||||
.quad 0x1e16000000000000
|
||||
rk12:
|
||||
.quad 0x713c000000000000
|
||||
rk13:
|
||||
.quad 0xf7f9000000000000
|
||||
rk14:
|
||||
.quad 0x80a6000000000000
|
||||
rk15:
|
||||
.quad 0x044c000000000000
|
||||
rk16:
|
||||
.quad 0xe658000000000000
|
||||
rk17:
|
||||
.quad 0xad18000000000000
|
||||
rk18:
|
||||
.quad 0xa497000000000000
|
||||
rk19:
|
||||
.quad 0x6ee3000000000000
|
||||
rk20:
|
||||
.quad 0xe7b5000000000000
|
||||
|
||||
|
||||
|
||||
mask1:
|
||||
.octa 0x80808080808080808080808080808080
|
||||
mask2:
|
||||
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
|
||||
|
||||
SHUF_MASK:
|
||||
.octa 0x000102030405060708090A0B0C0D0E0F
|
||||
|
||||
pshufb_shf_table:
|
||||
# use these values for shift constants for the pshufb instruction
|
||||
# different alignments result in values as shown:
|
||||
# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
|
||||
# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
|
||||
# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
|
||||
# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
|
||||
# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
|
||||
# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
|
||||
# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
|
||||
# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
|
||||
# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
|
||||
# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
|
||||
# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
|
||||
# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
|
||||
# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
|
||||
# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
|
||||
# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
|
||||
.octa 0x8f8e8d8c8b8a89888786858483828100
|
||||
.octa 0x000e0d0c0b0a09080706050403020100
|
||||
151
arch/x86/crypto/crct10dif-pclmul_glue.c
Normal file
151
arch/x86/crypto/crct10dif-pclmul_glue.c
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
|
||||
*
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc-t10dif.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
/*
 * Assembly implementation of the CRC update: takes the running CRC, a
 * buffer and its length, and returns the updated CRC.  Every caller in
 * this file brackets the call with kernel_fpu_begin()/kernel_fpu_end().
 */
asmlinkage __u16 crc_t10dif_pcl(__u16 crc,
				const unsigned char *buf,
				size_t len);
|
||||
|
||||
struct chksum_desc_ctx {
|
||||
__u16 crc;
|
||||
};
|
||||
|
||||
/*
|
||||
* Steps through buffer one byte at a time, calculates reflected
|
||||
* crc using table.
|
||||
*/
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
if (irq_fpu_usable()) {
|
||||
kernel_fpu_begin();
|
||||
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
|
||||
kernel_fpu_end();
|
||||
} else
|
||||
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ->final(): store the accumulated 16-bit CRC at @out.  Always returns 0. */
static int chksum_final(struct shash_desc *desc, u8 *out)
{
	struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	__u16 *digest = (__u16 *)out;

	*digest = state->crc;
	return 0;
}
|
||||
|
||||
/*
 * Shared helper for finup/digest: continue the CRC at *@crcp over @len
 * bytes of @data and store the 16-bit result at @out.  *@crcp itself is
 * only read.  Falls back to the generic implementation when
 * irq_fpu_usable() says the FPU cannot be used here.  Always returns 0.
 */
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
			  u8 *out)
{
	__u16 *digest = (__u16 *)out;

	if (!irq_fpu_usable()) {
		*digest = crc_t10dif_generic(*crcp, data, len);
		return 0;
	}

	kernel_fpu_begin();
	*digest = crc_t10dif_pcl(*crcp, data, len);
	kernel_fpu_end();

	return 0;
}
|
||||
|
||||
/*
 * ->finup(): process the last @len bytes at @data on top of the CRC
 * accumulated so far and write the final value to @out.
 */
static int chksum_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct chksum_desc_ctx *state = shash_desc_ctx(desc);
	__u16 *running_crc = &state->crc;

	return __chksum_finup(running_crc, data, len, out);
}
|
||||
|
||||
/*
 * ->digest(): one-shot CRC of @length bytes at @data, written to @out.
 *
 * A one-shot digest must start from the same seed chksum_init() uses (0).
 * Nothing on this path writes the descriptor context, so reading
 * ctx->crc here (as this function used to) seeded the CRC with whatever
 * happened to be in that memory.  @desc is intentionally unused.
 *
 * Always returns 0.
 */
static int chksum_digest(struct shash_desc *desc, const u8 *data,
			 unsigned int length, u8 *out)
{
	__u16 crc = 0;

	return __chksum_finup(&crc, data, length, out);
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = CRC_T10DIF_DIGEST_SIZE,
|
||||
.init = chksum_init,
|
||||
.update = chksum_update,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup,
|
||||
.digest = chksum_digest,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "crct10dif",
|
||||
.cra_driver_name = "crct10dif-pclmul",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id crct10dif_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
|
||||
|
||||
/*
 * Module init: register the shash algorithm if the CPU matches
 * crct10dif_cpu_id, otherwise fail with -ENODEV.
 */
static int __init crct10dif_intel_mod_init(void)
{
	if (x86_match_cpu(crct10dif_cpu_id))
		return crypto_register_shash(&alg);

	return -ENODEV;
}
|
||||
|
||||
/* Module exit: unregister the algorithm registered by the init hook. */
static void __exit crct10dif_intel_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
|
||||
|
||||
module_init(crct10dif_intel_mod_init);
|
||||
module_exit(crct10dif_intel_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
|
||||
MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
MODULE_ALIAS_CRYPTO("crct10dif");
|
||||
MODULE_ALIAS_CRYPTO("crct10dif-pclmul");
|
||||
805
arch/x86/crypto/des3_ede-asm_64.S
Normal file
805
arch/x86/crypto/des3_ede-asm_64.S
Normal file
|
|
@ -0,0 +1,805 @@
|
|||
/*
|
||||
* des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
|
||||
*
|
||||
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.file "des3_ede-asm_64.S"
|
||||
.text
|
||||
|
||||
/*
 * Lookup tables used by the round macros.  s1 is the .L_s1 label; each
 * following table sits 64*8 bytes after the previous one.
 */
#define s1	.L_s1
#define s2	((s1) + (64*8))
#define s3	((s2) + (64*8))
#define s4	((s3) + (64*8))
#define s5	((s4) + (64*8))
#define s6	((s5) + (64*8))
#define s7	((s6) + (64*8))
#define s8	((s7) + (64*8))
|
||||
|
||||
/* register macros */
|
||||
/* round-key context pointer (first argument) */
#define CTX	%rdi

/* "left" block halves for blocks 0..2: 64-bit and 32-bit views */
#define RL0	%r8
#define RL1	%r9
#define RL2	%r10

#define RL0d	%r8d
#define RL1d	%r9d
#define RL2d	%r10d

/* "right" block halves for blocks 0..2: 64-bit and 32-bit views */
#define RR0	%r11
#define RR1	%r12
#define RR2	%r13

#define RR0d	%r11d
#define RR1d	%r12d
#define RR2d	%r13d

/*
 * Work registers holding the round key / round input.  Low-byte (bl)
 * and high-byte (bh) views are defined because the round macros pick
 * table indexes out of them one byte at a time.
 */
#define RW0	%rax
#define RW1	%rbx
#define RW2	%rcx

#define RW0d	%eax
#define RW1d	%ebx
#define RW2d	%ecx

#define RW0bl	%al
#define RW1bl	%bl
#define RW2bl	%cl

#define RW0bh	%ah
#define RW1bh	%bh
#define RW2bh	%ch

/* temporaries: 64-bit and 32-bit views */
#define RT0	%r15
#define RT1	%rbp
#define RT2	%r14
#define RT3	%rdx

#define RT0d	%r15d
#define RT1d	%ebp
#define RT2d	%r14d
#define RT3d	%edx
|
||||
|
||||
/***********************************************************************
|
||||
* 1-way 3DES
|
||||
***********************************************************************/
|
||||
/*
 * do_permutation(a, b, offset, mask)
 *
 * Exchange the bits of 32-bit register b selected by mask with the bits
 * of 32-bit register a selected by (mask << offset):
 *
 *	t  = ((a >> offset) ^ b) & mask;
 *	b ^= t;
 *	a ^= t << offset;
 *
 * Clobbers RT0d.
 */
#define do_permutation(a, b, offset, mask) \
	movl	a, RT0d; \
	shrl	$(offset), RT0d; \
	xorl	b, RT0d; \
	andl	$(mask), RT0d; \
	xorl	RT0d, b; \
	shll	$(offset), RT0d; \
	xorl	RT0d, a;
|
||||
|
||||
/*
 * expand_to_64bits(val, mask)
 *
 * ORs a copy of the low 32 bits of val, rotated right by 4, into the
 * upper 32 bits of val, then ANDs the 64-bit result with mask:
 *
 *	val = (val | ((u64)ror32(val##d, 4) << 32)) & mask;
 *
 * val must be a register macro that also has a 'd' (32-bit) form.
 * Clobbers RT0.
 */
#define expand_to_64bits(val, mask) \
	movl	val##d, RT0d; \
	rorl	$4, RT0d; \
	shlq	$32, RT0; \
	orq	RT0, val; \
	andq	mask, val;
|
||||
|
||||
/*
 * compress_to_64bits(val)
 *
 * Folds the upper 32 bits of val back into its lower 32 bits:
 *
 *	val##d |= rol32(val >> 32, 4);
 *
 * The final OR is a 32-bit operation on val##d.  Clobbers RT0.
 */
#define compress_to_64bits(val) \
	movq	val, RT0; \
	shrq	$32, RT0; \
	roll	$4, RT0d; \
	orl	RT0d, val##d;
|
||||
|
||||
/*
 * initial_permutation(left, right)
 *
 * Input permutation for one block held in the 32-bit views of left and
 * right: four masked bit exchanges (do_permutation), a rotate-by-one /
 * 0xaaaaaaaa exchange between the halves, then both halves are widened
 * with expand_to_64bits using the 0x3f3f3f3f3f3f3f3f mask.
 *
 * Leaves that mask in RT3.  Clobbers RT0, RT3 and RW0d.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	movabs	$0x3f3f3f3f3f3f3f3f, RT3; \
	movl	left##d, RW0d; \
	roll	$1, right##d; \
	xorl	right##d, RW0d; \
	andl	$0xaaaaaaaa, RW0d; \
	xorl	RW0d, left##d; \
	xorl	RW0d, right##d; \
	roll	$1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);
|
||||
|
||||
/*
 * final_permutation(left, right)
 *
 * Output permutation for one block: the steps of initial_permutation in
 * reverse order with the rotations in the opposite direction.  Both
 * halves are first narrowed with compress_to_64bits, then the
 * rotate-by-one / 0xaaaaaaaa exchange and the four masked bit exchanges
 * are applied.
 *
 * Clobbers RT0 and RW0d.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl	right##d, RW0d; \
	rorl	$1, left##d; \
	xorl	left##d, RW0d; \
	andl	$0xaaaaaaaa, RW0d; \
	xorl	RW0d, right##d; \
	xorl	RW0d, left##d; \
	rorl	$1, right##d; \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
|
||||
|
||||
/*
 * round1(n, from, to, load_next_key)
 *
 * One cipher round on a single block.  On entry RW0 holds the key for
 * round n.  'from' is XORed into RW0, the eight bytes of the result are
 * peeled off one at a time (low byte, high byte, then shift by 16) and
 * used as indexes into the s8, s6, s4, s2, s7, s5, s3, s1 tables, and
 * all eight table entries are XORed into 'to' (four of them via the RT0
 * accumulator).
 *
 * load_next_key(n, RW0) is invoked once RW0 has been fully consumed, so
 * RW0 can already hold the key for the following round on exit.
 *
 * Clobbers RW0, RT0, RT1, RT2, RT3 and RL1 (used as a scratch index).
 */
#define round1(n, from, to, load_next_key) \
	xorq	from, RW0; \
	\
	movzbl	RW0bl, RT0d; \
	movzbl	RW0bh, RT1d; \
	shrq	$16, RW0; \
	movzbl	RW0bl, RT2d; \
	movzbl	RW0bh, RT3d; \
	shrq	$16, RW0; \
	movq	s8(, RT0, 8), RT0; \
	xorq	s6(, RT1, 8), to; \
	movzbl	RW0bl, RL1d; \
	movzbl	RW0bh, RT1d; \
	shrl	$16, RW0d; \
	xorq	s4(, RT2, 8), RT0; \
	xorq	s2(, RT3, 8), to; \
	movzbl	RW0bl, RT2d; \
	movzbl	RW0bh, RT3d; \
	xorq	s7(, RL1, 8), RT0; \
	xorq	s5(, RT1, 8), to; \
	xorq	s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq	RT0, to; \
	xorq	s1(, RT3, 8), to;
|
||||
|
||||
/* Load 64-bit round key number (n + 1) from the CTX array into RWx. */
#define load_next_key(n, RWx) \
	movq	(((n) + 1) * 8)(CTX), RWx;
|
||||
|
||||
/*
 * Two-argument macro that expands to nothing; passed to round1 in place
 * of load_next_key for the last round, where no further key is loaded.
 */
#define dummy2(a, b) /*_*/
|
||||
|
||||
/*
 * Load one 8-byte block from (io): the first 32-bit word goes to left,
 * the second to right, each byte-swapped after loading.
 */
#define read_block(io, left, right) \
	movl	(io), left##d; \
	movl	4(io), right##d; \
	bswapl	left##d; \
	bswapl	right##d;
|
||||
|
||||
/*
 * Store one 8-byte block to (io): left and right are byte-swapped in
 * place, then written as the first and second 32-bit word.
 */
#define write_block(io, left, right) \
	bswapl	left##d; \
	bswapl	right##d; \
	movl	left##d, (io); \
	movl	right##d, 4(io);
|
||||
|
||||
/*
 * Process a single 8-byte block with the 48 round keys found at CTX.
 *
 * The block is read from src, run through initial_permutation, 48
 * invocations of round1 (key slots 0..47, each round prefetching the
 * next key) and final_permutation, then written to dst.
 */
ENTRY(des3_ede_x86_64_crypt_blk)
	/* input:
	 *	%rdi: round keys, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	/* preserve the registers used below as temporaries/state */
	pushq %rbp;
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	/* %rdx (src) doubles as RT3, so the block is read before any round */
	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	/* key for round 0; every round1 loads the key for the next one */
	movq (CTX), RW0;

	/* key slots 0..15 */
	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	/* key slots 16..31: starts with the same from/to pair as round 15 */
	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	/* key slots 32..47: starts with the same from/to pair as round 31 */
	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	/* last round: no further key to load */
	round1(32+15, RL0, RR0, dummy2);

	/* halves are passed in swapped order for the output side */
	final_permutation(RR0, RL0);
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;
	popq %rbp;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
|
||||
|
||||
/***********************************************************************
|
||||
* 3-way 3DES
|
||||
***********************************************************************/
|
||||
/*
 * expand_to_64bits(val, mask), repeated for the 3-way section with the
 * same token sequence as the definition in the 1-way section:
 *
 *	val = (val | ((u64)ror32(val##d, 4) << 32)) & mask;
 *
 * Clobbers RT0.
 */
#define expand_to_64bits(val, mask) \
	movl	val##d, RT0d; \
	rorl	$4, RT0d; \
	shlq	$32, RT0; \
	orq	RT0, val; \
	andq	mask, val;
|
||||
|
||||
/*
 * compress_to_64bits(val), repeated for the 3-way section with the same
 * token sequence as the definition in the 1-way section:
 *
 *	val##d |= rol32(val >> 32, 4);
 *
 * Clobbers RT0.
 */
#define compress_to_64bits(val) \
	movq	val, RT0; \
	shrq	$32, RT0; \
	roll	$4, RT0d; \
	orl	RT0d, val##d;
|
||||
|
||||
/*
 * initial_permutation3(left, right)
 *
 * Three-block form of the input permutation.  'left' and 'right' are
 * register name prefixes; the block index 0..2 and the 'd' suffix are
 * pasted on (e.g. left##0d).  The per-block work is: four masked bit
 * exchanges, a rotate-by-one / 0xaaaaaaaa exchange using RW0d, RW1d or
 * RW2d as the temporary for block 0, 1 or 2, and expand_to_64bits on
 * both halves with the 0x3f3f3f3f3f3f3f3f mask.
 *
 * Leaves that mask in RT3.  Clobbers RT0, RT3, RW0d, RW1d and RW2d.
 */
#define initial_permutation3(left, right) \
	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	\
	do_permutation(right##0d, left##0d,  2, 0x33333333); \
	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d,  2, 0x33333333); \
	do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d,  2, 0x33333333); \
	do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
	\
	movabs	$0x3f3f3f3f3f3f3f3f, RT3; \
	\
	movl	left##0d, RW0d; \
	roll	$1, right##0d; \
	xorl	right##0d, RW0d; \
	andl	$0xaaaaaaaa, RW0d; \
	xorl	RW0d, left##0d; \
	xorl	RW0d, right##0d; \
	roll	$1, left##0d; \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	movl	left##1d, RW1d; \
	roll	$1, right##1d; \
	xorl	right##1d, RW1d; \
	andl	$0xaaaaaaaa, RW1d; \
	xorl	RW1d, left##1d; \
	xorl	RW1d, right##1d; \
	roll	$1, left##1d; \
	expand_to_64bits(right##1, RT3); \
	expand_to_64bits(left##1, RT3); \
	movl	left##2d, RW2d; \
	roll	$1, right##2d; \
	xorl	right##2d, RW2d; \
	andl	$0xaaaaaaaa, RW2d; \
	xorl	RW2d, left##2d; \
	xorl	RW2d, right##2d; \
	roll	$1, left##2d; \
	expand_to_64bits(right##2, RT3); \
	expand_to_64bits(left##2, RT3);
|
||||
|
||||
/*
 * final_permutation3(left, right)
 *
 * Three-block form of the output permutation.  'left' and 'right' are
 * register name prefixes; the block index 0..2 and the 'd' suffix are
 * pasted on.  For each block both halves are narrowed with
 * compress_to_64bits and run through the rotate-by-one / 0xaaaaaaaa
 * exchange (RW0d, RW1d, RW2d as temporaries); the masked bit exchanges
 * for all three blocks follow.
 *
 * Clobbers RT0, RW0d, RW1d and RW2d.
 */
#define final_permutation3(left, right) \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	movl	right##0d, RW0d; \
	rorl	$1, left##0d; \
	xorl	left##0d, RW0d; \
	andl	$0xaaaaaaaa, RW0d; \
	xorl	RW0d, right##0d; \
	xorl	RW0d, left##0d; \
	rorl	$1, right##0d; \
	compress_to_64bits(right##1); \
	compress_to_64bits(left##1); \
	movl	right##1d, RW1d; \
	rorl	$1, left##1d; \
	xorl	left##1d, RW1d; \
	andl	$0xaaaaaaaa, RW1d; \
	xorl	RW1d, right##1d; \
	xorl	RW1d, left##1d; \
	rorl	$1, right##1d; \
	compress_to_64bits(right##2); \
	compress_to_64bits(left##2); \
	movl	right##2d, RW2d; \
	rorl	$1, left##2d; \
	xorl	left##2d, RW2d; \
	andl	$0xaaaaaaaa, RW2d; \
	xorl	RW2d, right##2d; \
	xorl	RW2d, left##2d; \
	rorl	$1, right##2d; \
	\
	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
	do_permutation(right##0d, left##0d,  2, 0x33333333); \
	do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d,  2, 0x33333333); \
	do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d,  2, 0x33333333); \
	\
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
|
||||
|
||||
/*
 * round3(n, from, to, load_next_key, do_movq)
 *
 * One cipher round on three blocks.  'from' and 'to' are register name
 * prefixes; the block index 0..2 is pasted on.  On entry RW0, RW1 and
 * RW2 each hold the key for round n.
 *
 * For block i, from##i is XORed into RWi, the eight bytes of the result
 * are peeled off in pairs (low byte into RT3d, high byte into RT1d,
 * then shift by 16) and the matching entries of s8/s6, s4/s2, s7/s5 and
 * s3/s1 are XORed into to##i.
 *
 * Block 0 invokes load_next_key(n, RW0) once RW0 has been consumed;
 * blocks 1 and 2 then invoke do_movq(RW0, RW1) and do_movq(RW0, RW2) to
 * copy that key into their own work registers.
 *
 * Clobbers RT1 and RT3.
 */
#define round3(n, from, to, load_next_key, do_movq) \
	xorq	from##0, RW0; \
	movzbl	RW0bl, RT3d; \
	movzbl	RW0bh, RT1d; \
	shrq	$16, RW0; \
	xorq	s8(, RT3, 8), to##0; \
	xorq	s6(, RT1, 8), to##0; \
	movzbl	RW0bl, RT3d; \
	movzbl	RW0bh, RT1d; \
	shrq	$16, RW0; \
	xorq	s4(, RT3, 8), to##0; \
	xorq	s2(, RT1, 8), to##0; \
	movzbl	RW0bl, RT3d; \
	movzbl	RW0bh, RT1d; \
	shrl	$16, RW0d; \
	xorq	s7(, RT3, 8), to##0; \
	xorq	s5(, RT1, 8), to##0; \
	movzbl	RW0bl, RT3d; \
	movzbl	RW0bh, RT1d; \
	load_next_key(n, RW0); \
	xorq	s3(, RT3, 8), to##0; \
	xorq	s1(, RT1, 8), to##0; \
	xorq	from##1, RW1; \
	movzbl	RW1bl, RT3d; \
	movzbl	RW1bh, RT1d; \
	shrq	$16, RW1; \
	xorq	s8(, RT3, 8), to##1; \
	xorq	s6(, RT1, 8), to##1; \
	movzbl	RW1bl, RT3d; \
	movzbl	RW1bh, RT1d; \
	shrq	$16, RW1; \
	xorq	s4(, RT3, 8), to##1; \
	xorq	s2(, RT1, 8), to##1; \
	movzbl	RW1bl, RT3d; \
	movzbl	RW1bh, RT1d; \
	shrl	$16, RW1d; \
	xorq	s7(, RT3, 8), to##1; \
	xorq	s5(, RT1, 8), to##1; \
	movzbl	RW1bl, RT3d; \
	movzbl	RW1bh, RT1d; \
	do_movq(RW0, RW1); \
	xorq	s3(, RT3, 8), to##1; \
	xorq	s1(, RT1, 8), to##1; \
	xorq	from##2, RW2; \
	movzbl	RW2bl, RT3d; \
	movzbl	RW2bh, RT1d; \
	shrq	$16, RW2; \
	xorq	s8(, RT3, 8), to##2; \
	xorq	s6(, RT1, 8), to##2; \
	movzbl	RW2bl, RT3d; \
	movzbl	RW2bh, RT1d; \
	shrq	$16, RW2; \
	xorq	s4(, RT3, 8), to##2; \
	xorq	s2(, RT1, 8), to##2; \
	movzbl	RW2bl, RT3d; \
	movzbl	RW2bh, RT1d; \
	shrl	$16, RW2d; \
	xorq	s7(, RT3, 8), to##2; \
	xorq	s5(, RT1, 8), to##2; \
	movzbl	RW2bl, RT3d; \
	movzbl	RW2bh, RT1d; \
	do_movq(RW0, RW2); \
	xorq	s3(, RT3, 8), to##2; \
	xorq	s1(, RT1, 8), to##2;
|
||||
|
||||
/*
 * Plain 64-bit register copy; passed as the do_movq argument of round3() by
 * every round step except the final one.
 */
#define __movq(src, dst) \
|
||||
movq src, dst;
|
||||
|
||||
ENTRY(des3_ede_x86_64_crypt_blk_3way)
|
||||
/* Run three consecutive blocks through all 48 round steps in parallel.
 *
 * input:
 * %rdi: ctx, round keys
 * %rsi: dst (3 blocks)
 * %rdx: src (3 blocks)
 */
|
||||
|
||||
/* save %rbp, %rbx and %r12-%r15; they are restored before ret */
pushq %rbp;
|
||||
pushq %rbx;
|
||||
pushq %r12;
|
||||
pushq %r13;
|
||||
pushq %r14;
|
||||
pushq %r15;
|
||||
|
||||
/* load input: two 32-bit halves per block */
|
||||
movl 0 * 4(%rdx), RL0d;
|
||||
movl 1 * 4(%rdx), RR0d;
|
||||
movl 2 * 4(%rdx), RL1d;
|
||||
movl 3 * 4(%rdx), RR1d;
|
||||
movl 4 * 4(%rdx), RL2d;
|
||||
movl 5 * 4(%rdx), RR2d;
|
||||
|
||||
/* byte-swap every loaded 32-bit half */
bswapl RL0d;
|
||||
bswapl RR0d;
|
||||
bswapl RL1d;
|
||||
bswapl RR1d;
|
||||
bswapl RL2d;
|
||||
bswapl RR2d;
|
||||
|
||||
initial_permutation3(RL, RR);
|
||||
|
||||
/* load the first 64-bit key word and replicate it for all three blocks */
movq 0(CTX), RW0;
|
||||
movq RW0, RW1;
|
||||
movq RW0, RW2;
|
||||
|
||||
/* steps 0..15: from/to alternate, starting with (RR, RL) */
round3(0, RR, RL, load_next_key, __movq);
|
||||
round3(1, RL, RR, load_next_key, __movq);
|
||||
round3(2, RR, RL, load_next_key, __movq);
|
||||
round3(3, RL, RR, load_next_key, __movq);
|
||||
round3(4, RR, RL, load_next_key, __movq);
|
||||
round3(5, RL, RR, load_next_key, __movq);
|
||||
round3(6, RR, RL, load_next_key, __movq);
|
||||
round3(7, RL, RR, load_next_key, __movq);
|
||||
round3(8, RR, RL, load_next_key, __movq);
|
||||
round3(9, RL, RR, load_next_key, __movq);
|
||||
round3(10, RR, RL, load_next_key, __movq);
|
||||
round3(11, RL, RR, load_next_key, __movq);
|
||||
round3(12, RR, RL, load_next_key, __movq);
|
||||
round3(13, RL, RR, load_next_key, __movq);
|
||||
round3(14, RR, RL, load_next_key, __movq);
|
||||
round3(15, RL, RR, load_next_key, __movq);
|
||||
|
||||
/* steps 16..31: same (RL, RR) roles as step 15, i.e. no swap between groups */
round3(16+0, RL, RR, load_next_key, __movq);
|
||||
round3(16+1, RR, RL, load_next_key, __movq);
|
||||
round3(16+2, RL, RR, load_next_key, __movq);
|
||||
round3(16+3, RR, RL, load_next_key, __movq);
|
||||
round3(16+4, RL, RR, load_next_key, __movq);
|
||||
round3(16+5, RR, RL, load_next_key, __movq);
|
||||
round3(16+6, RL, RR, load_next_key, __movq);
|
||||
round3(16+7, RR, RL, load_next_key, __movq);
|
||||
round3(16+8, RL, RR, load_next_key, __movq);
|
||||
round3(16+9, RR, RL, load_next_key, __movq);
|
||||
round3(16+10, RL, RR, load_next_key, __movq);
|
||||
round3(16+11, RR, RL, load_next_key, __movq);
|
||||
round3(16+12, RL, RR, load_next_key, __movq);
|
||||
round3(16+13, RR, RL, load_next_key, __movq);
|
||||
round3(16+14, RL, RR, load_next_key, __movq);
|
||||
round3(16+15, RR, RL, load_next_key, __movq);
|
||||
|
||||
/*
 * steps 32..47: again no swap between groups.  The final step passes dummy2
 * for both hooks.
 * NOTE(review): dummy2 is defined outside this excerpt; presumably it expands
 * to nothing because no further key is needed -- confirm.
 */
round3(32+0, RR, RL, load_next_key, __movq);
|
||||
round3(32+1, RL, RR, load_next_key, __movq);
|
||||
round3(32+2, RR, RL, load_next_key, __movq);
|
||||
round3(32+3, RL, RR, load_next_key, __movq);
|
||||
round3(32+4, RR, RL, load_next_key, __movq);
|
||||
round3(32+5, RL, RR, load_next_key, __movq);
|
||||
round3(32+6, RR, RL, load_next_key, __movq);
|
||||
round3(32+7, RL, RR, load_next_key, __movq);
|
||||
round3(32+8, RR, RL, load_next_key, __movq);
|
||||
round3(32+9, RL, RR, load_next_key, __movq);
|
||||
round3(32+10, RR, RL, load_next_key, __movq);
|
||||
round3(32+11, RL, RR, load_next_key, __movq);
|
||||
round3(32+12, RR, RL, load_next_key, __movq);
|
||||
round3(32+13, RL, RR, load_next_key, __movq);
|
||||
round3(32+14, RR, RL, load_next_key, __movq);
|
||||
round3(32+15, RL, RR, dummy2, dummy2);
|
||||
|
||||
final_permutation3(RR, RL);
|
||||
|
||||
/* byte-swap the result halves back */
bswapl RR0d;
|
||||
bswapl RL0d;
|
||||
bswapl RR1d;
|
||||
bswapl RL1d;
|
||||
bswapl RR2d;
|
||||
bswapl RL2d;
|
||||
|
||||
/* store output: for each block the RR half is written first, then RL */
movl RR0d, 0 * 4(%rsi);
|
||||
movl RL0d, 1 * 4(%rsi);
|
||||
movl RR1d, 2 * 4(%rsi);
|
||||
movl RL1d, 3 * 4(%rsi);
|
||||
movl RR2d, 4 * 4(%rsi);
|
||||
movl RL2d, 5 * 4(%rsi);
|
||||
|
||||
/* restore the saved registers in reverse push order */
popq %r15;
|
||||
popq %r14;
|
||||
popq %r13;
|
||||
popq %r12;
|
||||
popq %rbx;
|
||||
popq %rbp;
|
||||
|
||||
ret;
|
||||
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
|
||||
|
||||
/*
 * The S-box lookup tables below are constants that the code only ever reads
 * (as XOR source operands), so place them in read-only data instead of the
 * writable .data section.
 */
.section	.rodata, "a", @progbits
.align 16
|
||||
.L_s1:
|
||||
.quad 0x0010100001010400, 0x0000000000000000
|
||||
.quad 0x0000100000010000, 0x0010100001010404
|
||||
.quad 0x0010100001010004, 0x0000100000010404
|
||||
.quad 0x0000000000000004, 0x0000100000010000
|
||||
.quad 0x0000000000000400, 0x0010100001010400
|
||||
.quad 0x0010100001010404, 0x0000000000000400
|
||||
.quad 0x0010000001000404, 0x0010100001010004
|
||||
.quad 0x0010000001000000, 0x0000000000000004
|
||||
.quad 0x0000000000000404, 0x0010000001000400
|
||||
.quad 0x0010000001000400, 0x0000100000010400
|
||||
.quad 0x0000100000010400, 0x0010100001010000
|
||||
.quad 0x0010100001010000, 0x0010000001000404
|
||||
.quad 0x0000100000010004, 0x0010000001000004
|
||||
.quad 0x0010000001000004, 0x0000100000010004
|
||||
.quad 0x0000000000000000, 0x0000000000000404
|
||||
.quad 0x0000100000010404, 0x0010000001000000
|
||||
.quad 0x0000100000010000, 0x0010100001010404
|
||||
.quad 0x0000000000000004, 0x0010100001010000
|
||||
.quad 0x0010100001010400, 0x0010000001000000
|
||||
.quad 0x0010000001000000, 0x0000000000000400
|
||||
.quad 0x0010100001010004, 0x0000100000010000
|
||||
.quad 0x0000100000010400, 0x0010000001000004
|
||||
.quad 0x0000000000000400, 0x0000000000000004
|
||||
.quad 0x0010000001000404, 0x0000100000010404
|
||||
.quad 0x0010100001010404, 0x0000100000010004
|
||||
.quad 0x0010100001010000, 0x0010000001000404
|
||||
.quad 0x0010000001000004, 0x0000000000000404
|
||||
.quad 0x0000100000010404, 0x0010100001010400
|
||||
.quad 0x0000000000000404, 0x0010000001000400
|
||||
.quad 0x0010000001000400, 0x0000000000000000
|
||||
.quad 0x0000100000010004, 0x0000100000010400
|
||||
.quad 0x0000000000000000, 0x0010100001010004
|
||||
.L_s2:
|
||||
.quad 0x0801080200100020, 0x0800080000000000
|
||||
.quad 0x0000080000000000, 0x0001080200100020
|
||||
.quad 0x0001000000100000, 0x0000000200000020
|
||||
.quad 0x0801000200100020, 0x0800080200000020
|
||||
.quad 0x0800000200000020, 0x0801080200100020
|
||||
.quad 0x0801080000100000, 0x0800000000000000
|
||||
.quad 0x0800080000000000, 0x0001000000100000
|
||||
.quad 0x0000000200000020, 0x0801000200100020
|
||||
.quad 0x0001080000100000, 0x0001000200100020
|
||||
.quad 0x0800080200000020, 0x0000000000000000
|
||||
.quad 0x0800000000000000, 0x0000080000000000
|
||||
.quad 0x0001080200100020, 0x0801000000100000
|
||||
.quad 0x0001000200100020, 0x0800000200000020
|
||||
.quad 0x0000000000000000, 0x0001080000100000
|
||||
.quad 0x0000080200000020, 0x0801080000100000
|
||||
.quad 0x0801000000100000, 0x0000080200000020
|
||||
.quad 0x0000000000000000, 0x0001080200100020
|
||||
.quad 0x0801000200100020, 0x0001000000100000
|
||||
.quad 0x0800080200000020, 0x0801000000100000
|
||||
.quad 0x0801080000100000, 0x0000080000000000
|
||||
.quad 0x0801000000100000, 0x0800080000000000
|
||||
.quad 0x0000000200000020, 0x0801080200100020
|
||||
.quad 0x0001080200100020, 0x0000000200000020
|
||||
.quad 0x0000080000000000, 0x0800000000000000
|
||||
.quad 0x0000080200000020, 0x0801080000100000
|
||||
.quad 0x0001000000100000, 0x0800000200000020
|
||||
.quad 0x0001000200100020, 0x0800080200000020
|
||||
.quad 0x0800000200000020, 0x0001000200100020
|
||||
.quad 0x0001080000100000, 0x0000000000000000
|
||||
.quad 0x0800080000000000, 0x0000080200000020
|
||||
.quad 0x0800000000000000, 0x0801000200100020
|
||||
.quad 0x0801080200100020, 0x0001080000100000
|
||||
.L_s3:
|
||||
.quad 0x0000002000000208, 0x0000202008020200
|
||||
.quad 0x0000000000000000, 0x0000200008020008
|
||||
.quad 0x0000002008000200, 0x0000000000000000
|
||||
.quad 0x0000202000020208, 0x0000002008000200
|
||||
.quad 0x0000200000020008, 0x0000000008000008
|
||||
.quad 0x0000000008000008, 0x0000200000020000
|
||||
.quad 0x0000202008020208, 0x0000200000020008
|
||||
.quad 0x0000200008020000, 0x0000002000000208
|
||||
.quad 0x0000000008000000, 0x0000000000000008
|
||||
.quad 0x0000202008020200, 0x0000002000000200
|
||||
.quad 0x0000202000020200, 0x0000200008020000
|
||||
.quad 0x0000200008020008, 0x0000202000020208
|
||||
.quad 0x0000002008000208, 0x0000202000020200
|
||||
.quad 0x0000200000020000, 0x0000002008000208
|
||||
.quad 0x0000000000000008, 0x0000202008020208
|
||||
.quad 0x0000002000000200, 0x0000000008000000
|
||||
.quad 0x0000202008020200, 0x0000000008000000
|
||||
.quad 0x0000200000020008, 0x0000002000000208
|
||||
.quad 0x0000200000020000, 0x0000202008020200
|
||||
.quad 0x0000002008000200, 0x0000000000000000
|
||||
.quad 0x0000002000000200, 0x0000200000020008
|
||||
.quad 0x0000202008020208, 0x0000002008000200
|
||||
.quad 0x0000000008000008, 0x0000002000000200
|
||||
.quad 0x0000000000000000, 0x0000200008020008
|
||||
.quad 0x0000002008000208, 0x0000200000020000
|
||||
.quad 0x0000000008000000, 0x0000202008020208
|
||||
.quad 0x0000000000000008, 0x0000202000020208
|
||||
.quad 0x0000202000020200, 0x0000000008000008
|
||||
.quad 0x0000200008020000, 0x0000002008000208
|
||||
.quad 0x0000002000000208, 0x0000200008020000
|
||||
.quad 0x0000202000020208, 0x0000000000000008
|
||||
.quad 0x0000200008020008, 0x0000202000020200
|
||||
.L_s4:
|
||||
.quad 0x1008020000002001, 0x1000020800002001
|
||||
.quad 0x1000020800002001, 0x0000000800000000
|
||||
.quad 0x0008020800002000, 0x1008000800000001
|
||||
.quad 0x1008000000000001, 0x1000020000002001
|
||||
.quad 0x0000000000000000, 0x0008020000002000
|
||||
.quad 0x0008020000002000, 0x1008020800002001
|
||||
.quad 0x1000000800000001, 0x0000000000000000
|
||||
.quad 0x0008000800000000, 0x1008000000000001
|
||||
.quad 0x1000000000000001, 0x0000020000002000
|
||||
.quad 0x0008000000000000, 0x1008020000002001
|
||||
.quad 0x0000000800000000, 0x0008000000000000
|
||||
.quad 0x1000020000002001, 0x0000020800002000
|
||||
.quad 0x1008000800000001, 0x1000000000000001
|
||||
.quad 0x0000020800002000, 0x0008000800000000
|
||||
.quad 0x0000020000002000, 0x0008020800002000
|
||||
.quad 0x1008020800002001, 0x1000000800000001
|
||||
.quad 0x0008000800000000, 0x1008000000000001
|
||||
.quad 0x0008020000002000, 0x1008020800002001
|
||||
.quad 0x1000000800000001, 0x0000000000000000
|
||||
.quad 0x0000000000000000, 0x0008020000002000
|
||||
.quad 0x0000020800002000, 0x0008000800000000
|
||||
.quad 0x1008000800000001, 0x1000000000000001
|
||||
.quad 0x1008020000002001, 0x1000020800002001
|
||||
.quad 0x1000020800002001, 0x0000000800000000
|
||||
.quad 0x1008020800002001, 0x1000000800000001
|
||||
.quad 0x1000000000000001, 0x0000020000002000
|
||||
.quad 0x1008000000000001, 0x1000020000002001
|
||||
.quad 0x0008020800002000, 0x1008000800000001
|
||||
.quad 0x1000020000002001, 0x0000020800002000
|
||||
.quad 0x0008000000000000, 0x1008020000002001
|
||||
.quad 0x0000000800000000, 0x0008000000000000
|
||||
.quad 0x0000020000002000, 0x0008020800002000
|
||||
.L_s5:
|
||||
.quad 0x0000001000000100, 0x0020001002080100
|
||||
.quad 0x0020000002080000, 0x0420001002000100
|
||||
.quad 0x0000000000080000, 0x0000001000000100
|
||||
.quad 0x0400000000000000, 0x0020000002080000
|
||||
.quad 0x0400001000080100, 0x0000000000080000
|
||||
.quad 0x0020001002000100, 0x0400001000080100
|
||||
.quad 0x0420001002000100, 0x0420000002080000
|
||||
.quad 0x0000001000080100, 0x0400000000000000
|
||||
.quad 0x0020000002000000, 0x0400000000080000
|
||||
.quad 0x0400000000080000, 0x0000000000000000
|
||||
.quad 0x0400001000000100, 0x0420001002080100
|
||||
.quad 0x0420001002080100, 0x0020001002000100
|
||||
.quad 0x0420000002080000, 0x0400001000000100
|
||||
.quad 0x0000000000000000, 0x0420000002000000
|
||||
.quad 0x0020001002080100, 0x0020000002000000
|
||||
.quad 0x0420000002000000, 0x0000001000080100
|
||||
.quad 0x0000000000080000, 0x0420001002000100
|
||||
.quad 0x0000001000000100, 0x0020000002000000
|
||||
.quad 0x0400000000000000, 0x0020000002080000
|
||||
.quad 0x0420001002000100, 0x0400001000080100
|
||||
.quad 0x0020001002000100, 0x0400000000000000
|
||||
.quad 0x0420000002080000, 0x0020001002080100
|
||||
.quad 0x0400001000080100, 0x0000001000000100
|
||||
.quad 0x0020000002000000, 0x0420000002080000
|
||||
.quad 0x0420001002080100, 0x0000001000080100
|
||||
.quad 0x0420000002000000, 0x0420001002080100
|
||||
.quad 0x0020000002080000, 0x0000000000000000
|
||||
.quad 0x0400000000080000, 0x0420000002000000
|
||||
.quad 0x0000001000080100, 0x0020001002000100
|
||||
.quad 0x0400001000000100, 0x0000000000080000
|
||||
.quad 0x0000000000000000, 0x0400000000080000
|
||||
.quad 0x0020001002080100, 0x0400001000000100
|
||||
.L_s6:
|
||||
.quad 0x0200000120000010, 0x0204000020000000
|
||||
.quad 0x0000040000000000, 0x0204040120000010
|
||||
.quad 0x0204000020000000, 0x0000000100000010
|
||||
.quad 0x0204040120000010, 0x0004000000000000
|
||||
.quad 0x0200040020000000, 0x0004040100000010
|
||||
.quad 0x0004000000000000, 0x0200000120000010
|
||||
.quad 0x0004000100000010, 0x0200040020000000
|
||||
.quad 0x0200000020000000, 0x0000040100000010
|
||||
.quad 0x0000000000000000, 0x0004000100000010
|
||||
.quad 0x0200040120000010, 0x0000040000000000
|
||||
.quad 0x0004040000000000, 0x0200040120000010
|
||||
.quad 0x0000000100000010, 0x0204000120000010
|
||||
.quad 0x0204000120000010, 0x0000000000000000
|
||||
.quad 0x0004040100000010, 0x0204040020000000
|
||||
.quad 0x0000040100000010, 0x0004040000000000
|
||||
.quad 0x0204040020000000, 0x0200000020000000
|
||||
.quad 0x0200040020000000, 0x0000000100000010
|
||||
.quad 0x0204000120000010, 0x0004040000000000
|
||||
.quad 0x0204040120000010, 0x0004000000000000
|
||||
.quad 0x0000040100000010, 0x0200000120000010
|
||||
.quad 0x0004000000000000, 0x0200040020000000
|
||||
.quad 0x0200000020000000, 0x0000040100000010
|
||||
.quad 0x0200000120000010, 0x0204040120000010
|
||||
.quad 0x0004040000000000, 0x0204000020000000
|
||||
.quad 0x0004040100000010, 0x0204040020000000
|
||||
.quad 0x0000000000000000, 0x0204000120000010
|
||||
.quad 0x0000000100000010, 0x0000040000000000
|
||||
.quad 0x0204000020000000, 0x0004040100000010
|
||||
.quad 0x0000040000000000, 0x0004000100000010
|
||||
.quad 0x0200040120000010, 0x0000000000000000
|
||||
.quad 0x0204040020000000, 0x0200000020000000
|
||||
.quad 0x0004000100000010, 0x0200040120000010
|
||||
.L_s7:
|
||||
.quad 0x0002000000200000, 0x2002000004200002
|
||||
.quad 0x2000000004000802, 0x0000000000000000
|
||||
.quad 0x0000000000000800, 0x2000000004000802
|
||||
.quad 0x2002000000200802, 0x0002000004200800
|
||||
.quad 0x2002000004200802, 0x0002000000200000
|
||||
.quad 0x0000000000000000, 0x2000000004000002
|
||||
.quad 0x2000000000000002, 0x0000000004000000
|
||||
.quad 0x2002000004200002, 0x2000000000000802
|
||||
.quad 0x0000000004000800, 0x2002000000200802
|
||||
.quad 0x2002000000200002, 0x0000000004000800
|
||||
.quad 0x2000000004000002, 0x0002000004200000
|
||||
.quad 0x0002000004200800, 0x2002000000200002
|
||||
.quad 0x0002000004200000, 0x0000000000000800
|
||||
.quad 0x2000000000000802, 0x2002000004200802
|
||||
.quad 0x0002000000200800, 0x2000000000000002
|
||||
.quad 0x0000000004000000, 0x0002000000200800
|
||||
.quad 0x0000000004000000, 0x0002000000200800
|
||||
.quad 0x0002000000200000, 0x2000000004000802
|
||||
.quad 0x2000000004000802, 0x2002000004200002
|
||||
.quad 0x2002000004200002, 0x2000000000000002
|
||||
.quad 0x2002000000200002, 0x0000000004000000
|
||||
.quad 0x0000000004000800, 0x0002000000200000
|
||||
.quad 0x0002000004200800, 0x2000000000000802
|
||||
.quad 0x2002000000200802, 0x0002000004200800
|
||||
.quad 0x2000000000000802, 0x2000000004000002
|
||||
.quad 0x2002000004200802, 0x0002000004200000
|
||||
.quad 0x0002000000200800, 0x0000000000000000
|
||||
.quad 0x2000000000000002, 0x2002000004200802
|
||||
.quad 0x0000000000000000, 0x2002000000200802
|
||||
.quad 0x0002000004200000, 0x0000000000000800
|
||||
.quad 0x2000000004000002, 0x0000000004000800
|
||||
.quad 0x0000000000000800, 0x2002000000200002
|
||||
.L_s8:
|
||||
.quad 0x0100010410001000, 0x0000010000001000
|
||||
.quad 0x0000000000040000, 0x0100010410041000
|
||||
.quad 0x0100000010000000, 0x0100010410001000
|
||||
.quad 0x0000000400000000, 0x0100000010000000
|
||||
.quad 0x0000000400040000, 0x0100000010040000
|
||||
.quad 0x0100010410041000, 0x0000010000041000
|
||||
.quad 0x0100010010041000, 0x0000010400041000
|
||||
.quad 0x0000010000001000, 0x0000000400000000
|
||||
.quad 0x0100000010040000, 0x0100000410000000
|
||||
.quad 0x0100010010001000, 0x0000010400001000
|
||||
.quad 0x0000010000041000, 0x0000000400040000
|
||||
.quad 0x0100000410040000, 0x0100010010041000
|
||||
.quad 0x0000010400001000, 0x0000000000000000
|
||||
.quad 0x0000000000000000, 0x0100000410040000
|
||||
.quad 0x0100000410000000, 0x0100010010001000
|
||||
.quad 0x0000010400041000, 0x0000000000040000
|
||||
.quad 0x0000010400041000, 0x0000000000040000
|
||||
.quad 0x0100010010041000, 0x0000010000001000
|
||||
.quad 0x0000000400000000, 0x0100000410040000
|
||||
.quad 0x0000010000001000, 0x0000010400041000
|
||||
.quad 0x0100010010001000, 0x0000000400000000
|
||||
.quad 0x0100000410000000, 0x0100000010040000
|
||||
.quad 0x0100000410040000, 0x0100000010000000
|
||||
.quad 0x0000000000040000, 0x0100010410001000
|
||||
.quad 0x0000000000000000, 0x0100010410041000
|
||||
.quad 0x0000000400040000, 0x0100000410000000
|
||||
.quad 0x0100000010040000, 0x0100010010001000
|
||||
.quad 0x0100010410001000, 0x0000000000000000
|
||||
.quad 0x0100010410041000, 0x0000010000041000
|
||||
.quad 0x0000010000041000, 0x0000010400001000
|
||||
.quad 0x0000010400001000, 0x0000000400040000
|
||||
.quad 0x0100000010000000, 0x0100010010041000
|
||||
509
arch/x86/crypto/des3_ede_glue.c
Normal file
509
arch/x86/crypto/des3_ede_glue.c
Normal file
|
|
@ -0,0 +1,509 @@
|
|||
/*
|
||||
* Glue Code for assembler optimized version of 3DES
|
||||
*
|
||||
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
||||
*
|
||||
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
|
||||
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
|
||||
* CTR part based on code (crypto/ctr.c) by:
|
||||
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <crypto/des.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <crypto/algapi.h>
|
||||
|
||||
/*
 * Per-transform context: two expanded key schedules, one handed to the
 * assembler routines for encryption and one for decryption.  Both are filled
 * in by des3_ede_x86_setkey().
 */
struct des3_ede_x86_ctx {
|
||||
u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
|
||||
u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
|
||||
};
|
||||
|
||||
/*
 * Regular block cipher function (assembler): transforms one block from src
 * into dst with the key schedule expkey.  The same routine is used for both
 * directions; the caller selects encryption or decryption by the schedule
 * it passes in.
 */
asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
|
||||
const u8 *src);
|
||||
|
||||
/*
 * 3-way parallel cipher function (assembler): same contract as above, but
 * src and dst each hold three consecutive blocks.
 */
asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
|
||||
const u8 *src);
|
||||
|
||||
/* Encrypt a single block from @src into @dst using the encryption schedule. */
static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
				    const u8 *src)
{
	des3_ede_x86_64_crypt_blk(ctx->enc_expkey, dst, src);
}
|
||||
|
||||
/* Decrypt a single block from @src into @dst using the decryption schedule. */
static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
				    const u8 *src)
{
	des3_ede_x86_64_crypt_blk(ctx->dec_expkey, dst, src);
}
|
||||
|
||||
/* Encrypt three consecutive blocks from @src into @dst in one call. */
static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
					 const u8 *src)
{
	des3_ede_x86_64_crypt_blk_3way(ctx->enc_expkey, dst, src);
}
|
||||
|
||||
/* Decrypt three consecutive blocks from @src into @dst in one call. */
static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
					 const u8 *src)
{
	des3_ede_x86_64_crypt_blk_3way(ctx->dec_expkey, dst, src);
}
|
||||
|
||||
/* .cia_encrypt hook: encrypt one block with the transform's context. */
static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);

	des3_ede_enc_blk(ctx, dst, src);
}
|
||||
|
||||
/* .cia_decrypt hook: decrypt one block with the transform's context. */
static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);

	des3_ede_dec_blk(ctx, dst, src);
}
|
||||
|
||||
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
|
||||
const u32 *expkey)
|
||||
{
|
||||
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = blkcipher_walk_virt(desc, walk);
|
||||
|
||||
while ((nbytes = walk->nbytes)) {
|
||||
u8 *wsrc = walk->src.virt.addr;
|
||||
u8 *wdst = walk->dst.virt.addr;
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * 3) {
|
||||
do {
|
||||
des3_ede_x86_64_crypt_blk_3way(expkey, wdst,
|
||||
wsrc);
|
||||
|
||||
wsrc += bsize * 3;
|
||||
wdst += bsize * 3;
|
||||
nbytes -= bsize * 3;
|
||||
} while (nbytes >= bsize * 3);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc);
|
||||
|
||||
wsrc += bsize;
|
||||
wdst += bsize;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
done:
|
||||
err = blkcipher_walk_done(desc, walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, ctx->enc_expkey);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
struct blkcipher_walk walk;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
return ecb_crypt(desc, &walk, ctx->dec_expkey);
|
||||
}
|
||||
|
||||
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 *iv = (u64 *)walk->iv;
|
||||
|
||||
do {
|
||||
*dst = *src ^ *iv;
|
||||
des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
|
||||
iv = dst;
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
nbytes -= bsize;
|
||||
} while (nbytes >= bsize);
|
||||
|
||||
*(u64 *)walk->iv = *iv;
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
nbytes = __cbc_encrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u64 *src = (u64 *)walk->src.virt.addr;
|
||||
u64 *dst = (u64 *)walk->dst.virt.addr;
|
||||
u64 ivs[3 - 1];
|
||||
u64 last_iv;
|
||||
|
||||
/* Start of the last block. */
|
||||
src += nbytes / bsize - 1;
|
||||
dst += nbytes / bsize - 1;
|
||||
|
||||
last_iv = *src;
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * 3) {
|
||||
do {
|
||||
nbytes -= bsize * 3 - bsize;
|
||||
src -= 3 - 1;
|
||||
dst -= 3 - 1;
|
||||
|
||||
ivs[0] = src[0];
|
||||
ivs[1] = src[1];
|
||||
|
||||
des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
dst[1] ^= ivs[0];
|
||||
dst[2] ^= ivs[1];
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
} while (nbytes >= bsize * 3);
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
for (;;) {
|
||||
des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src);
|
||||
|
||||
nbytes -= bsize;
|
||||
if (nbytes < bsize)
|
||||
break;
|
||||
|
||||
*dst ^= *(src - 1);
|
||||
src -= 1;
|
||||
dst -= 1;
|
||||
}
|
||||
|
||||
done:
|
||||
*dst ^= *(u64 *)walk->iv;
|
||||
*(u64 *)walk->iv = last_iv;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
while ((nbytes = walk.nbytes)) {
|
||||
nbytes = __cbc_decrypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
u8 *ctrblk = walk->iv;
|
||||
u8 keystream[DES3_EDE_BLOCK_SIZE];
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
|
||||
des3_ede_enc_blk(ctx, keystream, ctrblk);
|
||||
crypto_xor(keystream, src, nbytes);
|
||||
memcpy(dst, keystream, nbytes);
|
||||
|
||||
crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
|
||||
struct blkcipher_walk *walk)
|
||||
{
|
||||
struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
__be64 *src = (__be64 *)walk->src.virt.addr;
|
||||
__be64 *dst = (__be64 *)walk->dst.virt.addr;
|
||||
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
|
||||
__be64 ctrblocks[3];
|
||||
|
||||
/* Process four block batch */
|
||||
if (nbytes >= bsize * 3) {
|
||||
do {
|
||||
/* create ctrblks for parallel encrypt */
|
||||
ctrblocks[0] = cpu_to_be64(ctrblk++);
|
||||
ctrblocks[1] = cpu_to_be64(ctrblk++);
|
||||
ctrblocks[2] = cpu_to_be64(ctrblk++);
|
||||
|
||||
des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
|
||||
(u8 *)ctrblocks);
|
||||
|
||||
dst[0] = src[0] ^ ctrblocks[0];
|
||||
dst[1] = src[1] ^ ctrblocks[1];
|
||||
dst[2] = src[2] ^ ctrblocks[2];
|
||||
|
||||
src += 3;
|
||||
dst += 3;
|
||||
} while ((nbytes -= bsize * 3) >= bsize * 3);
|
||||
|
||||
if (nbytes < bsize)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Handle leftovers */
|
||||
do {
|
||||
ctrblocks[0] = cpu_to_be64(ctrblk++);
|
||||
|
||||
des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
|
||||
|
||||
dst[0] = src[0] ^ ctrblocks[0];
|
||||
|
||||
src += 1;
|
||||
dst += 1;
|
||||
} while ((nbytes -= bsize) >= bsize);
|
||||
|
||||
done:
|
||||
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
|
||||
|
||||
while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
|
||||
nbytes = __ctr_crypt(desc, &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, nbytes);
|
||||
}
|
||||
|
||||
if (walk.nbytes) {
|
||||
ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
u32 i, j, tmp;
|
||||
int err;
|
||||
|
||||
/* Generate encryption context using generic implementation. */
|
||||
err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/* Fix encryption context for this implementation and form decryption
|
||||
* context. */
|
||||
j = DES3_EDE_EXPKEY_WORDS - 2;
|
||||
for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
|
||||
tmp = ror32(ctx->enc_expkey[i + 1], 4);
|
||||
ctx->enc_expkey[i + 1] = tmp;
|
||||
|
||||
ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
|
||||
ctx->dec_expkey[j + 1] = tmp;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct crypto_alg des3_ede_algs[4] = { {
|
||||
.cra_name = "des3_ede",
|
||||
.cra_driver_name = "des3_ede-asm",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = DES3_EDE_KEY_SIZE,
|
||||
.cia_max_keysize = DES3_EDE_KEY_SIZE,
|
||||
.cia_setkey = des3_ede_x86_setkey,
|
||||
.cia_encrypt = des3_ede_x86_encrypt,
|
||||
.cia_decrypt = des3_ede_x86_decrypt,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.cra_name = "ecb(des3_ede)",
|
||||
.cra_driver_name = "ecb-des3_ede-asm",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = DES3_EDE_KEY_SIZE,
|
||||
.max_keysize = DES3_EDE_KEY_SIZE,
|
||||
.setkey = des3_ede_x86_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "cbc(des3_ede)",
|
||||
.cra_driver_name = "cbc-des3_ede-asm",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = DES3_EDE_KEY_SIZE,
|
||||
.max_keysize = DES3_EDE_KEY_SIZE,
|
||||
.ivsize = DES3_EDE_BLOCK_SIZE,
|
||||
.setkey = des3_ede_x86_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ctr(des3_ede)",
|
||||
.cra_driver_name = "ctr-des3_ede-asm",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = DES3_EDE_KEY_SIZE,
|
||||
.max_keysize = DES3_EDE_KEY_SIZE,
|
||||
.ivsize = DES3_EDE_BLOCK_SIZE,
|
||||
.setkey = des3_ede_x86_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
},
|
||||
},
|
||||
} };
|
||||
|
||||
static bool is_blacklisted_cpu(void)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return false;
|
||||
|
||||
if (boot_cpu_data.x86 == 0x0f) {
|
||||
/*
|
||||
* On Pentium 4, des3_ede-x86_64 is slower than generic C
|
||||
* implementation because use of 64bit rotates (which are really
|
||||
* slow on P4). Therefore blacklist P4s.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Module parameter "force": when non-zero, des3_ede_x86_init() skips the
 * CPU blacklist check. Registered with permission bits 0.
 */
static int force;
|
||||
module_param(force, int, 0);
|
||||
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
|
||||
|
||||
/*
 * Module entry point: register all algorithms in des3_ede_algs.
 *
 * Registration is refused with -ENODEV on a blacklisted CPU unless the
 * "force" module parameter is set. Otherwise the result of
 * crypto_register_algs() is returned.
 */
static int __init des3_ede_x86_init(void)
{
	if (force || !is_blacklisted_cpu())
		return crypto_register_algs(des3_ede_algs,
					    ARRAY_SIZE(des3_ede_algs));

	pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
	return -ENODEV;
}
|
||||
|
||||
/* Module exit point: unregister every algorithm listed in des3_ede_algs. */
static void __exit des3_ede_x86_fini(void)
|
||||
{
|
||||
crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
|
||||
}
|
||||
|
||||
module_init(des3_ede_x86_init);
|
||||
module_exit(des3_ede_x86_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
|
||||
MODULE_ALIAS_CRYPTO("des3_ede");
|
||||
MODULE_ALIAS_CRYPTO("des3_ede-asm");
|
||||
MODULE_ALIAS_CRYPTO("des");
|
||||
MODULE_ALIAS_CRYPTO("des-asm");
|
||||
MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");
|
||||
164
arch/x86/crypto/fpu.c
Normal file
164
arch/x86/crypto/fpu.c
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* FPU: Wrapper for blkcipher touching fpu
|
||||
*
|
||||
* Copyright (c) Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <asm/i387.h>
|
||||
|
||||
/*
 * Per-transform context of the "fpu" wrapper.
 * child: the wrapped blkcipher, set up in crypto_fpu_init_tfm() and
 *        released in crypto_fpu_exit_tfm().
 */
struct crypto_fpu_ctx {
|
||||
struct crypto_blkcipher *child;
|
||||
};
|
||||
|
||||
static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
|
||||
struct crypto_blkcipher *child = ctx->child;
|
||||
int err;
|
||||
|
||||
crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_blkcipher_setkey(child, key, keylen);
|
||||
crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
|
||||
struct scatterlist *dst, struct scatterlist *src,
|
||||
unsigned int nbytes)
|
||||
{
|
||||
int err;
|
||||
struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
|
||||
struct crypto_blkcipher *child = ctx->child;
|
||||
struct blkcipher_desc desc = {
|
||||
.tfm = child,
|
||||
.info = desc_in->info,
|
||||
.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
};
|
||||
|
||||
kernel_fpu_begin();
|
||||
err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
|
||||
kernel_fpu_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
|
||||
struct scatterlist *dst, struct scatterlist *src,
|
||||
unsigned int nbytes)
|
||||
{
|
||||
int err;
|
||||
struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
|
||||
struct crypto_blkcipher *child = ctx->child;
|
||||
struct blkcipher_desc desc = {
|
||||
.tfm = child,
|
||||
.info = desc_in->info,
|
||||
.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
};
|
||||
|
||||
kernel_fpu_begin();
|
||||
err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
|
||||
kernel_fpu_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
|
||||
struct crypto_spawn *spawn = crypto_instance_ctx(inst);
|
||||
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct crypto_blkcipher *cipher;
|
||||
|
||||
cipher = crypto_spawn_blkcipher(spawn);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
|
||||
ctx->child = cipher;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
crypto_free_blkcipher(ctx->child);
|
||||
}
|
||||
|
||||
static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
|
||||
{
|
||||
struct crypto_instance *inst;
|
||||
struct crypto_alg *alg;
|
||||
int err;
|
||||
|
||||
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
CRYPTO_ALG_TYPE_MASK);
|
||||
if (IS_ERR(alg))
|
||||
return ERR_CAST(alg);
|
||||
|
||||
inst = crypto_alloc_instance("fpu", alg);
|
||||
if (IS_ERR(inst))
|
||||
goto out_put_alg;
|
||||
|
||||
inst->alg.cra_flags = alg->cra_flags;
|
||||
inst->alg.cra_priority = alg->cra_priority;
|
||||
inst->alg.cra_blocksize = alg->cra_blocksize;
|
||||
inst->alg.cra_alignmask = alg->cra_alignmask;
|
||||
inst->alg.cra_type = alg->cra_type;
|
||||
inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
|
||||
inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
|
||||
inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
|
||||
inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
|
||||
inst->alg.cra_init = crypto_fpu_init_tfm;
|
||||
inst->alg.cra_exit = crypto_fpu_exit_tfm;
|
||||
inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
|
||||
inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
|
||||
inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
|
||||
|
||||
out_put_alg:
|
||||
crypto_mod_put(alg);
|
||||
return inst;
|
||||
}
|
||||
|
||||
/* Template ->free hook: drop the instance's spawn, then free the instance. */
static void crypto_fpu_free(struct crypto_instance *inst)
{
	struct crypto_spawn *spawn = crypto_instance_ctx(inst);

	crypto_drop_spawn(spawn);
	kfree(inst);
}
|
||||
|
||||
static struct crypto_template crypto_fpu_tmpl = {
|
||||
.name = "fpu",
|
||||
.alloc = crypto_fpu_alloc,
|
||||
.free = crypto_fpu_free,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
/*
 * Register the "fpu" template. Not static and not hooked up through
 * module_init() in this file, so it is presumably called from another
 * translation unit -- confirm. Returns crypto_register_template()'s result.
 */
int __init crypto_fpu_init(void)
|
||||
{
|
||||
return crypto_register_template(&crypto_fpu_tmpl);
|
||||
}
|
||||
|
||||
/* Unregister the "fpu" template; counterpart of crypto_fpu_init(). */
void __exit crypto_fpu_exit(void)
|
||||
{
|
||||
crypto_unregister_template(&crypto_fpu_tmpl);
|
||||
}
|
||||
|
||||
MODULE_ALIAS_CRYPTO("fpu");
|
||||
132
arch/x86/crypto/ghash-clmulni-intel_asm.S
Normal file
132
arch/x86/crypto/ghash-clmulni-intel_asm.S
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
/*
|
||||
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
|
||||
* instructions. This file contains the accelerated part of the ghash
|
||||
* implementation. More information about PCLMULQDQ can be found at:
|
||||
*
|
||||
* http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
|
||||
*
|
||||
* Copyright (c) 2009 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
* Vinodh Gopal
|
||||
* Erdinc Ozturk
|
||||
* Deniz Karakoyunlu
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/inst.h>
|
||||
|
||||
.data
|
||||
|
||||
.align 16
|
||||
.Lbswap_mask:
|
||||
.octa 0x000102030405060708090a0b0c0d0e0f
|
||||
|
||||
#define DATA %xmm0
|
||||
#define SHASH %xmm1
|
||||
#define T1 %xmm2
|
||||
#define T2 %xmm3
|
||||
#define T3 %xmm4
|
||||
#define BSWAP %xmm5
|
||||
#define IN1 %xmm6
|
||||
|
||||
.text
|
||||
|
||||
/*
|
||||
* __clmul_gf128mul_ble: internal ABI
|
||||
* input:
|
||||
* DATA: operand1
|
||||
* SHASH: operand2, hash_key << 1 mod poly
|
||||
* output:
|
||||
* DATA: operand1 * operand2 mod poly
|
||||
* changed:
|
||||
* T1
|
||||
* T2
|
||||
* T3
|
||||
*/
|
||||
__clmul_gf128mul_ble:
|
||||
movaps DATA, T1 # T1 = copy of operand1
|
||||
pshufd $0b01001110, DATA, T2 # T2 = DATA with its 64-bit halves swapped
|
||||
pshufd $0b01001110, SHASH, T3 # T3 = SHASH with its 64-bit halves swapped
|
||||
pxor DATA, T2 # both halves of T2 = a1 + a0
|
||||
pxor SHASH, T3 # both halves of T3 = b1 + b0
|
||||
|
||||
PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
|
||||
PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
|
||||
PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
|
||||
pxor DATA, T2
|
||||
pxor T1, T2 # T2 = a0 * b1 + a1 * b0
|
||||
|
||||
movaps T2, T3
|
||||
pslldq $8, T3 # T3 = low half of the middle term, moved to the high half
|
||||
psrldq $8, T2 # T2 = high half of the middle term, moved to the low half
|
||||
pxor T3, DATA
|
||||
pxor T2, T1 # <T1:DATA> is result of
|
||||
# carry-less multiplication
|
||||
|
||||
# first phase of the reduction
|
||||
movaps DATA, T3
|
||||
psllq $1, T3
|
||||
pxor DATA, T3
|
||||
psllq $5, T3
|
||||
pxor DATA, T3
|
||||
psllq $57, T3
|
||||
movaps T3, T2
|
||||
pslldq $8, T2
|
||||
psrldq $8, T3
|
||||
pxor T2, DATA
|
||||
pxor T3, T1
|
||||
|
||||
# second phase of the reduction
|
||||
movaps DATA, T2
|
||||
psrlq $5, T2
|
||||
pxor DATA, T2
|
||||
psrlq $1, T2
|
||||
pxor DATA, T2
|
||||
psrlq $1, T2
|
||||
pxor T2, T1
|
||||
pxor T1, DATA # DATA = reduced product, returned to the caller
|
||||
ret
|
||||
ENDPROC(__clmul_gf128mul_ble)
|
||||
|
||||
/* void clmul_ghash_mul(char *dst, const u128 *shash) */
/*
 * Multiplies the 16-byte block at dst (%rdi) by the value at shash (%rsi)
 * via __clmul_gf128mul_ble and writes the product back to dst. The block is
 * shuffled with .Lbswap_mask before and after the multiplication.
 * Uses DATA, SHASH and BSWAP plus the registers __clmul_gf128mul_ble changes.
 */
ENTRY(clmul_ghash_mul)
|
||||
movups (%rdi), DATA
|
||||
movups (%rsi), SHASH
|
||||
movaps .Lbswap_mask, BSWAP
|
||||
PSHUFB_XMM BSWAP DATA
|
||||
call __clmul_gf128mul_ble
|
||||
PSHUFB_XMM BSWAP DATA
|
||||
movups DATA, (%rdi)
|
||||
ret
|
||||
ENDPROC(clmul_ghash_mul)
|
||||
|
||||
/*
|
||||
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
|
||||
* const u128 *shash);
|
||||
*
* For every complete 16-byte block of src (%rsi): shuffle it with
* .Lbswap_mask, XOR it into the running value loaded from dst (%rdi) and
* multiply by the value at shash (%rcx). The result is written back to dst.
* Returns without touching dst when srclen (%rdx) is below 16; a trailing
* partial block is not consumed here.
*/
|
||||
ENTRY(clmul_ghash_update)
|
||||
cmp $16, %rdx
|
||||
jb .Lupdate_just_ret # check length
|
||||
movaps .Lbswap_mask, BSWAP
|
||||
movups (%rdi), DATA
|
||||
movups (%rcx), SHASH
|
||||
PSHUFB_XMM BSWAP DATA
|
||||
.align 4
|
||||
.Lupdate_loop:
|
||||
movups (%rsi), IN1
|
||||
PSHUFB_XMM BSWAP IN1
|
||||
pxor IN1, DATA
|
||||
call __clmul_gf128mul_ble
|
||||
sub $16, %rdx # one block consumed
|
||||
add $16, %rsi
|
||||
cmp $16, %rdx
|
||||
jge .Lupdate_loop # loop while a full block remains
|
||||
PSHUFB_XMM BSWAP DATA
|
||||
movups DATA, (%rdi)
|
||||
.Lupdate_just_ret:
|
||||
ret
|
||||
ENDPROC(clmul_ghash_update)
|
||||
344
arch/x86/crypto/ghash-clmulni-intel_glue.c
Normal file
344
arch/x86/crypto/ghash-clmulni-intel_glue.c
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
/*
|
||||
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
|
||||
* instructions. This file contains glue code.
|
||||
*
|
||||
* Copyright (c) 2009 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/gf128mul.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
#define GHASH_BLOCK_SIZE 16
|
||||
#define GHASH_DIGEST_SIZE 16
|
||||
|
||||
void clmul_ghash_mul(char *dst, const u128 *shash);
|
||||
|
||||
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
|
||||
const u128 *shash);
|
||||
|
||||
/*
 * Transform context of the asynchronous "ghash" algorithm.
 * cryptd_tfm: cryptd handle allocated in ghash_async_init_tfm() and freed
 *             in ghash_async_exit_tfm().
 */
struct ghash_async_ctx {
|
||||
struct cryptd_ahash *cryptd_tfm;
|
||||
};
|
||||
|
||||
/*
 * Transform context of the synchronous "__ghash" algorithm.
 * shash: the key after multiplication by 'x' in GF(2^128), as computed by
 *        ghash_setkey().
 */
struct ghash_ctx {
|
||||
u128 shash;
|
||||
};
|
||||
|
||||
/*
 * Per-request hashing state.
 * buffer: running digest; partial input is XORed into it byte by byte.
 * bytes:  number of bytes still missing to complete the current partial
 *         block (0 when there is no partial block pending).
 */
struct ghash_desc_ctx {
|
||||
u8 buffer[GHASH_BLOCK_SIZE];
|
||||
u32 bytes;
|
||||
};
|
||||
|
||||
static int ghash_init(struct shash_desc *desc)
|
||||
{
|
||||
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
memset(dctx, 0, sizeof(*dctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ghash_setkey(struct crypto_shash *tfm,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
be128 *x = (be128 *)key;
|
||||
u64 a, b;
|
||||
|
||||
if (keylen != GHASH_BLOCK_SIZE) {
|
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* perform multiplication by 'x' in GF(2^128) */
|
||||
a = be64_to_cpu(x->a);
|
||||
b = be64_to_cpu(x->b);
|
||||
|
||||
ctx->shash.a = (b << 1) | (a >> 63);
|
||||
ctx->shash.b = (a << 1) | (b >> 63);
|
||||
|
||||
if (a >> 63)
|
||||
ctx->shash.b ^= ((u64)0xc2) << 56;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ghash_update(struct shash_desc *desc,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
|
||||
u8 *dst = dctx->buffer;
|
||||
|
||||
kernel_fpu_begin();
|
||||
if (dctx->bytes) {
|
||||
int n = min(srclen, dctx->bytes);
|
||||
u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
|
||||
|
||||
dctx->bytes -= n;
|
||||
srclen -= n;
|
||||
|
||||
while (n--)
|
||||
*pos++ ^= *src++;
|
||||
|
||||
if (!dctx->bytes)
|
||||
clmul_ghash_mul(dst, &ctx->shash);
|
||||
}
|
||||
|
||||
clmul_ghash_update(dst, src, srclen, &ctx->shash);
|
||||
kernel_fpu_end();
|
||||
|
||||
if (srclen & 0xf) {
|
||||
src += srclen - (srclen & 0xf);
|
||||
srclen &= 0xf;
|
||||
dctx->bytes = GHASH_BLOCK_SIZE - srclen;
|
||||
while (srclen--)
|
||||
*dst++ ^= *src++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
|
||||
{
|
||||
u8 *dst = dctx->buffer;
|
||||
|
||||
if (dctx->bytes) {
|
||||
u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
|
||||
|
||||
while (dctx->bytes--)
|
||||
*tmp++ ^= 0;
|
||||
|
||||
kernel_fpu_begin();
|
||||
clmul_ghash_mul(dst, &ctx->shash);
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
dctx->bytes = 0;
|
||||
}
|
||||
|
||||
/*
 * Produce the digest: flush any pending partial block, then copy the
 * GHASH_BLOCK_SIZE-byte buffer to @dst. Always returns 0.
 */
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_ctx *key_ctx = crypto_shash_ctx(desc->tfm);
	struct ghash_desc_ctx *state = shash_desc_ctx(desc);

	ghash_flush(key_ctx, state);
	memcpy(dst, state->buffer, GHASH_BLOCK_SIZE);

	return 0;
}
|
||||
|
||||
static struct shash_alg ghash_alg = {
|
||||
.digestsize = GHASH_DIGEST_SIZE,
|
||||
.init = ghash_init,
|
||||
.update = ghash_update,
|
||||
.final = ghash_final,
|
||||
.setkey = ghash_setkey,
|
||||
.descsize = sizeof(struct ghash_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "__ghash",
|
||||
.cra_driver_name = "__ghash-pclmulqdqni",
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = GHASH_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct ghash_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
};
|
||||
|
||||
static int ghash_async_init(struct ahash_request *req)
|
||||
{
|
||||
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
||||
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
||||
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
||||
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
||||
|
||||
if (!irq_fpu_usable()) {
|
||||
memcpy(cryptd_req, req, sizeof(*req));
|
||||
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
||||
return crypto_ahash_init(cryptd_req);
|
||||
} else {
|
||||
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
||||
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
|
||||
|
||||
desc->tfm = child;
|
||||
desc->flags = req->base.flags;
|
||||
return crypto_shash_init(desc);
|
||||
}
|
||||
}
|
||||
|
||||
static int ghash_async_update(struct ahash_request *req)
|
||||
{
|
||||
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
||||
|
||||
if (!irq_fpu_usable()) {
|
||||
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
||||
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
||||
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
||||
|
||||
memcpy(cryptd_req, req, sizeof(*req));
|
||||
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
||||
return crypto_ahash_update(cryptd_req);
|
||||
} else {
|
||||
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
||||
return shash_ahash_update(req, desc);
|
||||
}
|
||||
}
|
||||
|
||||
static int ghash_async_final(struct ahash_request *req)
|
||||
{
|
||||
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
||||
|
||||
if (!irq_fpu_usable()) {
|
||||
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
||||
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
||||
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
||||
|
||||
memcpy(cryptd_req, req, sizeof(*req));
|
||||
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
||||
return crypto_ahash_final(cryptd_req);
|
||||
} else {
|
||||
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
||||
return crypto_shash_final(desc, req->result);
|
||||
}
|
||||
}
|
||||
|
||||
static int ghash_async_digest(struct ahash_request *req)
|
||||
{
|
||||
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
||||
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
||||
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
||||
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
||||
|
||||
if (!irq_fpu_usable()) {
|
||||
memcpy(cryptd_req, req, sizeof(*req));
|
||||
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
||||
return crypto_ahash_digest(cryptd_req);
|
||||
} else {
|
||||
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
||||
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
|
||||
|
||||
desc->tfm = child;
|
||||
desc->flags = req->base.flags;
|
||||
return shash_ahash_digest(req, desc);
|
||||
}
|
||||
}
|
||||
|
||||
static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
||||
struct crypto_ahash *child = &ctx->cryptd_tfm->base;
|
||||
int err;
|
||||
|
||||
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
|
||||
& CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_ahash_setkey(child, key, keylen);
|
||||
crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
|
||||
& CRYPTO_TFM_RES_MASK);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ghash_async_init_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct cryptd_ahash *cryptd_tfm;
|
||||
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
ctx->cryptd_tfm = cryptd_tfm;
|
||||
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
|
||||
sizeof(struct ahash_request) +
|
||||
crypto_ahash_reqsize(&cryptd_tfm->base));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
cryptd_free_ahash(ctx->cryptd_tfm);
|
||||
}
|
||||
|
||||
static struct ahash_alg ghash_async_alg = {
|
||||
.init = ghash_async_init,
|
||||
.update = ghash_async_update,
|
||||
.final = ghash_async_final,
|
||||
.setkey = ghash_async_setkey,
|
||||
.digest = ghash_async_digest,
|
||||
.halg = {
|
||||
.digestsize = GHASH_DIGEST_SIZE,
|
||||
.base = {
|
||||
.cra_name = "ghash",
|
||||
.cra_driver_name = "ghash-clmulni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = GHASH_BLOCK_SIZE,
|
||||
.cra_type = &crypto_ahash_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ghash_async_init_tfm,
|
||||
.cra_exit = ghash_async_exit_tfm,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * CPU match table: a single entry matching the PCLMULQDQ feature flag,
 * terminated by an empty entry. Checked in ghash_pclmulqdqni_mod_init() and
 * exported as the module's x86cpu device table.
 */
static const struct x86_cpu_id pcmul_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
|
||||
|
||||
/*
 * Module entry point.
 *
 * Returns -ENODEV when the CPU does not match pcmul_cpu_id. Otherwise
 * registers the synchronous algorithm, then the asynchronous one; if the
 * second registration fails, the first is undone and the error returned.
 */
static int __init ghash_pclmulqdqni_mod_init(void)
{
	int err;

	if (!x86_match_cpu(pcmul_cpu_id))
		return -ENODEV;

	err = crypto_register_shash(&ghash_alg);
	if (err)
		return err;

	err = crypto_register_ahash(&ghash_async_alg);
	if (err)
		crypto_unregister_shash(&ghash_alg);

	return err;
}
|
||||
|
||||
/*
 * Module exit point: unregister both algorithms, in the reverse of the
 * order used by ghash_pclmulqdqni_mod_init().
 */
static void __exit ghash_pclmulqdqni_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_ahash(&ghash_async_alg);
|
||||
crypto_unregister_shash(&ghash_alg);
|
||||
}
|
||||
|
||||
module_init(ghash_pclmulqdqni_mod_init);
|
||||
module_exit(ghash_pclmulqdqni_mod_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
/* Human-readable description shown by modinfo. */
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
		   "accelerated by PCLMULQDQ-NI");
|
||||
MODULE_ALIAS_CRYPTO("ghash");
|
||||
150
arch/x86/crypto/glue_helper-asm-avx.S
Normal file
150
arch/x86/crypto/glue_helper-asm-avx.S
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* Shared glue code for 128bit block ciphers, AVX assembler macros
|
||||
*
|
||||
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Load eight consecutive 16-byte blocks from (src) into x0..x7 (unaligned loads). */
#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
vmovdqu (0*16)(src), x0; \
|
||||
vmovdqu (1*16)(src), x1; \
|
||||
vmovdqu (2*16)(src), x2; \
|
||||
vmovdqu (3*16)(src), x3; \
|
||||
vmovdqu (4*16)(src), x4; \
|
||||
vmovdqu (5*16)(src), x5; \
|
||||
vmovdqu (6*16)(src), x6; \
|
||||
vmovdqu (7*16)(src), x7;
|
||||
|
||||
/* Store x0..x7 as eight consecutive 16-byte blocks at (dst) (unaligned stores). */
#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
vmovdqu x0, (0*16)(dst); \
|
||||
vmovdqu x1, (1*16)(dst); \
|
||||
vmovdqu x2, (2*16)(dst); \
|
||||
vmovdqu x3, (3*16)(dst); \
|
||||
vmovdqu x4, (4*16)(dst); \
|
||||
vmovdqu x5, (5*16)(dst); \
|
||||
vmovdqu x6, (6*16)(dst); \
|
||||
vmovdqu x7, (7*16)(dst);
|
||||
|
||||
/*
 * XOR x1..x7 with source blocks 0..6 (each block with the block preceding
 * it in src), then store x0..x7 to (dst). x0 is stored unmodified here;
 * presumably the caller XORs it with the IV -- confirm against callers.
 */
#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
vpxor (0*16)(src), x1, x1; \
|
||||
vpxor (1*16)(src), x2, x2; \
|
||||
vpxor (2*16)(src), x3, x3; \
|
||||
vpxor (3*16)(src), x4, x4; \
|
||||
vpxor (4*16)(src), x5, x5; \
|
||||
vpxor (5*16)(src), x6, x6; \
|
||||
vpxor (6*16)(src), x7, x7; \
|
||||
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
|
||||
/*
 * Increment the 128-bit little-endian value in x by one.
 * Expects minus_one to hold -1 in its low qword and 0 in its high qword
 * (as load_ctr_8way sets it up). Subtracting it adds 1 to the low qword;
 * the compare result for the low qword is shifted into the high qword and
 * subtracted, which adds the carry when the low qword was all-ones.
 * tmp is clobbered.
 */
#define inc_le128(x, minus_one, tmp) \
|
||||
vpcmpeqq minus_one, x, tmp; \
|
||||
vpsubq minus_one, x, x; \
|
||||
vpslldq $8, tmp, tmp; \
|
||||
vpsubq tmp, x, x;
|
||||
|
||||
/*
 * Build eight consecutive counter blocks from the counter stored at (iv).
 * The counter is loaded as-is and incremented as a little-endian 128-bit
 * value; each output x0..x7 is the counter shuffled through the bswap mask
 * (x0 = counter, x1 = counter + 1, ..., x7 = counter + 7).
 * counter + 8 is written back to (iv). t0, t1 and t2 are clobbered.
 */
#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
|
||||
vpcmpeqd t0, t0, t0; \
|
||||
vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
|
||||
vmovdqa bswap, t1; \
|
||||
\
|
||||
/* load IV and byteswap */ \
|
||||
vmovdqu (iv), x7; \
|
||||
vpshufb t1, x7, x0; \
|
||||
\
|
||||
/* construct IVs */ \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vpshufb t1, x7, x1; \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vpshufb t1, x7, x2; \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vpshufb t1, x7, x3; \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vpshufb t1, x7, x4; \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vpshufb t1, x7, x5; \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vpshufb t1, x7, x6; \
|
||||
inc_le128(x7, t0, t2); \
|
||||
vmovdqa x7, t2; \
|
||||
vpshufb t1, x7, x7; \
|
||||
inc_le128(t2, t0, t1); \
|
||||
vmovdqu t2, (iv);
|
||||
|
||||
/* XOR x0..x7 with the eight 16-byte blocks at (src), then store them to (dst). */
#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
vpxor (0*16)(src), x0, x0; \
|
||||
vpxor (1*16)(src), x1, x1; \
|
||||
vpxor (2*16)(src), x2, x2; \
|
||||
vpxor (3*16)(src), x3, x3; \
|
||||
vpxor (4*16)(src), x4, x4; \
|
||||
vpxor (5*16)(src), x5, x5; \
|
||||
vpxor (6*16)(src), x6, x6; \
|
||||
vpxor (7*16)(src), x7, x7; \
|
||||
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
|
||||
/*
 * Multiply iv by x in GF(2^128) ("ble" layout, per the macro name).
 * Both qwords of iv are doubled with vpaddq; the sign bits captured
 * beforehand are rearranged by the shuffle, filtered through mask and XORed
 * back in. mask is defined by the caller; it presumably carries the
 * reduction constant and the inter-qword carry bit -- confirm at the
 * definition. tmp is clobbered.
 */
#define gf128mul_x_ble(iv, mask, tmp) \
|
||||
vpsrad $31, iv, tmp; \
|
||||
vpaddq iv, iv, iv; \
|
||||
vpshufd $0x13, tmp, tmp; \
|
||||
vpand mask, tmp, tmp; \
|
||||
vpxor tmp, iv, iv;
|
||||
|
||||
/*
 * Prepare eight blocks for XTS processing.
 * Starting from the tweak stored at (iv), for each block k = 0..7:
 * xk = src block k XOR tweak, the tweak itself is saved in dst block k, and
 * the tweak is advanced with gf128mul_x_ble. The tweak following the eighth
 * block is written back to (iv). tiv, t0 and t1 are clobbered.
 * The tweaks saved in dst are consumed by store_xts_8way.
 */
#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
|
||||
t1, xts_gf128mul_and_shl1_mask) \
|
||||
vmovdqa xts_gf128mul_and_shl1_mask, t0; \
|
||||
\
|
||||
/* load IV */ \
|
||||
vmovdqu (iv), tiv; \
|
||||
vpxor (0*16)(src), tiv, x0; \
|
||||
vmovdqu tiv, (0*16)(dst); \
|
||||
\
|
||||
/* construct and store IVs, also xor with source */ \
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (1*16)(src), tiv, x1; \
|
||||
vmovdqu tiv, (1*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (2*16)(src), tiv, x2; \
|
||||
vmovdqu tiv, (2*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (3*16)(src), tiv, x3; \
|
||||
vmovdqu tiv, (3*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (4*16)(src), tiv, x4; \
|
||||
vmovdqu tiv, (4*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (5*16)(src), tiv, x5; \
|
||||
vmovdqu tiv, (5*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (6*16)(src), tiv, x6; \
|
||||
vmovdqu tiv, (6*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vpxor (7*16)(src), tiv, x7; \
|
||||
vmovdqu tiv, (7*16)(dst); \
|
||||
\
|
||||
gf128mul_x_ble(tiv, t0, t1); \
|
||||
vmovdqu tiv, (iv);
|
||||
|
||||
/*
 * XOR x0..x7 with the eight 16-byte values currently stored at (dst) (the
 * per-block tweaks saved there by load_xts_8way), then overwrite (dst) with
 * the results.
 */
#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
vpxor (0*16)(dst), x0, x0; \
|
||||
vpxor (1*16)(dst), x1, x1; \
|
||||
vpxor (2*16)(dst), x2, x2; \
|
||||
vpxor (3*16)(dst), x3, x3; \
|
||||
vpxor (4*16)(dst), x4, x4; \
|
||||
vpxor (5*16)(dst), x5, x5; \
|
||||
vpxor (6*16)(dst), x6, x6; \
|
||||
vpxor (7*16)(dst), x7, x7; \
|
||||
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue