OC类从应用启动到加载流程探究

要分析类的启动加载流程,我们可以从一个点入手,即load方法的调用,因为该方法会在应用启动时自动调用,我们根据这个特性查看一下调用堆栈,如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 2.1
* frame #0: 0x0000000100bb9dec AppLaunch`+[ViewController load](self=ViewController, _cmd="load") at ViewController.m:17:5
frame #1: 0x00000001c0d3c25c libobjc.A.dylib`load_images + 944
frame #2: 0x0000000100fca21c dyld`dyld::notifySingle(dyld_image_states, ImageLoader const*, ImageLoader::InitializerTimingList*) + 464
frame #3: 0x0000000100fdb5e8 dyld`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 512
frame #4: 0x0000000100fd9878 dyld`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 184
frame #5: 0x0000000100fd9940 dyld`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 92
frame #6: 0x0000000100fca6d8 dyld`dyld::initializeMainExecutable() + 216
frame #7: 0x0000000100fcf928 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 5216
frame #8: 0x0000000100fc9208 dyld`dyldbootstrap::start(dyld3::MachOLoaded const*, int, char const**, dyld3::MachOLoaded const*, unsigned long*) + 396
frame #9: 0x0000000100fc9038 dyld`_dyld_start + 56

可以看到,程序是从_dyld_start这个函数开始运行的,老办法,我们到dyld的源码中去一探究竟。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#if __arm64__
.text
.align 2
.globl __dyld_start
__dyld_start:
mov x28, sp
and sp, x28, #~15 // force 16-byte alignment of stack
mov x0, #0
mov x1, #0
stp x1, x0, [sp, #-16]! // make aligned terminating frame
mov fp, sp // set up fp to point to terminating frame
sub sp, sp, #16 // make room for local variables
#if __LP64__
ldr x0, [x28] // get app's mh into x0
ldr x1, [x28, #8] // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
add x2, x28, #16 // get argv into x2
#else
ldr w0, [x28] // get app's mh into x0
ldr w1, [x28, #4] // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
add w2, w28, #8 // get argv into x2
#endif
adrp x3,___dso_handle@page
add x3,x3,___dso_handle@pageoff // get dyld's mh in to x4
mov x4,sp // x5 has &startGlue
// call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)
bl __ZN13dyldbootstrap5startEPKN5dyld311MachOLoadedEiPPKcS3_Pm
mov x16,x0 // save entry point address in x16
#if __LP64__
ldr x1, [sp]
#else
ldr w1, [sp]
#endif
cmp x1, #0
b.ne Lnew
// LC_UNIXTHREAD way, clean up stack and jump to result
#if __LP64__
add sp, x28, #8 // restore unaligned stack pointer without app mh
#else
add sp, x28, #4 // restore unaligned stack pointer without app mh
#endif
#if __arm64e__
braaz x16 // jump to the program's entry point
#else
br x16 // jump to the program's entry point
#endif
// LC_MAIN case, set up stack for call to main()
Lnew: mov lr, x1 // simulate return address into _start in libdyld.dylib
#if __LP64__
ldr x0, [x28, #8] // main param1 = argc
add x1, x28, #16 // main param2 = argv
add x2, x1, x0, lsl #3
add x2, x2, #8 // main param3 = &env[0]
mov x3, x2
Lapple: ldr x4, [x3]
add x3, x3, #8
#else
ldr w0, [x28, #4] // main param1 = argc
add x1, x28, #8 // main param2 = argv
add x2, x1, x0, lsl #2
add x2, x2, #4 // main param3 = &env[0]
mov x3, x2
Lapple: ldr w4, [x3]
add x3, x3, #4
#endif
cmp x4, #0
b.ne Lapple // main param4 = apple
#if __arm64e__
braaz x16
#else
br x16
#endif
#endif // __arm64__

dyld的入口函数也是使用汇编来编写的,我们不需要逐行理解,找一下关键字,结合上面的堆栈信息和汇编注释,我们定位到dyldbootstrap::start这个关键方法,最终跟踪到以下调用链:

1
__dyld_start -> dyldbootstrap::start -> _main -> initializeMainExecutable -> runInitializers -> ImageLoader::runInitializers -> ImageLoader::processInitializers -> ImageLoader::recursiveInitialization -> notifySingle -> objc::load_images -> [ViewController load]

上面的流程有一个比较奇怪的调用出现在objc::load_images,因为全局都没有找到这个方法的任何申明和定义,我们猜测这个函数可能是外界传过来的一个函数指针,那么如何验证呢?回到线索中断前的最后一个方法中去:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
static void notifySingle(dyld_image_states state, const ImageLoader* image, ImageLoader::InitializerTimingList* timingInfo)
{
//dyld::log("notifySingle(state=%d, image=%s)\n", state, image->getPath());
std::vector<dyld_image_state_change_handler>* handlers = stateToHandlers(state, sSingleHandlers);
if ( handlers != NULL ) {
dyld_image_info info;
info.imageLoadAddress = image->machHeader();
info.imageFilePath = image->getRealPath();
info.imageFileModDate = image->lastModified();
for (std::vector<dyld_image_state_change_handler>::iterator it = handlers->begin(); it != handlers->end(); ++it) {
const char* result = (*it)(state, 1, &info);
if ( (result != NULL) && (state == dyld_image_state_mapped) ) {
//fprintf(stderr, " image rejected by handler=%p\n", *it);
// make copy of thrown string so that later catch clauses can free it
const char* str = strdup(result);
throw str;
}
}
}
if ( state == dyld_image_state_mapped ) {
// <rdar://problem/7008875> Save load addr + UUID for images from outside the shared cache
if ( !image->inSharedCache() ) {
dyld_uuid_info info;
if ( image->getUUID(info.imageUUID) ) {
info.imageLoadAddress = image->machHeader();
addNonSharedCacheImageUUID(info);
}
}
}
if ( (state == dyld_image_state_dependents_initialized) && (sNotifyObjCInit != NULL) && image->notifyObjC() ) {
uint64_t t0 = mach_absolute_time();
dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_INIT, (uint64_t)image->machHeader(), 0, 0);
// 重点!!!
(*sNotifyObjCInit)(image->getRealPath(), image->machHeader());
uint64_t t1 = mach_absolute_time();
uint64_t t2 = mach_absolute_time();
uint64_t timeInObjC = t1-t0;
uint64_t emptyTime = (t2-t1)*100;
if ( (timeInObjC > emptyTime) && (timingInfo != NULL) ) {
timingInfo->addTime(image->getShortName(), timeInObjC);
}
}
// mach message csdlc about dynamically unloaded images
if ( image->addFuncNotified() && (state == dyld_image_state_terminated) ) {
notifyKernel(*image, false);
const struct mach_header* loadAddress[] = { image->machHeader() };
const char* loadPath[] = { image->getPath() };
notifyMonitoringDyld(true, 1, loadAddress, loadPath);
}
}

果然,在里面我们发现有一些比较敏感的关键字sNotifyObjCInit,这个应该跟objc的初始化有关系,我们全局搜索一下,最后定位到下面的函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
//
// Note: only for use by objc runtime
// Register handlers to be called when objc images are mapped, unmapped, and initialized.
// Dyld will call back the "mapped" function with an array of images that contain an objc-image-info section.
// Those images that are dylibs will have the ref-counts automatically bumped, so objc will no longer need to
// call dlopen() on them to keep them from being unloaded. During the call to _dyld_objc_notify_register(),
// dyld will call the "mapped" function with already loaded objc images. During any later dlopen() call,
// dyld will also call the "mapped" function. Dyld will call the "init" function when dyld would be called
// initializers in that image. This is when objc calls any +load methods in that image.
//
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped);
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped)
{
dyld::registerObjCNotifiers(mapped, init, unmapped);
}

上面的注释写得已经比较清楚,这个函数专供 objc runtime。那我们就去runtime的源码中找找嘛。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/***********************************************************************
* _objc_init
* Bootstrap initialization. Registers our image notifier with dyld.
* Called by libSystem BEFORE library initialization time
**********************************************************************/
void _objc_init(void)
{
static bool initialized = false;
if (initialized) return;
initialized = true;
// fixme defer initialization until an objc-using image is found?
environ_init();
tls_init();
static_init();
runtime_init();
exception_init();
cache_init();
_imp_implementationWithBlock_init();
_dyld_objc_notify_register(&map_images, load_images, unmap_image);
#if __OBJC2__
didCallDyldNotifyRegister = true;
#endif
}

果然在_objc_init初始化函数中发现了调用逻辑,分别注册了map_imagesload_images两个回调函数,看到这里熟悉应用启动流程的同学应该想到了,这不就是可执行文件的mmap和load么。但是新的问题又来了,这个_objc_init又是在什么时候被调用的呢?我们直接在里面搞个断点看看调用堆栈:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 3.1
frame #0: 0x00000001c0d4ce0c libobjc.A.dylib`_objc_init
* frame #1: 0x00000001003b88f8 libdispatch.dylib`_os_object_init + 20
frame #2: 0x00000001003c7ea0 libdispatch.dylib`libdispatch_init + 292
frame #3: 0x00000001dadfd888 libSystem.B.dylib`libSystem_initializer + 200
frame #4: 0x0000000100120810 dyld`ImageLoaderMachO::doModInitFunctions(ImageLoader::LinkContext const&) + 424
frame #5: 0x0000000100120bd8 dyld`ImageLoaderMachO::doInitialization(ImageLoader::LinkContext const&) + 52
frame #6: 0x000000010011b600 dyld`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 536
frame #7: 0x000000010011b56c dyld`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 388
frame #8: 0x0000000100119878 dyld`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 184
frame #9: 0x0000000100119940 dyld`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 92
frame #10: 0x000000010010a688 dyld`dyld::initializeMainExecutable() + 136
frame #11: 0x000000010010f928 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 5216
frame #12: 0x0000000100109208 dyld`dyldbootstrap::start(dyld3::MachOLoaded const*, int, char const**, dyld3::MachOLoaded const*, unsigned long*) + 396
frame #13: 0x0000000100109038 dyld`_dyld_start + 56

前面我们已经看过的函数跳过,直接从ImageLoaderMachO::doInitialization开始。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
bool ImageLoaderMachO::doInitialization(const LinkContext& context)
{
CRSetCrashLogMessage2(this->getPath());
// mach-o has -init and static initializers
doImageInit(context);
doModInitFunctions(context);
CRSetCrashLogMessage2(NULL);
return (fHasDashInit || fHasInitializers);
}
void ImageLoaderMachO::doModInitFunctions(const LinkContext& context)
{
// 去掉我们不关心的信息 我眼里只有下面这句话
if ( ! dyld::gProcessInfo->libSystemInitialized ) {
// <rdar://problem/17973316> libSystem initializer must run first
const char* installPath = getInstallPath();
if ( (installPath == NULL) || (strcmp(installPath, libSystemPath(context)) != 0) )
dyld::throwf("initializer in image (%s) that does not link with libSystem.dylib\n", this->getPath());
}
}

看注释和异常文案,libSystem必须最先被初始化,这也解释了上面的堆栈情况。行吧,那我们再去libSystem的源码中看看有没有libSystem_initializer这个初始化方法。

果然是有的,去掉一些不关系的信息,大概如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// libsyscall_initializer() initializes all of libSystem.dylib
// <rdar://problem/4892197>
__attribute__((constructor))
static void
libSystem_initializer(int argc,
const char* argv[],
const char* envp[],
const char* apple[],
const struct ProgramVars* vars)
{
_libSystem_ktrace0(ARIADNE_LIFECYCLE_libsystem_init | DBG_FUNC_START);
__libkernel_init(&libkernel_funcs, envp, apple, vars);
_libSystem_ktrace_init_func(KERNEL);
__libplatform_init(NULL, envp, apple, vars);
_libSystem_ktrace_init_func(PLATFORM);
__pthread_init(&libpthread_funcs, envp, apple, vars);
_libSystem_ktrace_init_func(PTHREAD);
_libc_initializer(&libc_funcs, envp, apple, vars);
_libSystem_ktrace_init_func(LIBC);
// TODO: Move __malloc_init before __libc_init after breaking malloc's upward link to Libc
__malloc_init(apple);
_libSystem_ktrace_init_func(MALLOC);
#if TARGET_OS_OSX
/* <rdar://problem/9664631> */
__keymgr_initializer();
_libSystem_ktrace_init_func(KEYMGR);
#endif
// No ASan interceptors are invoked before this point. ASan is normally initialized via the malloc interceptor:
// _dyld_initializer() -> tlv_load_notification -> wrap_malloc -> ASanInitInternal
_dyld_initializer();
_libSystem_ktrace_init_func(DYLD);
libdispatch_init();
_libSystem_ktrace_init_func(LIBDISPATCH);
#if !TARGET_OS_DRIVERKIT
_libxpc_initializer();
_libSystem_ktrace_init_func(LIBXPC);
#if CURRENT_VARIANT_asan
setenv("DT_BYPASS_LEAKS_CHECK", "1", 1);
#endif
#endif // !TARGET_OS_DRIVERKIT
// must be initialized after dispatch
_libtrace_init();
_libSystem_ktrace_init_func(LIBTRACE);
#if !TARGET_OS_DRIVERKIT
#if defined(HAVE_SYSTEM_SECINIT)
_libsecinit_initializer();
_libSystem_ktrace_init_func(SECINIT);
#endif
#if defined(HAVE_SYSTEM_CONTAINERMANAGER)
_container_init(apple);
_libSystem_ktrace_init_func(CONTAINERMGR);
#endif
__libdarwin_init();
_libSystem_ktrace_init_func(DARWIN);
#endif // !TARGET_OS_DRIVERKIT
//...
}

可以看到,里面做了很多系统库的初始化工作,我们也看到了熟悉的身影

1
2
3
4
5
6
7
8
// 这里有点意思,因为libSystem的初始化流程就是从dyld进来的,但是进来后才对dyld做初始化。
_dyld_initializer();
_libSystem_ktrace_init_func(DYLD);
// 堆栈里面的
libdispatch_init();
_libSystem_ktrace_init_func(LIBDISPATCH);

看了一堆初始化方法,还是没有找到我们要的_objc_init呀,这货到底藏哪了。我们回到堆栈,发现在libdispatch的初始化后又调用了_os_object_init这个方法,猜测是不是跟这个方法有关系呢?没办法,我们还得去libdispatch的源码中看一下。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
void
_os_object_init(void)
{
_objc_init(); //我在这里哦~~~
Block_callbacks_RR callbacks = {
sizeof(Block_callbacks_RR),
(void (*)(const void *))&objc_retain,
(void (*)(const void *))&objc_release,
(void (*)(const void *))&_os_objc_destructInstance
};
_Block_use_RR2(&callbacks);
#if DISPATCH_COCOA_COMPAT
const char *v = getenv("OBJC_DEBUG_MISSING_POOLS");
if (v) _os_object_debug_missing_pools = _dispatch_parse_bool(v);
v = getenv("DISPATCH_DEBUG_MISSING_POOLS");
if (v) _os_object_debug_missing_pools = _dispatch_parse_bool(v);
v = getenv("LIBDISPATCH_DEBUG_MISSING_POOLS");
if (v) _os_object_debug_missing_pools = _dispatch_parse_bool(v);
#endif
}

呵呵,妖怪哪里跑!!!

小结

现在我们终于把OC类从app启动到加载的整个流程跑通了,但是光知道流程显然还是不够啊,类是如何从二进制文件被创建成类对象的呢?我们之后再继续探索这一块的内容吧。