My first take on C++ coroutines

In this post I’ll go through the disassembly (commented inline) of the following C++ snippet:
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 #include <concepts>#include <coroutine> class task { public: class promise_type { public: class always_suspend { public: bool await_ready() const noexcept; bool await_suspend( std::coroutine_handle<promise_type> handle) noexcept; void await_resume() noexcept; }; promise_type(); always_suspend initial_suspend(); always_suspend final_suspend() noexcept; void unhandled_exception(); always_suspend yield_value(std::integral auto&& v); task get_return_object(); private: std::coroutine_handle<promise_type> handle_; };}; bool task::promise_type::always_suspend::await_ready() const noexcept { return false;} bool task::promise_type::always_suspend::await_suspend( std::coroutine_handle<promise_type> handle) noexcept { return true;} void task::promise_type::always_suspend::await_resume() noexcept {} task::promise_type::promise_type() { handle_ = std::coroutine_handle<promise_type>::from_promise(*this);} task::promise_type::always_suspend task::promise_type::initial_suspend() { return {};} task::promise_type::always_suspendtask::promise_type::final_suspend() noexcept { return {};} void task::promise_type::unhandled_exception() {} template <std::integral T>task::promise_type::always_suspend task::promise_type::yield_value(T&& v) { return {};} task task::promise_type::get_return_object() { return {}; } task fancy_task(int x) { for (int i = 0; i != 100; ++i) { co_yield i + x; }} int main() { auto task = fancy_task(5); }
Disassembly:
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167 task::promise_type::always_suspend::await_ready() const: mov eax, 0 rettask::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>): mov eax, 1 retfancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor]: push rbp push rbx sub rsp, 8 mov rbx, rdi ; `rbx` holds `coroutine_handle<>`. movzx ecx, WORD PTR [rdi+36] ; Loads current state (of the state machine the coroutine represents). test cl, 1 ; Not destroying the coroutine?. je .L4 cmp cx, 7 ; State > 3? Note that 3 comes from `7 >> 1`. ja .L5 ; Unexpected then. mov eax, 170 ; ??? shr rax, cl test al, 1 jne .L6.L5: ; Unexpected state. ud2.L4: cmp cx, 4 ; State 2. je .L7 ja .L8 test cx, cx je .L9 ; State 0. cmp cx, 2 jne .L11 ; Not state 1, either. Unexpected..L10: mov BYTE PTR [rbx+39], 1 ; ??? state 0 finishes (i.e., `initial_suspend` called)? mov DWORD PTR [rbx+44], 0 ; Initialize `i`. jmp .L14.L8: cmp cx, 6 ; State 3. jne .L11.L6: ; Epilogue. State 3. ; ??? Coroutine alive? ; ; Note that this is not necessarily the same as `coroutine_handle<>.done()`, ; the latter returns `true` as soon as `final_suspend()` is reached, while ; this flag is set only after `final_suspend()` has been resumed. cmp BYTE PTR [rbx+38], 0 jne .L17.L3: ; Return control to the caller. add rsp, 8 pop rbx pop rbp ret.L11: ; Unexpected state. ud2.L9: ; Prologue. State 0, runs before `initial_suspend()`. mov QWORD PTR [rbx+24], rdi ; Stores `coroutine_handle<>` in ... `coroutine_handle<>`? mov BYTE PTR [rdi+39], 0 ; ??? state 0 finishes? 
lea rbp, [rdi+40] ; Space allocated for `always_suspend` returned by `initial_suspend()` in coroutine state. mov rdi, rbp call task::promise_type::always_suspend::await_ready() const test al, al jne .L10 ; No need to suspend. mov WORD PTR [rbx+36], 2 ; Next state (of FSM this coroutine represents) will be state 1. mov rsi, rbx mov rdi, rbp call task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>) ; See if we really need to suspend. test al, al jne .L3 ; Yes, returning to the caller. jmp .L10 ; Jump ahead to state 1 then..L7: ; State 2. Also the beginning of the `for` loop. mov eax, DWORD PTR [rbx+44] ; Loads `i`. add eax, 1 mov DWORD PTR [rbx+44], eax.L14: mov eax, DWORD PTR [rbx+44] cmp eax, 100 ; `for` loop ends? je .L18 add eax, DWORD PTR [rbx+32] ; Loads `x` and adds it to `i`. mov DWORD PTR [rbx+52], eax ; Keep the result. lea rbp, [rbx+48] ; `always_suspend` object (returned by `yield_value(integral auto&&)`) allocated in coroutine state? mov rdi, rbp call task::promise_type::always_suspend::await_ready() const ; See if we need to suspend. test al, al jne .L7 ; No need to suspend. mov WORD PTR [rbx+36], 4 mov rsi, QWORD PTR [rbx+24] ; `coroutine_handle<>`. mov rdi, rbp call task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>) test al, al ; Needs to suspend? jne .L3 ; Yes, leaving. jmp .L7 ; Otherwise keep looping..L18: ; `for` loop ends. ; Clear pointer to FSM represented by this coroutine. ; ; Indicates `coroutine_handle::done()`? ; ; As a reminder, resuming a coroutine suspended on its `final_suspend()` is U.B, ; therefore setting FSM to `nullptr` prior to calling `final_suspend()` won't ; hurt well-formed programs. mov QWORD PTR [rbx], 0 lea rbp, [rbx+56] ; `always_suspend` for `final_suspend()`. mov rdi, rbp call task::promise_type::always_suspend::await_ready() const test al, al jne .L6 ; Don't suspend, falling through to state 3. 
mov WORD PTR [rbx+36], 6 ; Next state is 3. mov rsi, QWORD PTR [rbx+24] ; `coroutine_handle<>`. mov rdi, rbp call task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>) test al, al jne .L3 ; Suspend, returning to the caller. jmp .L6 ; No need to suspend, continuing state 3..L17: ; Destroys the coroutine. mov rdi, rbx call operator delete(void*) jmp .L3fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .destroy]: sub rsp, 8 or WORD PTR [rdi+36], 1 # LSB set to 1 indicates leaving? call fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor] add rsp, 8 rettask::promise_type::always_suspend::await_resume(): rettask::promise_type::promise_type() [base object constructor]: mov QWORD PTR [rdi], 0 lea rax, [rdi-16] ; `coroutine_handle`. mov QWORD PTR [rdi], rax rettask::promise_type::initial_suspend(): rettask::promise_type::final_suspend(): rettask::promise_type::unhandled_exception(): rettask::promise_type::get_return_object(): retfancy_task(int): push rbp push rbx sub rsp, 8 mov ebp, edi mov edi, 64 ; Allocates coroutine state (stack frame, actually). The `coroutine_handle<>` ; also holds this value (at least on GCC). call operator new(unsigned long) mov rbx, rax mov BYTE PTR [rax+38], 1 ; ??? Coroutine alive? ; State machine? Seemingly reset to `nullptr` on coroutine completion (i.e., ; `coroutine_handle<>.done()`) mov QWORD PTR [rax], OFFSET FLAT:fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor] mov QWORD PTR [rax+8], OFFSET FLAT:fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .destroy] ; `coroutine_handle.destroy` mov DWORD PTR [rax+32], ebp ; Saves argument `x` (copied from `edi` into `ebp` above) into the coroutine frame; it is not the stack pointer. lea rdi, [rax+16] ; Pointer to promise object contained by coroutine state. call task::promise_type::promise_type() [complete object constructor] mov WORD PTR [rbx+36], 0 ; Current state? mov rdi, rbx call fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor] ; Enter the state machine. 
add rsp, 8 pop rbx pop rbp retmain: sub rsp, 8 mov edi, 5 ; Our argument to `fancy_task` call fancy_task(int) mov eax, 0 add rsp, 8 ret
In this post I’ll go through the disassembly (commented inline) of the following C++ snippet:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | #include <concepts> #include <coroutine> class task { public: class promise_type { public: class always_suspend { public: bool await_ready() const noexcept; bool await_suspend( std::coroutine_handle<promise_type> handle) noexcept; void await_resume() noexcept; }; promise_type(); always_suspend initial_suspend(); always_suspend final_suspend() noexcept; void unhandled_exception(); always_suspend yield_value(std::integral auto&& v); task get_return_object(); private: std::coroutine_handle<promise_type> handle_; }; }; bool task::promise_type::always_suspend::await_ready() const noexcept { return false; } bool task::promise_type::always_suspend::await_suspend( std::coroutine_handle<promise_type> handle) noexcept { return true; } void task::promise_type::always_suspend::await_resume() noexcept {} task::promise_type::promise_type() { handle_ = std::coroutine_handle<promise_type>::from_promise(*this); } task::promise_type::always_suspend task::promise_type::initial_suspend() { return {}; } task::promise_type::always_suspend task::promise_type::final_suspend() noexcept { return {}; } void task::promise_type::unhandled_exception() {} template <std::integral T> task::promise_type::always_suspend task::promise_type::yield_value(T&& v) { return {}; } task task::promise_type::get_return_object() { return {}; } task fancy_task(int x) { for (int i = 0; i != 100; ++i) { co_yield i + x; } } int main() { auto task = fancy_task(5); } |
Disassembly:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | task::promise_type::always_suspend::await_ready() const: mov eax, 0 ret task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>): mov eax, 1 ret fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor]: push rbp push rbx sub rsp, 8 mov rbx, rdi ; `rbx` holds `coroutine_handle<>`. movzx ecx, WORD PTR [rdi+36] ; Loads current state (of the state machine the coroutine represents). test cl, 1 ; Not destroying the coroutine?. je .L4 cmp cx, 7 ; State > 3? Note that 3 comes from `7 >> 1`. ja .L5 ; Unexpected then. mov eax, 170 ; ??? shr rax, cl test al, 1 jne .L6 .L5: ; Unexpected state. ud2 .L4: cmp cx, 4 ; State 2. je .L7 ja .L8 test cx, cx je .L9 ; State 0. cmp cx, 2 jne .L11 ; Not state 1, either. Unexpected. .L10: mov BYTE PTR [rbx+39], 1 ; ??? state 0 finishes (i.e., `initial_suspend` called)? mov DWORD PTR [rbx+44], 0 ; Initialize `i`. jmp .L14 .L8: cmp cx, 6 ; State 3. jne .L11 .L6: ; Epilogue. State 3. ; ??? Coroutine alive? ; ; Note that this is not necessarily the same as `coroutine_handle<>.done()`, ; the latter returns `true` as soon as `final_suspend()` is reached, while ; this flag is set only after `final_suspend()` has been resumed. cmp BYTE PTR [rbx+38], 0 jne .L17 .L3: ; Return control to the caller. add rsp, 8 pop rbx pop rbp ret .L11: ; Unexpected state. ud2 .L9: ; Prologue. State 0, runs before `initial_suspend()`. 
mov QWORD PTR [rbx+24], rdi ; Stores `coroutine_handle<>` in ... `coroutine_handle<>`? mov BYTE PTR [rdi+39], 0 ; ??? state 0 finishes? lea rbp, [rdi+40] ; Space allocated for `always_suspend` returned by `initial_suspend()` in coroutine state. mov rdi, rbp call task::promise_type::always_suspend::await_ready() const test al, al jne .L10 ; No need to suspend. mov WORD PTR [rbx+36], 2 ; Next state (of FSM this coroutine represents) will be state 1. mov rsi, rbx mov rdi, rbp call task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>) ; See if we really need to suspend. test al, al jne .L3 ; Yes, returning to the caller. jmp .L10 ; Jump ahead to state 1 then. .L7: ; State 2. Also the beginning of the `for` loop. mov eax, DWORD PTR [rbx+44] ; Loads `i`. add eax, 1 mov DWORD PTR [rbx+44], eax .L14: mov eax, DWORD PTR [rbx+44] cmp eax, 100 ; `for` loop ends? je .L18 add eax, DWORD PTR [rbx+32] ; Loads `x` and adds it to `i`. mov DWORD PTR [rbx+52], eax ; Keep the result. lea rbp, [rbx+48] ; `always_suspend` object (returned by `yield_value(integral auto&&)`) allocated in coroutine state? mov rdi, rbp call task::promise_type::always_suspend::await_ready() const ; See if we need to suspend. test al, al jne .L7 ; No need to suspend. mov WORD PTR [rbx+36], 4 mov rsi, QWORD PTR [rbx+24] ; `coroutine_handle<>`. mov rdi, rbp call task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>) test al, al ; Needs to suspend? jne .L3 ; Yes, leaving. jmp .L7 ; Otherwise keep looping. .L18: ; `for` loop ends. ; Clear pointer to FSM represented by this coroutine. ; ; Indicates `coroutine_handle::done()`? ; ; As a reminder, resuming a coroutine suspended on its `final_suspend()` is U.B, ; therefore setting FSM to `nullptr` prior to calling `final_suspend()` won't ; hurt well-formed programs. mov QWORD PTR [rbx], 0 lea rbp, [rbx+56] ; `always_suspend` for `final_suspend()`. 
mov rdi, rbp call task::promise_type::always_suspend::await_ready() const test al, al jne .L6 ; Don't suspend, falling through to state 3. mov WORD PTR [rbx+36], 6 ; Next state is 3. mov rsi, QWORD PTR [rbx+24] ; `coroutine_handle<>`. mov rdi, rbp call task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>) test al, al jne .L3 ; Suspend, returning to the caller. jmp .L6 ; No need to suspend, continuing state 3. .L17: ; Destroys the coroutine. mov rdi, rbx call operator delete(void*) jmp .L3 fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .destroy]: sub rsp, 8 or WORD PTR [rdi+36], 1 # LSB set to 1 indicates leaving? call fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor] add rsp, 8 ret task::promise_type::always_suspend::await_resume(): ret task::promise_type::promise_type() [base object constructor]: mov QWORD PTR [rdi], 0 lea rax, [rdi-16] ; `coroutine_handle`. mov QWORD PTR [rdi], rax ret task::promise_type::initial_suspend(): ret task::promise_type::final_suspend(): ret task::promise_type::unhandled_exception(): ret task::promise_type::get_return_object(): ret fancy_task(int): push rbp push rbx sub rsp, 8 mov ebp, edi mov edi, 64 ; Allocates coroutine state (stack frame, actually). The `coroutine_handle<>` ; also holds this value (at least on GCC). call operator new(unsigned long) mov rbx, rax mov BYTE PTR [rax+38], 1 ; ??? Coroutine alive? ; State machine? Seemingly reset to `nullptr` on coroutine completion (i.e., ; `coroutine_handle<>.done()`) mov QWORD PTR [rax], OFFSET FLAT:fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor] mov QWORD PTR [rax+8], OFFSET FLAT:fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .destroy] ; `coroutine_handle.destroy` mov DWORD PTR [rax+32], ebp ; Saves argument `x` (copied from `edi` into `ebp` above) into the coroutine frame; it is not the stack pointer. lea rdi, [rax+16] ; Pointer to promise object contained by coroutine state. 
call task::promise_type::promise_type() [complete object constructor] mov WORD PTR [rbx+36], 0 ; Current state? mov rdi, rbx call fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor] ; Enter the state machine. add rsp, 8 pop rbx pop rbp ret main: sub rsp, 8 mov edi, 5 ; Our argument to `fancy_task` call fancy_task(int) mov eax, 0 add rsp, 8 ret |