My first take on C++ coroutines

In this post I’ll go through the disassembly (commented inline) of the following C++ snippet:

#include <concepts>
#include <coroutine>

// Coroutine return type used by `fancy_task`. It has no data members; it only
// exists to supply the nested `promise_type`.
class task {
   public:
    // Promise type of coroutines returning `task`; declares the customisation
    // points used by `fancy_task` (initial/final suspend, `co_yield`,
    // exceptions, return object).
    class promise_type {
       public:
        // Awaiter shared by all three suspension points (initial, `co_yield`,
        // final). It has no data members.
        class always_suspend {
           public:
            // Defined below to return `false`.
            bool await_ready() const noexcept;
            // Defined below to return `true`; `handle` is not used.
            bool await_suspend(
                std::coroutine_handle<promise_type> handle) noexcept;
            // Defined below with an empty body.
            void await_resume() noexcept;
        };

        // Stores a handle to the owning coroutine in `handle_`.
        promise_type();
        always_suspend initial_suspend();
        always_suspend final_suspend() noexcept;
        // Defined below with an empty body.
        void unhandled_exception();
        // Called for `co_yield`; accepts any integral value and ignores it.
        always_suspend yield_value(std::integral auto&& v);
        // Defined below to return a default-constructed `task`.
        task get_return_object();

       private:
        // Written by the constructor; not read anywhere in this snippet.
        std::coroutine_handle<promise_type> handle_;
    };
};

// Always reports "not ready", so every await on this awaiter goes on to call
// `await_suspend()`.
bool task::promise_type::always_suspend::await_ready() const noexcept {
    return false;
}

// Always returns `true`, i.e. the coroutine stays suspended and control goes
// back to the caller/resumer. `handle` is ignored.
bool task::promise_type::always_suspend::await_suspend(
    std::coroutine_handle<promise_type> handle) noexcept {
    return true;
}

// Nothing to do on resumption; the await expression yields no value.
void task::promise_type::always_suspend::await_resume() noexcept {}

// Records a handle to the coroutine that owns this promise. `handle_` is
// default-initialised first and then assigned in the body, which presumably is
// why the constructor's disassembly contains two stores to the same address.
task::promise_type::promise_type() {
    handle_ = std::coroutine_handle<promise_type>::from_promise(*this);
}

// Returns an `always_suspend`, so the coroutine suspends before running any of
// its body.
task::promise_type::always_suspend task::promise_type::initial_suspend() {
    return {};
}

// Returns an `always_suspend`, so the coroutine also suspends once its body has
// finished.
task::promise_type::always_suspend
task::promise_type::final_suspend() noexcept {
    return {};
}

// Deliberately empty: an exception escaping the coroutine body is ignored.
void task::promise_type::unhandled_exception() {}

// Out-of-class definition of the abbreviated template declared in the class as
// `yield_value(std::integral auto&& v)`. The yielded value `v` is ignored; the
// returned `always_suspend` makes every `co_yield` suspend.
template <std::integral T>
task::promise_type::always_suspend task::promise_type::yield_value(T&& v) {
    return {};
}

// Returns an empty `task`. Since `task` has no members, the returned object
// carries no handle to the coroutine.
task task::promise_type::get_return_object() { return {}; }

// The coroutine under study: yields `i + x` for `i` in [0, 100). It suspends
// at the start, at every `co_yield` and at the end, because all three
// suspension points use `always_suspend`.
task fancy_task(int x) {
    for (int i = 0; i != 100; ++i) {
        co_yield i + x;
    }
}

// Creates the coroutine and does nothing else with it: it is never resumed or
// destroyed here.
int main() { auto task = fancy_task(5); }

Disassembly:

task::promise_type::always_suspend::await_ready() const:
        mov     eax, 0  ; `return false;`
        ret
task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>):
        mov     eax, 1  ; `return true;` (the handle argument is ignored).
        ret
fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor]:
        ; Body of the coroutine as a state machine. Receives the coroutine frame
        ; in `rdi` and handles both resumption and destruction.
        ;
        ; Frame offsets used in this listing:
        ;   +0   resume function pointer (`.actor`), cleared at `.L18`
        ;   +8   destroy function pointer (`.destroy`)
        ;   +16  promise object
        ;   +24  copy of the frame pointer, passed to `await_suspend` as the handle
        ;   +32  parameter `x`
        ;   +36  resume index (16 bits); state N is stored as 2*N, LSB = destroy request
        ;   +38  flag: frame must be released with `operator delete`
        ;   +39  flag: execution got past the initial suspend point
        ;   +40  awaiter returned by `initial_suspend()`
        ;   +44  loop variable `i`
        ;   +48  awaiter used by `co_yield`
        ;   +52  temporary holding `i + x`
        ;   +56  awaiter returned by `final_suspend()`
        push    rbp
        push    rbx
        sub     rsp, 8
        mov     rbx, rdi  ; `rbx` holds the frame pointer (same value as the `coroutine_handle<>`).
        movzx   ecx, WORD PTR [rdi+36]  ; Loads the resume index (current state of the state machine the coroutine represents).
        test    cl, 1  ; LSB set? `.destroy` ORs in 1 before calling us.
        je      .L4  ; LSB clear: ordinary resume, dispatch on the index below.
        cmp     cx, 7  ; Destroy path. Index > 7, i.e. state > 3? Note that 3 comes from `7 >> 1`.
        ja      .L5  ; Unexpected then.
        mov     eax, 170  ; 170 == 0b10101010: bitmask with bits 1, 3, 5 and 7 set.
        shr     rax, cl  ; Moves bit number `cl` of the mask into bit 0.
        test    al, 1  ; Is the index one of 1, 3, 5, 7? Always true here: it is odd and <= 7.
        jne     .L6  ; Every destroy state shares the same cleanup code.
.L5:  ; Unexpected state.
        ud2
.L4:
        cmp     cx, 4  ; Index 4 == state 2 (resumed after a `co_yield`).
        je      .L7
        ja      .L8  ; Index > 4: only 6 is valid.
        test    cx, cx
        je      .L9  ; State 0.
        cmp     cx, 2  ; Index 2 == state 1 (resumed after `initial_suspend()`).
        jne     .L11  ; Not state 1, either. Unexpected.
.L10:  ; State 1: the initial suspend point is behind us.
        mov     BYTE PTR [rbx+39], 1  ; Marks that execution got past the initial suspend point (cleared in state 0). Presumably GCC's "initial await_resume called" flag -- TODO confirm.
        mov     DWORD PTR [rbx+44], 0  ; Initialize `i`.
        jmp     .L14  ; Enter the loop at its condition check.
.L8:
        cmp     cx, 6  ; Index 6 == state 3 (resumed after `final_suspend()`).
        jne     .L11
.L6:  ; Epilogue. State 3, and the target of every destroy request.
        ; Flag set to 1 by `fancy_task(int)` right after `operator new`; when it
        ; is non-zero the frame is released with `operator delete` at `.L17`.
        ;
        ; Note that this is not the same as `coroutine_handle<>.done()`: the
        ; latter becomes `true` as soon as `final_suspend()` is reached (see
        ; `.L18`), while this flag is written once when the frame is allocated
        ; and only read here.
        cmp     BYTE PTR [rbx+38], 0
        jne     .L17
.L3:  ; Return control to the caller.
        add     rsp, 8
        pop     rbx
        pop     rbp
        ret
.L11:  ; Unexpected state.
        ud2
.L9:  ; Prologue. State 0, runs up to and including the `initial_suspend()` await.
        mov     QWORD PTR [rbx+24], rdi  ; Saves the frame pointer inside the frame; it is reloaded later as the `coroutine_handle<>` argument of `await_suspend`.
        mov     BYTE PTR [rdi+39], 0  ; Clears the "got past the initial suspend point" flag (set to 1 at `.L10`).
        lea     rbp, [rdi+40]  ; Space allocated for `always_suspend` returned by `initial_suspend()` in coroutine state.
        mov     rdi, rbp  ; `this` for `await_ready()`.
        call    task::promise_type::always_suspend::await_ready() const
        test    al, al
        jne     .L10  ; No need to suspend.
        mov     WORD PTR [rbx+36], 2  ; Next state (of FSM this coroutine represents) will be state 1.
        mov     rsi, rbx  ; Handle argument: the frame pointer.
        mov     rdi, rbp
        call    task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>)  ; See if we really need to suspend.
        test    al, al
        jne     .L3  ; Yes, returning to the caller.
        jmp     .L10  ; Jump ahead to state 1 then.
.L7:  ; State 2: resumed after `co_yield`. This is the `++i` step of the `for` loop.
        mov     eax, DWORD PTR [rbx+44]  ; Loads `i`.
        add     eax, 1
        mov     DWORD PTR [rbx+44], eax
.L14:  ; Loop condition.
        mov     eax, DWORD PTR [rbx+44]  ; Loads `i`.
        cmp     eax, 100  ; `for` loop ends?
        je      .L18
        add     eax, DWORD PTR [rbx+32]  ; Loads `x` and adds it to `i`.
        mov     DWORD PTR [rbx+52], eax  ; Keeps `i + x`, the operand of `co_yield`, in the frame.
        lea     rbp, [rbx+48]  ; `always_suspend` object for the `co_yield`, stored in coroutine state. No call to `yield_value` appears; presumably it was inlined away.
        mov     rdi, rbp
        call    task::promise_type::always_suspend::await_ready() const  ; See if we need to suspend.
        test    al, al
        jne     .L7  ; No need to suspend: next iteration.
        mov     WORD PTR [rbx+36], 4  ; Next state is 2.
        mov     rsi, QWORD PTR [rbx+24]  ; `coroutine_handle<>`.
        mov     rdi, rbp
        call    task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>)
        test    al, al  ; Needs to suspend?
        jne     .L3  ; Yes, leaving.
        jmp     .L7  ; Otherwise keep looping.
.L18:  ; `for` loop ends.
        ; Clears the resume function pointer stored at offset 0 of the frame
        ; (it was set to `.actor` by `fancy_task(int)`).
        ;
        ; Presumably this is what `coroutine_handle::done()` tests -- TODO
        ; confirm against libstdc++.
        ;
        ; As a reminder, resuming a coroutine suspended at its `final_suspend()` is U.B.,
        ; therefore clearing the pointer prior to the `final_suspend()` await won't
        ; hurt well-formed programs.
        mov     QWORD PTR [rbx], 0
        lea     rbp, [rbx+56]  ; `always_suspend` for `final_suspend()`.
        mov     rdi, rbp
        call    task::promise_type::always_suspend::await_ready() const
        test    al, al
        jne     .L6  ; Don't suspend, falling through to state 3.
        mov     WORD PTR [rbx+36], 6  ; Next state is 3.
        mov     rsi, QWORD PTR [rbx+24]  ; `coroutine_handle<>`.
        mov     rdi, rbp
        call    task::promise_type::always_suspend::await_suspend(std::__n4861::coroutine_handle<task::promise_type>)
        test    al, al
        jne     .L3  ; Suspend, returning to the caller.
        jmp     .L6  ; No need to suspend, continuing state 3.
.L17:  ; Frees the coroutine frame.
        mov     rdi, rbx
        call    operator delete(void*)
        jmp     .L3
fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .destroy]:
        sub     rsp, 8
        or      WORD PTR [rdi+36], 1  # Sets the LSB of the resume index: `.actor` tests this bit first and takes its destroy path when it is set.
        call    fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor]
        add     rsp, 8
        ret
task::promise_type::always_suspend::await_resume():
        ret  ; Empty function body.
task::promise_type::promise_type() [base object constructor]:
        mov     QWORD PTR [rdi], 0  ; `handle_` starts out null (presumably its default initialisation).
        lea     rax, [rdi-16]  ; `coroutine_handle`: `from_promise(*this)`. The promise sits at offset 16 of the frame, so the frame starts at `this - 16`.
        mov     QWORD PTR [rdi], rax  ; `handle_ = ...`
        ret
task::promise_type::initial_suspend():
        ret  ; `always_suspend` has no data members, so no value is materialised.
task::promise_type::final_suspend():
        ret  ; `always_suspend` has no data members, so no value is materialised.
task::promise_type::unhandled_exception():
        ret  ; Empty function body.
task::promise_type::get_return_object():
        ret  ; `task` has no data members, so no value is materialised.
fancy_task(int):
        ; What the caller actually invokes: allocates and initialises the
        ; coroutine frame, then runs the state machine up to its first
        ; suspension.
        push    rbp
        push    rbx
        sub     rsp, 8
        mov     ebp, edi  ; Keeps the argument `x` in a callee-saved register across the call below.
        mov     edi, 64  ; Size of the coroutine frame in bytes.
        ; Allocates the coroutine state (the coroutine's frame) on the heap. The
        ; `coroutine_handle<>` also holds this value (at least on GCC).
        call    operator new(unsigned long)
        mov     rbx, rax
        mov     BYTE PTR [rax+38], 1  ; Flag checked in `.actor`'s epilogue: when non-zero the frame is released with `operator delete`.
        ; Resume function of the state machine. Reset to `nullptr` by `.actor`
        ; when the loop finishes (seemingly what `coroutine_handle<>.done()`
        ; reports).
        mov     QWORD PTR [rax], OFFSET FLAT:fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor]
        mov     QWORD PTR [rax+8], OFFSET FLAT:fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .destroy]  ; `coroutine_handle.destroy`
        mov     DWORD PTR [rax+32], ebp  ; Copies the argument `x` into the frame (`.actor` reads it back from offset 32).
        lea     rdi, [rax+16]  ; Pointer to promise object contained by coroutine state.
        call    task::promise_type::promise_type() [complete object constructor]
        mov     WORD PTR [rbx+36], 0  ; Resume index 0: start at state 0.
        mov     rdi, rbx
        call    fancy_task(fancy_task(int)::_Z10fancy_taski.Frame*) [clone .actor]  ; Enter the state machine.
        add     rsp, 8
        pop     rbx
        pop     rbp
        ret
main:
        sub     rsp, 8
        mov     edi, 5  ; Our argument to `fancy_task`
        call    fancy_task(int)  ; Nothing resumes or destroys the coroutine afterwards.
        mov     eax, 0  ; `main` returns 0.
        add     rsp, 8
        ret

Leave a Reply

Your email address will not be published. Required fields are marked *