FrameProducer

// Interface implemented by every frame source.
class IFrameProducer {
public:
    // Polymorphic base: a virtual destructor keeps destruction through an
    // IFrameProducer pointer well-defined.
    virtual ~IFrameProducer() = default;

    // @brief: EOF can be set to indicate that a stream has finished. Once EOF
    // is set, the callee must ensure that subsequent calls to read don't block
    // but return black frames instead. This is essential to guarantee a clean
    // shutdown procedure while working with a double-buffered reader.
    // @return: std::nullopt if the camera is not initialized
    virtual std::optional<FrameProducerReadResult>
    read(const GpuBgraSpan& bgra_left,
         const GpuBgraSpan& bgra_right,
         const GpuXyzaSpan& xyza) = 0;

    virtual Size resolution() const = 0;
};
// Value returned by IFrameProducer::read.
struct FrameProducerReadResult {
    // Status flags. Note the defaults: a default-constructed result reports
    // "not successful" and "end of stream".
    bool success = false;
    bool eof = true;

    // Timestamp reported for the frame.
    Timestamp timestamp;

    // Views of the frame buffers: left/right BGRA images, XYZA data, depth.
    GpuBgraView bgra_left;
    GpuBgraView bgra_right;
    GpuXyzaView xyza;
    GpuDepthView depth;
};

What's suboptimal?

// Plain description of a 2D GPU buffer: a raw pointer plus geometry and a CUDA
// event. `channels` is not used inside the template; it only makes
// instantiations with different channel counts distinct types.
template<typename T, size_t channels>
struct GpuView {
    using value_type = T;

    T* gpu_ptr = nullptr;
    // Event recorded on the stream after data has been copied into the buffer
    // (see copy_color).
    cudaEvent_t ready{};

    // Row pitch, used as the pitch argument of cudaMemcpy2DAsync.
    size_t step = 0;
    int width = 0;
    int height = 0;

  protected:
    // Builds a `Target` whose GpuView base is initialised from this object's
    // fields. Protected so that only derived types decide which conversions
    // they expose.
    template<typename Target>
    [[nodiscard]] constexpr Target cast() const
    {
        return Target{{gpu_ptr, ready, step, width, height}};
    }
};

// Distinct types instead of aliases to guarantee type safety

// Read-only view: 8-bit elements, 4 channels.
struct GpuBgraView : GpuView<const uint8_t, 4> {
};

// Writable counterpart of GpuBgraView.
struct GpuBgraSpan : GpuView<uint8_t, 4> {
    // Returns a read-only view of the same buffer.
    [[nodiscard]] constexpr GpuBgraView as_view() const
    {
        return cast<GpuBgraView>();
    }
};

What's suboptimal?

// Interface implemented by every frame source.
class IFrameProducer {
public:
    // Polymorphic base: a virtual destructor keeps destruction through an
    // IFrameProducer pointer well-defined.
    virtual ~IFrameProducer() = default;

    // @brief: EOF can be set to indicate that a stream has finished. Once EOF
    // is set, the callee must ensure that subsequent calls to read don't block
    // but return black frames instead. This is essential to guarantee a clean
    // shutdown procedure while working with a double-buffered reader.
    // @return: std::nullopt if the camera is not initialized
    virtual std::optional<FrameProducerReadResult>
    read(const GpuBgraSpan& bgra_left,
         const GpuBgraSpan& bgra_right,
         const GpuXyzaSpan& xyza) = 0;

    virtual Size resolution() const = 0;
};
// Value returned by IFrameProducer::read.
struct FrameProducerReadResult {
    // Status flags. Note the defaults: a default-constructed result reports
    // "not successful" and "end of stream".
    bool success = false;
    bool eof = true;

    // Timestamp reported for the frame.
    Timestamp timestamp;

    // Views of the frame buffers: left/right BGRA images, XYZA data, depth.
    GpuBgraView bgra_left;
    GpuBgraView bgra_right;
    GpuXyzaView xyza;
    GpuDepthView depth;
};
  • FrameProducerReadResult::xyza vs FrameProducerReadResult::depth ?
  • What's in the buffer if eof ? (last frame or one past)
  • std::optional<FrameProducerReadResult> vs success ?
  • eof == false implies success == false ?
  • Where's the FrameProducerReadResult::{bgra_left, bgra_right, xyza} memory ?

FrameProducerReadResult::xyza vs FrameProducerReadResult::depth ?

 % ag '\.depth'
pymodule/tests/test_core.py
317:        self.depth_test(3)
320:        self.depth_test(4)

tec/invisible_man/src/invisible_man.cc
333:    depths_.reserve(static_cast<size_t>(settings_.depth_calibration_.size));
372:        if (t_ == settings_.depth_calibration_.start - 1) {
381:        const auto t_calibration = settings_.depth_calibration_.size;
488:                                  settings_.depth_calibration_.threshold,

tec/checkout/src/sqlite_database.cc
137:                    face.depth, img_path, nlohmann::json(face.depths));

tec/zed/src/zed.cc
163:        init_parameters_.depth_mode = sl::DEPTH_MODE::ULTRA;
204:        init_parameters_.depth_mode = sl::DEPTH_MODE::ULTRA;
238:        init_parameters_.depth_mode = sl::DEPTH_MODE::ULTRA;

zed_streaming_sender_src/src/main.cc
40:    init_parameters.depth_mode = sl::DEPTH_MODE::NONE;

FrameProducerReadResult::depth is never used

eof = false => success = false ?

What's in the buffer if eof ? 

// Excerpt of Zed::read reduced to the end-of-file handling.
std::optional<FrameProducerReadResult> Zed::read(const GpuBgraSpan& bgra_left,
                                                 const GpuBgraSpan& bgra_right,
                                                 const GpuXyzaSpan& xyza)
{
    do {
        auto raw_result = impl_->get_raw_frame(bgra_left, bgra_right, xyza);
        if (raw_result->status == sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
            // The SVO file is exhausted: overwrite the caller's buffers so the
            // frame reported together with eof is "one past" the last real
            // frame. The right span was previously never passed (the left one
            // was given twice).
            // NOTE(review): assumes copy_black_frame takes (left, right, xyza)
            // - confirm against its declaration.
            copy_black_frame(bgra_left, bgra_right, xyza);
            result.eof = true;
            break;
        }
    } while (true);

    result.success = true;
    return result;
}

Returns black frames if eof -> one past last frame

std::optional<FrameProducerReadResult>

vs .success ?

// Excerpt of Zed::read reduced to the "no result" path.
std::optional<FrameProducerReadResult> Zed::read(const GpuBgraSpan& bgra_left,
                                                 const GpuBgraSpan& bgra_right,
                                                 const GpuXyzaSpan& xyza)
{
    FrameProducerReadResult result;
    do {
        auto raw_result = impl_->get_raw_frame(bgra_left, bgra_right, xyza);
        // Failure is signalled by an empty optional, not by result.success.
        if (raw_result == std::nullopt) {
            return std::nullopt;
        }
    } while (true);

    // Every value that is actually returned has success == true, so the flag
    // carries no information beyond "the optional is engaged".
    result.success = true;
    return result;
}

Ambiguous state

// Excerpt of Zed::read: two mechanisms report the outcome of one call.
std::optional<FrameProducerReadResult> Zed::read(const GpuBgraSpan& bgra_left,
                                                 const GpuBgraSpan& bgra_right,
                                                 const GpuXyzaSpan& xyza)
{
    FrameProducerReadResult result;
    do {
        auto raw_result = impl_->get_raw_frame(bgra_left, bgra_right, xyza);
        // Mechanism 1: an empty optional.
        if (raw_result == std::nullopt) {
            return std::nullopt;
        }
    } while (true);

    // Mechanism 2: the success flag. An engaged optional with
    // success == false is representable by the type but never produced here.
    result.success = true;
    return result;
}

FrameProducerReadResult.success

is never used (anymore)

 % ag 'result\.success'
 
tec/frame_producer/src/camera_usb_reader.cc
47:    result.success = true;

tec/frame_producer/include/private/frame_producer/static_frame_producer_impl.hpp
120:        result.success = true;

tec/zed/src/zed.cc
679:    result.success = true;
-FrameProducerReadResult DoubleBufferProducer::download()
+std::optional<FrameProducerReadResult> DoubleBufferProducer::download()
 {
     std::unique_lock<std::mutex> lock{mutex_};
     EXPECT(image_requested_ - image_downloaded_ == 1 &&
@@ -48,7 +48,7 @@ void DoubleBufferProducer::run()
 
     result_ =
         producer_->read(buffer_.bgra_left, buffer_.bgra_right, buffer_.cloud);
-    EXPECT(result_.success);
+    EXPECT(result_ == std::nullopt || result_->success);

Remove ambiguous states?

Ambiguous state

// Payload of a successful read: the timestamp and the frame views.
struct FrameProducerReadSuccess {
    Timestamp timestamp;
    GpuBgraView bgra_left;
    GpuBgraView bgra_right;
    GpuXyzaView xyza;
    GpuDepthView depth;
};

// Empty tag type: the stream has ended.
struct FrameProducerReadEof {};

// Empty tag type: the read failed.
struct FrameProducerReadFailure {};

// Exactly one alternative is held at a time, so contradictory combinations of
// separate success/eof flags cannot be expressed.
using FrameProducerReadResult = std::variant<
    FrameProducerReadSuccess,
    FrameProducerReadFailure,
    FrameProducerReadEof>;
// Value returned by IFrameProducer::read.
struct FrameProducerReadResult {
    // Status flags. Note the defaults: a default-constructed result reports
    // "not successful" and "end of stream".
    bool success = false;
    bool eof = true;

    // Timestamp reported for the frame.
    Timestamp timestamp;

    // Views of the frame buffers: left/right BGRA images, XYZA data, depth.
    GpuBgraView bgra_left;
    GpuBgraView bgra_right;
    GpuXyzaView xyza;
    GpuDepthView depth;
};

FrameProducerReadResult::{bgra_left, bgra_right, xyza} memory ?

// Interface implemented by every frame source.
class IFrameProducer {
public:
    // Polymorphic base: a virtual destructor keeps destruction through an
    // IFrameProducer pointer well-defined.
    virtual ~IFrameProducer() = default;

    // Reads one frame; the caller supplies the GPU spans for the left and
    // right BGRA images and the XYZA data.
    virtual std::optional<FrameProducerReadResult>
    read(const GpuBgraSpan& bgra_left,
         const GpuBgraSpan& bgra_right,
         const GpuXyzaSpan& xyza) = 0;

};
// Value returned by IFrameProducer::read.
struct FrameProducerReadResult {
    // Status flags. Note the defaults: a default-constructed result reports
    // "not successful" and "end of stream".
    bool success = false;
    bool eof = true;

    // Timestamp reported for the frame.
    Timestamp timestamp;

    // Views of the frame buffers: left/right BGRA images, XYZA data, depth.
    GpuBgraView bgra_left;
    GpuBgraView bgra_right;
    GpuXyzaView xyza;
    GpuDepthView depth;
};
// Plain description of a 2D GPU buffer: a raw pointer plus geometry and a CUDA
// event. `channels` is not used inside the template; it only makes
// instantiations with different channel counts distinct types.
template<typename T, size_t channels>
struct GpuView {
    using value_type = T;

    T* gpu_ptr = nullptr;
    // Event recorded on the stream after data has been copied into the buffer
    // (see copy_color).
    cudaEvent_t ready{};

    // Row pitch, used as the pitch argument of cudaMemcpy2DAsync.
    size_t step = 0;
    int width = 0;
    int height = 0;

  protected:
    // Builds a `Target` whose GpuView base is initialised from this object's
    // fields. Protected so that only derived types decide which conversions
    // they expose.
    template<typename Target>
    [[nodiscard]] constexpr Target cast() const
    {
        return Target{{gpu_ptr, ready, step, width, height}};
    }
};

// Distinct types instead of aliases to guarantee type safety

// Read-only view: 8-bit elements, 4 channels.
struct GpuBgraView : GpuView<const uint8_t, 4> {
};

// Writable counterpart of GpuBgraView.
struct GpuBgraSpan : GpuView<uint8_t, 4> {
    // Returns a read-only view of the same buffer.
    [[nodiscard]] constexpr GpuBgraView as_view() const
    {
        return cast<GpuBgraView>();
    }
};
// Excerpt of Zed::read reduced to how the returned views are produced.
std::optional<FrameProducerReadResult> Zed::read(const GpuBgraSpan& bgra_left,
                                                 const GpuBgraSpan& bgra_right,
                                                 const GpuXyzaSpan& xyza)
{
    // The frame is written into the spans supplied by the caller.
    auto raw_result = impl_->get_raw_frame(bgra_left, bgra_right, xyza);

    result.success = true;
    // The views in the result are made from those same spans, so they refer to
    // the caller's own buffers rather than to separate memory.
    result.bgra_left = bgra_left.as_view();
    result.bgra_right = bgra_right.as_view();
    result.xyza = xyza.as_view();
    return result;
}

Same memory out

// Copies a 4-channel 8-bit sl::Mat into `dst` on the given CUDA stream and
// records `dst.ready` on that stream afterwards.
// Requires `src` to be U8_C4 and to have exactly the dimensions of `dst`.
void copy_color(const sl::Mat& src, const GpuBgraSpan& dst, cudaStream_t stream)
{
    EXPECT(src.getDataType() == sl::MAT_TYPE::U8_C4);
    const auto [size, step, begin] = get_sl_gpu_mat_description<uint8_t>(src);
    EXPECT(size.width == dst.width && size.height == dst.height);

    // Asynchronous device-to-device copy; each row is width * 4 bytes
    // (four 8-bit channels), with source and destination pitches given
    // separately.
    checkCuda(cudaMemcpy2DAsync(
        dst.gpu_ptr, dst.step, begin, step, static_cast<size_t>(size.width * 4),
        static_cast<size_t>(size.height), cudaMemcpyDeviceToDevice, stream));
    // Recorded after the copy was enqueued on the same stream.
    checkCuda(cudaEventRecord(dst.ready, stream));
}

Why return a view into the span?

    // Loads one frame into copies of the caller's span descriptors and returns
    // a view of the left image. The right image parameter is ignored.
    FrameProducerReadResult StaticFrameProducer::read_blocking(
                  const GpuBgraSpan& bgra_left,
                  const GpuBgraSpan& /*bgra_right*/,
                  const GpuXyzaSpan& xyza) override
    {
        // Work on local copies of the span descriptors: the
        // StaticFrameProducer has to adjust the buffer views when the loaded
        // image is smaller than expected. Otherwise only uniformly sized
        // images could be loaded.
        GpuBgraSpan bgra_span = bgra_left;
        GpuXyzaSpan xyza_span = xyza;
        read_frame(bgra_span, xyza_span);

        FrameProducerReadResult frame;
        frame.success = true;
        frame.eof = eof_;
        frame.bgra_left = bgra_span.as_view();
        return frame;
    }

We can downsize images!

// Reads the next frame into the caller-provided spans.
//
// Holds camera_access_mutex_ for the whole call and loops until
// get_raw_frame reports SUCCESS, reconnecting along the way when
//   - a live source (not a video) has been waited on for too long,
//   - a live source's SVO recording has reached its maximum,
//   - a looped video has reached its end, or
//   - (aarch64 only) the SDK reports CAMERA_NOT_INITIALIZED.
//
// @return std::nullopt if get_raw_frame yields no result. Otherwise a result
//         with success == true whose views are made from the caller's spans.
//         eof is true when a non-looped video has ended; in that case
//         copy_black_frame is applied to the spans and the timestamp stays
//         value-initialised.
std::optional<FrameProducerReadResult> Zed::read(const GpuBgraSpan& bgra_left,
                                                 const GpuBgraSpan& bgra_right,
                                                 const GpuXyzaSpan& xyza)
{
    std::lock_guard<std::mutex> lock(camera_access_mutex_);
    using clock = std::chrono::high_resolution_clock;
    auto read_time = clock::now();

    const auto is_video = impl_->is_video();
    const auto is_looped_video = impl_->is_looped_svo();

    FrameProducerReadResult result;
    result.eof = false;

    std::chrono::milliseconds timestamp{};
    do {
        auto wait = clock::now() - read_time;
        // reconnect in case waiting for new frame is long
        // or svo recording reached max duration per file
        if (!is_video && is_waiting_long(wait)) {
            log::zed().warn("Waiting too long, reconnecting");
            reconnect();
            read_time = clock::now();
        }

        if (!is_video && is_svo_recording_max()) {
            log::zed().info("Recording to new file");
            reconnect();
            read_time = clock::now();
        }

        auto raw_result = impl_->get_raw_frame(
            bgra_left, bgra_right, xyza);

        if (raw_result == std::nullopt) {
            return std::nullopt;
        }

        if (raw_result->status ==
                sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
            if (is_looped_video) {
                log::zed().info("Wrapping SVO around");
                reconnect();
            } else {
                // Black out all three buffers. The right span was previously
                // never passed (the left one was given twice).
                // NOTE(review): assumes copy_black_frame takes
                // (left, right, xyza) - confirm against its declaration.
                copy_black_frame(bgra_left, bgra_right, xyza);
                result.eof = true;
                break;
            }
#ifdef __aarch64__
        } else if (raw_result->status ==
                   sl::ERROR_CODE::CAMERA_NOT_INITIALIZED) {
            // From ZED 3.2.1 this condition holds for no
            // apparent reason. The SDK does not internally
            // report any error.
            log::zed().warn(
                "Graph Shutdown not possible because of camera "
                "uninitialized. Forcing reconnection."
                "\nsl::ERROR_CODE: {}",
                raw_result->status);
            reconnect();
#endif
        }

        if (raw_result->status == sl::ERROR_CODE::SUCCESS) {
            timestamp = raw_result->timestamp;
            break;
        }
    } while (true);

    result.success = true;
    result.timestamp = Timestamp(timestamp);
    result.bgra_left = bgra_left.as_view();
    result.bgra_right = bgra_right.as_view();
    result.xyza = xyza.as_view();
    return result;
}
// Reads the next frame into the caller-provided spans.
//
// Holds camera_access_mutex_ for the whole call and loops until
// get_raw_frame reports SUCCESS, reconnecting along the way when
//   - a live source (not a video) has been waited on for too long,
//   - a live source's SVO recording has reached its maximum,
//   - a looped video has reached its end, or
//   - (aarch64 only) the SDK reports CAMERA_NOT_INITIALIZED.
//
// @return std::nullopt if get_raw_frame yields no result. Otherwise a result
//         with success == true whose views are made from the caller's spans.
//         eof is true when a non-looped video has ended; in that case
//         copy_black_frame is applied to the spans and the timestamp stays
//         value-initialised.
std::optional<FrameProducerReadResult> Zed::read(const GpuBgraSpan& bgra_left,
                                                 const GpuBgraSpan& bgra_right,
                                                 const GpuXyzaSpan& xyza)
{
    std::lock_guard<std::mutex> lock(camera_access_mutex_);
    using clock = std::chrono::high_resolution_clock;
    auto read_time = clock::now();

    const auto is_video = impl_->is_video();
    const auto is_looped_video = impl_->is_looped_svo();

    FrameProducerReadResult result;
    result.eof = false;

    std::chrono::milliseconds timestamp{};
    do {
        auto wait = clock::now() - read_time;
        // reconnect in case waiting for new frame is long
        // or svo recording reached max duration per file
        if (!is_video && is_waiting_long(wait)) {
            log::zed().warn("Waiting too long, reconnecting");
            reconnect();
            read_time = clock::now();
        }

        if (!is_video && is_svo_recording_max()) {
            log::zed().info("Recording to new file");
            reconnect();
            read_time = clock::now();
        }

        auto raw_result = impl_->get_raw_frame(
            bgra_left, bgra_right, xyza);

        if (raw_result == std::nullopt) {
            return std::nullopt;
        }

        if (raw_result->status ==
                sl::ERROR_CODE::END_OF_SVOFILE_REACHED) {
            if (is_looped_video) {
                log::zed().info("Wrapping SVO around");
                reconnect();
            } else {
                // Black out all three buffers. The right span was previously
                // never passed (the left one was given twice).
                // NOTE(review): assumes copy_black_frame takes
                // (left, right, xyza) - confirm against its declaration.
                copy_black_frame(bgra_left, bgra_right, xyza);
                result.eof = true;
                break;
            }
#ifdef __aarch64__
        } else if (raw_result->status ==
                   sl::ERROR_CODE::CAMERA_NOT_INITIALIZED) {
            // From ZED 3.2.1 this condition holds for no
            // apparent reason. The SDK does not internally
            // report any error.
            log::zed().warn(
                "Graph Shutdown not possible because of camera "
                "uninitialized. Forcing reconnection."
                "\nsl::ERROR_CODE: {}",
                raw_result->status);
            reconnect();
#endif
        }

        if (raw_result->status == sl::ERROR_CODE::SUCCESS) {
            timestamp = raw_result->timestamp;
            break;
        }
    } while (true);

    result.success = true;
    result.timestamp = Timestamp(timestamp);
    result.bgra_left = bgra_left.as_view();
    result.bgra_right = bgra_right.as_view();
    result.xyza = xyza.as_view();
    return result;
}
  • locking mutex on every iteration (used to be multi-threading)
  • reconnect inside the loop then continue?
  • time-based reconnects
  • platform-specific hacks
  • various flags for checking video, video-looped, camera, network, recording, etc

Fin.

Made with Slides.com