tiny_conv.hpp

/**
 * @file tiny_conv.hpp
 * @brief Convolutional layers for tiny_ai (Conv1D / Conv2D).
 */

#pragma once

#include "tiny_layer.hpp"

#ifdef __cplusplus

namespace tiny
{

class Conv1D : public Layer
{
public:
    Tensor weight;   // [out_ch, in_ch, kernel]
    Tensor bias;     // [out_ch]
#if TINY_AI_TRAINING_ENABLED
    Tensor dweight;
    Tensor dbias;
#endif

    Conv1D(int in_channels, int out_channels, int kernel_size,
           int stride = 1, int padding = 0, bool use_bias = true);

    Tensor forward(const Tensor &x) override;

#if TINY_AI_TRAINING_ENABLED
    Tensor backward(const Tensor &grad_out) override;
    void   collect_params(std::vector<ParamGroup> &groups) override;
#endif

    int in_channels()  const { return in_ch_; }
    int out_channels() const { return out_ch_; }

private:
    int  in_ch_, out_ch_, kernel_, stride_, padding_;
    bool use_bias_;
#if TINY_AI_TRAINING_ENABLED
    Tensor x_cache_;
#endif
    void he_init();
};

class Conv2D : public Layer
{
public:
    Tensor weight;   // [out_ch, in_ch, kH, kW]
    Tensor bias;     // [out_ch]
#if TINY_AI_TRAINING_ENABLED
    Tensor dweight;
    Tensor dbias;
#endif

    Conv2D(int in_channels, int out_channels, int kH, int kW,
           int sH = 1, int sW = 1, int pH = 0, int pW = 0,
           bool use_bias = true);

    Tensor forward(const Tensor &x) override;

#if TINY_AI_TRAINING_ENABLED
    Tensor backward(const Tensor &grad_out) override;
    void   collect_params(std::vector<ParamGroup> &groups) override;
#endif

private:
    int  in_ch_, out_ch_, kH_, kW_, sH_, sW_, pH_, pW_;
    bool use_bias_;
#if TINY_AI_TRAINING_ENABLED
    Tensor x_cache_;
#endif
    void he_init();
};

} // namespace tiny

#endif // __cplusplus
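
A quick usage sketch against this header. It leans only on what the declarations above expose (the dimension constructor, `zero()`, `shape`, and `forward()`); the tensor sizes are arbitrary, and zeroed inputs stand in for real data.

#include "tiny_conv.hpp"
#include <cstdio>

int main()
{
    using namespace tiny;

    // Conv1D: 8 -> 16 channels, kernel 5, stride 2, padding 2.
    Conv1D c1(8, 16, 5, 2, 2);
    Tensor x1(2, 8, 100);          // [B=2, C=8, L=100]
    x1.zero();                     // stand-in for real input
    Tensor y1 = c1.forward(x1);    // Lo = (100 + 4 - 5) / 2 + 1 = 50
    printf("conv1d: [%d, %d, %d]\n", y1.shape[0], y1.shape[1], y1.shape[2]);

    // Conv2D: 3 -> 8 channels, 3x3 kernel, stride 1, "same" padding 1.
    Conv2D c2(3, 8, 3, 3, 1, 1, 1, 1);
    Tensor x2(1, 3, 32, 32);       // [B=1, C=3, H=32, W=32]
    x2.zero();
    Tensor y2 = c2.forward(x2);    // OH = OW = (32 + 2 - 3) / 1 + 1 = 32
    printf("conv2d: [%d, %d, %d, %d]\n",
           y2.shape[0], y2.shape[1], y2.shape[2], y2.shape[3]);
    return 0;
}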

tiny_conv.cpp

/**
 * @file tiny_conv.cpp
 * @brief Conv1D and Conv2D implementations.
 */

#include "tiny_conv.hpp"
#include <cmath>
#include <cstdlib>
#include <cstring>

#ifdef __cplusplus

namespace tiny
{

// ============================================================================
// Conv1D
// ============================================================================
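//
// Shapes: x [B, in_ch, Lin] -> y [B, out_ch, Lo],
//   Lo = (Lin + 2*padding - kernel) / stride + 1
// y[b,oc,t] = bias[oc]
//           + sum over (ic, k) of weight[oc,ic,k] * x_pad[b,ic,t*stride + k]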

Conv1D::Conv1D(int in_ch, int out_ch, int kernel, int stride, int padding, bool use_bias)
    : Layer("conv1d", true),
      weight(out_ch, in_ch, kernel),
      bias(use_bias ? out_ch : 0),
      in_ch_(in_ch), out_ch_(out_ch), kernel_(kernel),
      stride_(stride), padding_(padding), use_bias_(use_bias)
{
#if TINY_AI_TRAINING_ENABLED
    // Gradients must start at zero; backward() accumulates into them.
    dweight = Tensor::zeros_like(weight);
    if (use_bias_) dbias = Tensor::zeros_like(bias);
#endif
    he_init();
}

void Conv1D::he_init()
{
    // He initialization: N(0, sqrt(2 / fan_in)), fan_in = in_ch * kernel.
    float std_dev = sqrtf(2.0f / (float)(in_ch_ * kernel_));
    for (int i = 0; i < weight.size; i++)
    {
        // Box-Muller: two uniforms -> one standard normal sample.
        // Promote to float before the +1 so rand() == RAND_MAX cannot
        // overflow, and shift u1 away from 0 so logf(u1) stays finite.
        float u1 = ((float)rand() + 1.0f) / ((float)RAND_MAX + 1.0f);
        float u2 = (float)rand()          / ((float)RAND_MAX + 1.0f);
        float n  = sqrtf(-2.0f * logf(u1)) * cosf(2.0f * TINY_PI * u2);
        weight.data[i] = n * std_dev;
    }
    if (use_bias_) bias.zero();
}

Tensor Conv1D::forward(const Tensor &x)
{
    int B   = x.shape[0];
    int Lin = x.shape[2];
    int Lo  = (Lin + 2 * padding_ - kernel_) / stride_ + 1;

    // Zero-pad along the length dimension. xp must start zeroed so the
    // padding border is 0 rather than uninitialized memory.
    int Lp = Lin + 2 * padding_;
    Tensor xp(B, in_ch_, Lp);
    xp.zero();
    for (int b = 0; b < B; b++)
        for (int c = 0; c < in_ch_; c++)
            for (int l = 0; l < Lin; l++)
                xp.at(b, c, l + padding_) = x.at(b, c, l);

#if TINY_AI_TRAINING_ENABLED
    x_cache_ = xp.clone();
#endif

    Tensor out(B, out_ch_, Lo);
    for (int b = 0; b < B; b++)
        for (int oc = 0; oc < out_ch_; oc++)
            for (int t = 0; t < Lo; t++)
            {
                float sum = use_bias_ ? bias.data[oc] : 0.0f;
                for (int ic = 0; ic < in_ch_; ic++)
                    for (int k = 0; k < kernel_; k++)
                        sum += weight.at(oc, ic, k) * xp.at(b, ic, t * stride_ + k);
                out.at(b, oc, t) = sum;
            }
    return out;
}

#if TINY_AI_TRAINING_ENABLED

// Accumulates dweight / dbias from grad_out and the cached padded input,
// then scatters grad_out back through the same taps to form dL/dx
// (equivalent to a "full" convolution with the flipped kernel) and
// finally crops off the padding.
Tensor Conv1D::backward(const Tensor &grad_out)
{
    int B   = x_cache_.shape[0];
    int Lp  = x_cache_.shape[2];
    int Lo  = grad_out.shape[2];
    int Lin = Lp - 2 * padding_;

    // Gradient w.r.t. the padded input; must start zeroed, since it is
    // accumulated into below.
    Tensor g_xp(B, in_ch_, Lp);
    g_xp.zero();

    for (int b = 0; b < B; b++)
        for (int oc = 0; oc < out_ch_; oc++)
            for (int t = 0; t < Lo; t++)
            {
                float go = grad_out.at(b, oc, t);
                for (int ic = 0; ic < in_ch_; ic++)
                    for (int k = 0; k < kernel_; k++)
                        dweight.at(oc, ic, k) += go * x_cache_.at(b, ic, t * stride_ + k);
                if (use_bias_) dbias.data[oc] += go;
            }

    for (int b = 0; b < B; b++)
        for (int oc = 0; oc < out_ch_; oc++)
            for (int t = 0; t < Lo; t++)
            {
                float go = grad_out.at(b, oc, t);
                for (int ic = 0; ic < in_ch_; ic++)
                    for (int k = 0; k < kernel_; k++)
                        g_xp.at(b, ic, t * stride_ + k) += go * weight.at(oc, ic, k);
            }

    Tensor g_x(B, in_ch_, Lin);
    for (int b = 0; b < B; b++)
        for (int ic = 0; ic < in_ch_; ic++)
            for (int l = 0; l < Lin; l++)
                g_x.at(b, ic, l) = g_xp.at(b, ic, l + padding_);
    return g_x;
}

void Conv1D::collect_params(std::vector<ParamGroup> &groups)
{
    groups.push_back({&weight, &dweight});
    if (use_bias_) groups.push_back({&bias, &dbias});
}

#endif

// ============================================================================
// Conv2D
// ============================================================================
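//
// Shapes: x [B, in_ch, H, W] -> y [B, out_ch, OH, OW],
//   OH = (H + 2*pH - kH) / sH + 1,  OW = (W + 2*pW - kW) / sW + 1
// Same direct scheme as Conv1D, with a 2-D kernel window.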

Conv2D::Conv2D(int in_ch, int out_ch, int kH, int kW,
               int sH, int sW, int pH, int pW, bool use_bias)
    : Layer("conv2d", true),
      weight(out_ch, in_ch, kH, kW),
      bias(use_bias ? out_ch : 0),
      in_ch_(in_ch), out_ch_(out_ch),
      kH_(kH), kW_(kW), sH_(sH), sW_(sW), pH_(pH), pW_(pW),
      use_bias_(use_bias)
{
#if TINY_AI_TRAINING_ENABLED
    // Gradients must start at zero; backward() accumulates into them.
    dweight = Tensor::zeros_like(weight);
    if (use_bias_) dbias = Tensor::zeros_like(bias);
#endif
    he_init();
}

void Conv2D::he_init()
{
    // He initialization: N(0, sqrt(2 / fan_in)), fan_in = in_ch * kH * kW.
    float std_dev = sqrtf(2.0f / (float)(in_ch_ * kH_ * kW_));
    for (int i = 0; i < weight.size; i++)
    {
        // Box-Muller, as in Conv1D::he_init; float-promote before the +1
        // to avoid integer overflow when rand() == RAND_MAX.
        float u1 = ((float)rand() + 1.0f) / ((float)RAND_MAX + 1.0f);
        float u2 = (float)rand()          / ((float)RAND_MAX + 1.0f);
        float n  = sqrtf(-2.0f * logf(u1)) * cosf(2.0f * TINY_PI * u2);
        weight.data[i] = n * std_dev;
    }
    if (use_bias_) bias.zero();
}

Tensor Conv2D::forward(const Tensor &x)
{
    int B  = x.shape[0];
    int H  = x.shape[2];
    int W  = x.shape[3];
    int OH = (H + 2 * pH_ - kH_) / sH_ + 1;
    int OW = (W + 2 * pW_ - kW_) / sW_ + 1;

    // Zero-pad spatially. xp must start zeroed so the padding border is 0
    // rather than uninitialized memory.
    int Hp = H + 2 * pH_;
    int Wp = W + 2 * pW_;
    Tensor xp(B, in_ch_, Hp, Wp);
    xp.zero();
    for (int b = 0; b < B; b++)
        for (int c = 0; c < in_ch_; c++)
            for (int h = 0; h < H; h++)
                for (int w = 0; w < W; w++)
                    xp.at(b, c, h + pH_, w + pW_) = x.at(b, c, h, w);

#if TINY_AI_TRAINING_ENABLED
    x_cache_ = xp.clone();
#endif

    Tensor out(B, out_ch_, OH, OW);
    for (int b = 0; b < B; b++)
        for (int oc = 0; oc < out_ch_; oc++)
            for (int oh = 0; oh < OH; oh++)
                for (int ow = 0; ow < OW; ow++)
                {
                    float sum = use_bias_ ? bias.data[oc] : 0.0f;
                    for (int ic = 0; ic < in_ch_; ic++)
                        for (int kh = 0; kh < kH_; kh++)
                            for (int kw = 0; kw < kW_; kw++)
                                sum += weight.at(oc, ic, kh, kw) *
                                       xp.at(b, ic, oh * sH_ + kh, ow * sW_ + kw);
                    out.at(b, oc, oh, ow) = sum;
                }
    return out;
}

#if TINY_AI_TRAINING_ENABLED

// Same scheme as Conv1D::backward, with 2-D windows; a single fused pass
// accumulates dweight / dbias and scatters grad_out into g_xp.
Tensor Conv2D::backward(const Tensor &grad_out)
{
    int B  = x_cache_.shape[0];
    int Hp = x_cache_.shape[2];
    int Wp = x_cache_.shape[3];
    int OH = grad_out.shape[2];
    int OW = grad_out.shape[3];

    // Gradient w.r.t. the padded input; must start zeroed, since it is
    // accumulated into below.
    Tensor g_xp(B, in_ch_, Hp, Wp);
    g_xp.zero();

    for (int b = 0; b < B; b++)
        for (int oc = 0; oc < out_ch_; oc++)
            for (int oh = 0; oh < OH; oh++)
                for (int ow = 0; ow < OW; ow++)
                {
                    float go = grad_out.at(b, oc, oh, ow);
                    if (use_bias_) dbias.data[oc] += go;
                    for (int ic = 0; ic < in_ch_; ic++)
                        for (int kh = 0; kh < kH_; kh++)
                            for (int kw = 0; kw < kW_; kw++)
                            {
                                dweight.at(oc, ic, kh, kw) +=
                                    go * x_cache_.at(b, ic, oh * sH_ + kh, ow * sW_ + kw);
                                g_xp.at(b, ic, oh * sH_ + kh, ow * sW_ + kw) +=
                                    go * weight.at(oc, ic, kh, kw);
                            }
                }

    int H = Hp - 2 * pH_;
    int W = Wp - 2 * pW_;
    Tensor g_x(B, in_ch_, H, W);
    for (int b = 0; b < B; b++)
        for (int c = 0; c < in_ch_; c++)
            for (int h = 0; h < H; h++)
                for (int w = 0; w < W; w++)
                    g_x.at(b, c, h, w) = g_xp.at(b, c, h + pH_, w + pW_);
    return g_x;
}

void Conv2D::collect_params(std::vector<ParamGroup> &groups)
{
    groups.push_back({&weight, &dweight});
    if (use_bias_) groups.push_back({&bias, &dbias});
}

#endif // TINY_AI_TRAINING_ENABLED

} // namespace tiny

#endif // __cplusplus
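
To close the loop, a sketch of a finite-difference check for Conv1D::backward plus a minimal SGD update over collect_params(). It assumes training support is compiled in, and it guesses that ParamGroup exposes the two pointers pushed above as `param` and `grad` (hypothetical names; match them to the actual definition in tiny_layer.hpp). The loss is sum(y), so grad_out is all ones and the finite-difference estimate should match the analytic gradient up to float rounding.

#include "tiny_conv.hpp"
#include <cstdio>
#include <vector>

#if TINY_AI_TRAINING_ENABLED
using namespace tiny;

// Naive SGD step over the collected parameters. ParamGroup's field names
// are not shown in this file; `param` and `grad` below are hypothetical
// stand-ins for the two pointers pushed in collect_params().
static void sgd_step(std::vector<ParamGroup> &groups, float lr)
{
    for (auto &g : groups)
        for (int i = 0; i < g.param->size; i++)
        {
            g.param->data[i] -= lr * g.grad->data[i];
            g.grad->data[i]   = 0.0f;   // clear for the next step
        }
}

static float sum_all(const Tensor &t)
{
    float s = 0.0f;
    for (int i = 0; i < t.size; i++) s += t.data[i];
    return s;
}

int main()
{
    // Loss L(y) = sum(y), so dL/dy is all ones and
    // dL/dw ~ (L(w+h) - L(w-h)) / (2h) should match dweight.
    Conv1D conv(2, 3, 3, 1, 1);
    Tensor x(1, 2, 8);
    for (int i = 0; i < x.size; i++) x.data[i] = 0.01f * (float)i;

    Tensor y = conv.forward(x);
    Tensor ones(y.shape[0], y.shape[1], y.shape[2]);
    for (int i = 0; i < ones.size; i++) ones.data[i] = 1.0f;
    conv.backward(ones);                      // analytic gradient

    const float h  = 1e-3f;
    float w0 = conv.weight.data[0];
    conv.weight.data[0] = w0 + h;
    float lp = sum_all(conv.forward(x));
    conv.weight.data[0] = w0 - h;
    float lm = sum_all(conv.forward(x));
    conv.weight.data[0] = w0;

    printf("dweight[0]: analytic %.6f vs numeric %.6f\n",
           conv.dweight.data[0], (lp - lm) / (2.0f * h));

    // A training step would then collect and update the parameters:
    std::vector<ParamGroup> groups;
    conv.collect_params(groups);
    sgd_step(groups, 0.01f);
    return 0;
}
#endif // TINY_AI_TRAINING_ENABLED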