Python FFI调用C++ ML库：对象生命周期管理与异常安全机制

大家好，今天我们来深入探讨一个非常实用的主题：如何使用Python的Foreign Function Interface (FFI) 调用 C++ 编写的机器学习 (ML) 库，并重点关注对象生命周期管理和异常安全机制。这在实际项目中非常常见，尤其是在需要利用 C++ 的高性能和现有 ML 库，同时又希望保持 Python 开发的灵活性和易用性时。

1. 为什么选择 FFI？

首先，我们来明确一下为什么选择 FFI，而不是其他方案，例如使用Boost.Python 或 Cython。虽然 Boost.Python 和 Cython 在某些情况下更方便，但 FFI 提供了更底层的控制，避免了额外的编译依赖，并且更容易与复杂的 C++ 代码集成。FFI 的主要优势在于：

灵活性： 可以直接调用以 C 接口（extern "C"）导出的函数；对于 C++ 类和 STL 类型，只需要再编写一层很薄的 C 风格包装函数（见第 4 节）。
控制力： 可以精细地控制内存管理和数据类型转换。
避免依赖： 避免了对特定 C++ 编译器的依赖，增加了跨平台兼容性。
现有代码复用： 可以直接复用现有的 C++ 代码，无需修改或重写。

2. FFI 基础：libffi 与 cffi

在 Python 中，我们通常使用 cffi 库来实现 FFI。cffi 建立在 libffi 之上，libffi 是一个可移植的 C 调用接口库。cffi 提供了两种使用模式：

ABI 模式（Application Binary Interface）： 直接调用 C 编译的共享库，不需要额外的编译步骤，但类型检查较弱，需要对 C 的 ABI 有一定的了解。
API 模式（Application Programming Interface）： 需要提供 C 头文件的声明，cffi 会根据声明生成 C 代码，然后编译成 Python 扩展模块。API 模式提供了更好的类型检查和安全性。

为了让示例保持简单，下面的代码使用 ABI 模式（ffi.dlopen），因此不需要额外的编译步骤；在对健壮性要求更高的生产环境中，建议改用 API 模式（ffi.set_source() 配合 ffi.compile()），以获得编译期的类型检查。

3. C++ ML 库示例：一个简单的线性回归

为了演示 FFI 的使用，我们创建一个简单的 C++ 线性回归库。虽然实际的 ML 库会更复杂，但这个例子足以说明问题。

linear_regression.h

#ifndef LINEAR_REGRESSION_H
#define LINEAR_REGRESSION_H

#include <vector>

/// Small linear-regression model that serves as the C++ side of the FFI example.
class LinearRegression {
public:
  /// Creates a model with no coefficients and a zero intercept.
  LinearRegression();
  ~LinearRegression();

  /// Estimates the coefficients and the intercept from training data.
  /// @param X training samples: one inner vector per sample, one value per feature
  /// @param y target values: one value per sample
  void fit(const std::vector<std::vector<double>>& X, const std::vector<double>& y);

  /// Evaluates the model for a single sample.
  /// @param x feature values of the sample
  /// @return intercept plus the coefficient-weighted sum of the feature values
  double predict(const std::vector<double>& x) const;

private:
  std::vector<double> coefficients_;  ///< one coefficient per feature, filled in by fit()
  double intercept_;                  ///< constant term, filled in by fit()
};

#endif // LINEAR_REGRESSION_H

linear_regression.cpp

#include "linear_regression.h"

#include <cmath>
#include <cstddef>
#include <numeric>
#include <stdexcept>

LinearRegression::LinearRegression() : coefficients_(), intercept_(0.0) {}

// The members release their own storage, so the compiler-generated destructor is enough.
LinearRegression::~LinearRegression() = default;

void LinearRegression::fit(const std::vector<std::vector<double>>& X, const std::vector<double>& y) {
  // Simple linear regression using least squares
  int n = X.size();
  int p = X[0].size();

  // Calculate means
  std::vector<double> x_means(p, 0.0);
  double y_mean = std::accumulate(y.begin(), y.end(), 0.0) / n;

  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < p; ++j) {
      x_means[j] += X[i][j];
    }
  }

  for (int j = 0; j < p; ++j) {
    x_means[j] /= n;
  }

  // Calculate coefficients
  coefficients_.resize(p);
  for (int j = 0; j < p; ++j) {
    double numerator = 0.0;
    double denominator = 0.0;
    for (int i = 0; i < n; ++i) {
      numerator += (X[i][j] - x_means[j]) * (y[i] - y_mean);
      denominator += (X[i][j] - x_means[j]) * (X[i][j] - x_means[j]);
    }
    coefficients_[j] = numerator / denominator;
  }

  // Calculate intercept
  intercept_ = y_mean;
  for (int j = 0; j < p; ++j) {
    intercept_ -= coefficients_[j] * x_means[j];
  }
}

/// Evaluates the model for one sample.
///
/// @param x feature values; must provide at least one value per coefficient.
///          Values beyond the number of coefficients are ignored, as before.
/// @return intercept + sum(coefficients[i] * x[i])
/// @throws std::invalid_argument if x has fewer values than the model has
///         coefficients (previously this read past the end of x).
double LinearRegression::predict(const std::vector<double>& x) const {
  if (x.size() < coefficients_.size()) {
    throw std::invalid_argument("predict: x has fewer values than the model has coefficients");
  }
  // Same left-to-right accumulation as a hand-written loop starting at the intercept.
  return std::inner_product(coefficients_.begin(), coefficients_.end(), x.begin(), intercept_);
}

编译 C++ 库：

g++ -std=c++11 -fPIC -shared linear_regression.cpp -o liblinear_regression.so

4. 使用 cffi 调用 C++ 库

现在，我们使用 cffi 来调用这个 C++ 库。

linear_regression_ffi.py

from cffi import FFI
import numpy as np

# One FFI instance holds every C declaration used by this module.
ffi = FFI()

# Declarations of the C interface exported by linear_regression_wrapper.cpp.
# cffi trusts this text: struct layouts and function signatures must match the
# C++ definitions exactly, because nothing verifies them when the library is
# opened with ffi.dlopen().
ffi.cdef("""
    typedef struct {
        double* data;
        int rows;
        int cols;
    } Matrix;

    typedef struct {
        double* data;
        int size;
    } Vector;

    typedef struct LinearRegression LinearRegression;
    LinearRegression* linear_regression_new();
    void linear_regression_fit(LinearRegression* lr, Matrix* X, Vector* y);
    double linear_regression_predict(LinearRegression* lr, Vector* x);
    void linear_regression_delete(LinearRegression* lr);

    Matrix* matrix_new(int rows, int cols);
    void matrix_set(Matrix* matrix, int row, int col, double value);
    double matrix_get(Matrix* matrix, int row, int col);
    void matrix_delete(Matrix* matrix);

    Vector* vector_new(int size);
    void vector_set(Vector* vector, int index, double value);
    double vector_get(Vector* vector, int index);
    void vector_delete(Vector* vector);
""")

# Open the shared library from the current working directory; the functions
# declared above are then reachable as attributes of `lib`.
lib = ffi.dlopen("./liblinear_regression.so")

class LinearRegression:
    """Python owner of one C++ ``LinearRegression`` object reached through cffi.

    The native object is created in ``__init__`` and destroyed exactly once,
    either explicitly through ``close()`` / the ``with`` statement or, as a
    fallback, when the Python object is finalized.
    """

    def __init__(self):
        # Assign the attributes before calling into C so that __del__ stays
        # safe even if the allocation below raises.
        self._lr = None
        self._n_features = None
        handle = lib.linear_regression_new()
        if handle == ffi.NULL:
            raise MemoryError("linear_regression_new() returned NULL")
        self._lr = handle

    def close(self):
        """Destroy the native object now. Calling it more than once is harmless."""
        handle = getattr(self, "_lr", None)
        if handle is None:
            return
        # Forget the handle first so a failure below can never cause a second delete.
        self._lr = None
        lib.linear_regression_delete(handle)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _handle(self):
        """Return the live native handle or raise if the object was closed."""
        if self._lr is None:
            raise RuntimeError("LinearRegression has already been closed")
        return self._lr

    @staticmethod
    def _vector_view(values):
        """Describe a 1-D float64 array as a C ``Vector`` without copying.

        The struct only borrows the array's buffer, so the caller must keep
        ``values`` alive for as long as the returned struct is in use.
        """
        view = ffi.new("Vector*")
        view.data = ffi.cast("double*", values.ctypes.data)
        view.size = values.shape[0]
        return view

    def fit(self, X, y):
        """Train the model.

        X: array-like of shape (n_samples, n_features)
        y: array-like of shape (n_samples,)

        Raises ValueError when the shapes do not fit together; the native code
        trusts the sizes it is given, so they are checked here.
        """
        # The wrapper indexes X as row-major contiguous memory, which
        # np.asarray alone does not guarantee (e.g. for transposed views).
        X = np.ascontiguousarray(X, dtype=np.float64)
        y = np.ascontiguousarray(y, dtype=np.float64)

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if y.ndim != 1:
            raise ValueError("y must be 1-dimensional (n_samples,)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        X_c = ffi.new("Matrix*")
        X_c.data = ffi.cast("double*", X.ctypes.data)
        X_c.rows = X.shape[0]
        X_c.cols = X.shape[1]
        y_c = self._vector_view(y)

        # X and y stay referenced by the local variables until the call
        # returns, which keeps the borrowed buffers valid.
        lib.linear_regression_fit(self._handle(), X_c, y_c)
        self._n_features = X.shape[1]

    def predict(self, x):
        """Predict the target for one sample.

        x: array-like of shape (n_features,)

        Raises ValueError if x is not 1-dimensional or, once the model has been
        fitted, if its length differs from the number of training features.
        """
        x = np.ascontiguousarray(x, dtype=np.float64)
        if x.ndim != 1:
            raise ValueError("x must be 1-dimensional (n_features,)")
        if self._n_features is not None and x.shape[0] != self._n_features:
            raise ValueError("x must contain exactly one value per training feature")

        x_c = self._vector_view(x)
        return lib.linear_regression_predict(self._handle(), x_c)

    def __del__(self):
        # Last-resort cleanup; close() copes with a partially constructed
        # object and with having been called before.
        self.close()

linear_regression_wrapper.cpp

因为C++标准库中的std::vector 无法直接被cffi理解，所以我们需要一些wrapper函数来进行转换。

#include "linear_regression.h"
#include <vector>
#include <iostream>

// C-compatible facade over LinearRegression so that cffi, which only
// understands C declarations, can drive the C++ class.
//
// NOTE: in this first version linear_regression_new/fit/predict do not contain
// C++ exceptions. An exception that leaves an extern "C" function and reaches
// the C caller has undefined behaviour (in practice the process is
// terminated); the exception-handling section of the article deals with that.
extern "C" {

    // Row-major matrix view: element (r, c) is data[r * cols + c].
    typedef struct {
        double* data;
        int rows;
        int cols;
    } Matrix;

    // Contiguous array of `size` doubles.
    typedef struct {
        double* data;
        int size;
    } Vector;

    // Allocates a model; ownership passes to the caller, who must release it
    // with linear_regression_delete().
    LinearRegression* linear_regression_new() {
        return new LinearRegression();
    }

    // Copies the C buffers into std::vector objects and trains the model.
    // The caller keeps ownership of X and y; both must describe valid memory.
    void linear_regression_fit(LinearRegression* lr, Matrix* X, Vector* y) {
        std::vector<std::vector<double>> X_cpp(X->rows, std::vector<double>(X->cols));
        std::vector<double> y_cpp(y->size);

        for (int i = 0; i < X->rows; ++i) {
            // Compute the row offset in 64 bits: i * cols overflows int for
            // matrices with more than INT_MAX elements.
            const double* row = X->data + static_cast<long long>(i) * X->cols;
            for (int j = 0; j < X->cols; ++j) {
                X_cpp[i][j] = row[j];
            }
        }

        for (int i = 0; i < y->size; ++i) {
            y_cpp[i] = y->data[i];
        }

        lr->fit(X_cpp, y_cpp);
    }

    // Copies x into a std::vector and returns the model's prediction for it.
    double linear_regression_predict(LinearRegression* lr, Vector* x) {
        std::vector<double> x_cpp(x->size);
        for (int i = 0; i < x->size; ++i) {
            x_cpp[i] = x->data[i];
        }
        return lr->predict(x_cpp);
    }

    // Destroys a model created by linear_regression_new(); null is accepted.
    void linear_regression_delete(LinearRegression* lr) {
        delete lr;
    }

    // Allocates a zero-initialised rows x cols matrix.
    // Returns null for negative dimensions or when memory cannot be obtained,
    // so no exception escapes and nothing is leaked on failure.
    Matrix* matrix_new(int rows, int cols) {
        if (rows < 0 || cols < 0) {
            return nullptr;
        }
        Matrix* matrix = nullptr;
        try {
            matrix = new Matrix{nullptr, rows, cols};
            matrix->data = new double[static_cast<unsigned long long>(rows) * cols]();
        } catch (...) {
            delete matrix;  // data is still null at this point
            return nullptr;
        }
        return matrix;
    }

    // Precondition: matrix is non-null and (row, col) is inside its bounds.
    void matrix_set(Matrix* matrix, int row, int col, double value) {
        matrix->data[static_cast<long long>(row) * matrix->cols + col] = value;
    }

    // Precondition: matrix is non-null and (row, col) is inside its bounds.
    double matrix_get(Matrix* matrix, int row, int col) {
       return matrix->data[static_cast<long long>(row) * matrix->cols + col];
    }

    // Releases a matrix obtained from matrix_new(); null is accepted.
    void matrix_delete(Matrix* matrix) {
        if (matrix == nullptr) {
            return;
        }
        delete[] matrix->data;
        delete matrix;
    }

    // Allocates a zero-initialised vector of `size` doubles.
    // Returns null for a negative size or when memory cannot be obtained.
    Vector* vector_new(int size) {
        if (size < 0) {
            return nullptr;
        }
        Vector* vector = nullptr;
        try {
            vector = new Vector{nullptr, size};
            vector->data = new double[static_cast<unsigned long long>(size)]();
        } catch (...) {
            delete vector;  // data is still null at this point
            return nullptr;
        }
        return vector;
    }

    // Precondition: vector is non-null and index is inside its bounds.
    void vector_set(Vector* vector, int index, double value) {
        vector->data[index] = value;
    }

    // Precondition: vector is non-null and index is inside its bounds.
    double vector_get(Vector* vector, int index) {
        return vector->data[index];
    }

    // Releases a vector obtained from vector_new(); null is accepted.
    void vector_delete(Vector* vector) {
        if (vector == nullptr) {
            return;
        }
        delete[] vector->data;
        delete vector;
    }
}

编译 wrapper 文件:

g++ -std=c++11 -fPIC -shared linear_regression_wrapper.cpp linear_regression.cpp -o liblinear_regression.so

测试代码：

import numpy as np
from linear_regression_ffi import LinearRegression

# Small, fixed training set: three samples with two features each.
train_X = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64)
train_y = np.array([5, 11, 17], dtype=np.float64)
query = np.array([7, 8], dtype=np.float64)

# Build the wrapper object (this allocates the underlying C++ model) and train it.
model = LinearRegression()
model.fit(train_X, train_y)

# Evaluate the fitted model on one new sample and show the result.
print("Prediction:", model.predict(query))

代码解释：

ffi.cdef(): 声明 C 接口中函数和数据结构的签名。在本文使用的 ABI 模式（ffi.dlopen）下，cffi 只根据这些声明在运行时完成调用和类型转换，不会生成或编译 C 代码，因此声明必须与 C++ 包装层中的定义完全一致。
ffi.dlopen(): 加载 C++ 共享库。
LinearRegression 类: Python 类，封装了对 C++ 线性回归对象的调用。
linear_regression_new(): 创建 C++ 线性回归对象。
linear_regression_fit(): 训练模型。
linear_regression_predict(): 进行预测。
linear_regression_delete(): 释放 C++ 线性回归对象。
__del__(): 析构函数，用于在 Python 对象被销毁时释放 C++ 对象。

5. 对象生命周期管理

对象生命周期管理是 FFI 中最重要的方面之一。如果不正确地管理对象的生命周期，会导致内存泄漏或程序崩溃。

关键点：

所有权： 必须明确谁拥有 C++ 对象的生命周期。在这个例子中，Python 类 LinearRegression 拥有 C++ 线性回归对象的生命周期。
构造与析构： 在 Python 对象创建时，创建 C++ 对象；在 Python 对象销毁时，销毁 C++ 对象。
资源释放： 确保在适当的时候释放 C++ 对象占用的内存。

使用 __del__ 方法：

Python 的 __del__ 方法会在对象被垃圾回收时调用。我们可以在 __del__ 方法中释放 C++ 对象占用的内存。

class LinearRegression:
    # ... (other methods omitted) ...

    def __del__(self):
        # __init__ may have failed before _lr was assigned, and the object may
        # already have been released, so never assume a live handle here.
        handle = getattr(self, "_lr", None)
        if handle is None:
            return
        # Forget the handle before deleting so the C++ object can never be
        # freed twice.
        self._lr = None
        lib.linear_regression_delete(handle)

注意事项：

__del__ 方法的调用时机是不确定的（在解释器退出时甚至可能根本不会被调用），因此不应该依赖它来执行重要的操作；需要确定性释放资源时，应当提供显式的 close() 方法或上下文管理器。
不要让异常从 __del__ 方法中逸出：Python 会忽略这类异常，只在 stderr 上打印一条警告，错误因此很容易被悄悄吞掉，资源也可能没有被正确释放。

6. 异常安全机制

C++ 代码可能会抛出异常，如果这些异常没有被正确处理，会导致程序崩溃。我们需要在 FFI 调用中增加异常安全机制，以保证程序的健壮性。

处理 C++ 异常的策略：

禁止 C++ 异常传播到 Python： 最简单的方法是在 C++ 代码中捕获所有异常，并将它们转换为错误码或错误消息返回给 Python。
使用 try...catch 块： 在 C++ 代码中使用 try...catch 块捕获异常，并将异常信息传递给 Python。
使用 C++ 异常转换为 Python 异常的机制： 一些 FFI 库提供了将 C++ 异常转换为 Python 异常的机制。

示例：使用 try...catch 块

修改 linear_regression_wrapper.cpp:

#include "linear_regression.h"

#include <cstddef>
#include <exception>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept> // Required for std::runtime_error
#include <string>
#include <vector>

// C-compatible facade over LinearRegression with exception containment.
//
// Rule enforced by this file: no C++ exception ever leaves an extern "C"
// function. Unwinding into the C caller (cffi / CPython) is undefined
// behaviour and in practice terminates the process, so every entry point that
// can throw runs inside guarded(), which turns the exception into
//   * a log line on stderr,
//   * a message retrievable through linear_regression_last_error(), and
//   * a sentinel return value (null pointer or NaN) where one exists.

extern "C" {

    // Row-major matrix view: element (r, c) is data[r * cols + c].
    typedef struct {
        double* data;
        int rows;
        int cols;
    } Matrix;

    // Contiguous array of `size` doubles.
    typedef struct {
        double* data;
        int size;
    } Vector;

}  // extern "C"

namespace {

// Message of the most recent failure on the calling thread; empty means that
// the last guarded call succeeded.
thread_local std::string g_last_error;

// Stores and logs a failure. Must not throw: it runs inside catch handlers.
void record_error(const char* where, const char* what) noexcept {
    try {
        g_last_error = what;
        std::cerr << "C++ Exception in " << where << ": " << what << std::endl;
    } catch (...) {
        // Nothing sensible is left to do if even reporting fails.
    }
}

// Runs `body`, converting any exception into a recorded error.
// Returns true on success and false if an exception was caught.
template <typename Fn>
bool guarded(const char* where, Fn&& body) noexcept {
    g_last_error.clear();
    try {
        body();
        return true;
    } catch (const std::exception& e) {
        record_error(where, e.what());
    } catch (...) {
        record_error(where, "unknown exception");
    }
    return false;
}

// Copies a C Vector into a std::vector after validating the descriptor.
std::vector<double> copy_vector(const Vector* v, const char* name) {
    if (v == nullptr || v->size < 0 || (v->size > 0 && v->data == nullptr)) {
        throw std::invalid_argument(std::string(name) + " is null or has an invalid size");
    }
    return std::vector<double>(v->data, v->data + v->size);
}

// Copies a row-major C Matrix into a vector of rows after validating it.
std::vector<std::vector<double>> copy_matrix(const Matrix* m, const char* name) {
    if (m == nullptr || m->rows < 0 || m->cols < 0 ||
        (m->rows > 0 && m->cols > 0 && m->data == nullptr)) {
        throw std::invalid_argument(std::string(name) + " is null or has invalid dimensions");
    }
    std::vector<std::vector<double>> rows;
    rows.reserve(static_cast<std::size_t>(m->rows));
    for (int r = 0; r < m->rows; ++r) {
        // 64-bit offset: r * cols can overflow int for very large matrices.
        const double* first = m->data + static_cast<std::ptrdiff_t>(r) * m->cols;
        rows.emplace_back(first, first + m->cols);
    }
    return rows;
}

// Returns the flat index of (row, col) or throws if it is out of range.
std::size_t checked_offset(const Matrix* m, int row, int col) {
    if (m == nullptr || m->data == nullptr) {
        throw std::invalid_argument("matrix is null");
    }
    if (row < 0 || row >= m->rows || col < 0 || col >= m->cols) {
        throw std::out_of_range("matrix index out of range");
    }
    return static_cast<std::size_t>(row) * static_cast<std::size_t>(m->cols) +
           static_cast<std::size_t>(col);
}

// Returns `index` as an unsigned offset or throws if it is out of range.
std::size_t checked_offset(const Vector* v, int index) {
    if (v == nullptr || v->data == nullptr) {
        throw std::invalid_argument("vector is null");
    }
    if (index < 0 || index >= v->size) {
        throw std::out_of_range("vector index out of range");
    }
    return static_cast<std::size_t>(index);
}

}  // namespace

extern "C" {

    // Returns the message of the last failed call on this thread, or "" if the
    // last guarded call succeeded. The pointer stays valid only until the next
    // call into this library from the same thread.
    const char* linear_regression_last_error() {
        return g_last_error.c_str();
    }

    // Allocates a model; returns null (and records the error) on failure.
    LinearRegression* linear_regression_new() {
        LinearRegression* lr = nullptr;
        guarded("new", [&] { lr = new LinearRegression(); });
        return lr;
    }

    // Trains the model from borrowed C buffers. On failure the error is
    // recorded and logged; nothing propagates to the caller.
    void linear_regression_fit(LinearRegression* lr, Matrix* X, Vector* y) {
        guarded("fit", [&] {
            if (lr == nullptr) {
                throw std::invalid_argument("model pointer is null");
            }
            const std::vector<std::vector<double>> X_cpp = copy_matrix(X, "X");
            const std::vector<double> y_cpp = copy_vector(y, "y");
            lr->fit(X_cpp, y_cpp);
        });
    }

    // Returns the prediction for x, or NaN (with the error recorded) on failure.
    double linear_regression_predict(LinearRegression* lr, Vector* x) {
        double prediction = std::numeric_limits<double>::quiet_NaN();
        guarded("predict", [&] {
            if (lr == nullptr) {
                throw std::invalid_argument("model pointer is null");
            }
            prediction = lr->predict(copy_vector(x, "x"));
        });
        return prediction;
    }

    // Destroys a model created by linear_regression_new(); null is accepted.
    void linear_regression_delete(LinearRegression* lr) {
        delete lr;
    }

    // Allocates a zero-initialised rows x cols matrix; null on failure.
    Matrix* matrix_new(int rows, int cols) {
        Matrix* matrix = nullptr;
        guarded("matrix_new", [&] {
            if (rows < 0 || cols < 0) {
                throw std::invalid_argument("matrix dimensions must be non-negative");
            }
            const std::size_t count =
                static_cast<std::size_t>(rows) * static_cast<std::size_t>(cols);
            // unique_ptr frees the buffer if allocating the struct throws.
            std::unique_ptr<double[]> data(new double[count]());
            matrix = new Matrix{nullptr, rows, cols};
            matrix->data = data.release();
        });
        return matrix;
    }

    // Writes one element; an invalid matrix or index is recorded as an error
    // and leaves the matrix untouched.
    void matrix_set(Matrix* matrix, int row, int col, double value) {
        guarded("matrix_set", [&] { matrix->data[checked_offset(matrix, row, col)] = value; });
    }

    // Reads one element; returns NaN (with the error recorded) for an invalid
    // matrix or index.
    double matrix_get(Matrix* matrix, int row, int col) {
        double value = std::numeric_limits<double>::quiet_NaN();
        guarded("matrix_get", [&] { value = matrix->data[checked_offset(matrix, row, col)]; });
        return value;
    }

    // Releases a matrix obtained from matrix_new(); null is accepted.
    void matrix_delete(Matrix* matrix) {
        if (matrix == nullptr) {
            return;
        }
        delete[] matrix->data;
        delete matrix;
    }

    // Allocates a zero-initialised vector of `size` doubles; null on failure.
    Vector* vector_new(int size) {
        Vector* vector = nullptr;
        guarded("vector_new", [&] {
            if (size < 0) {
                throw std::invalid_argument("vector size must be non-negative");
            }
            std::unique_ptr<double[]> data(new double[static_cast<std::size_t>(size)]());
            vector = new Vector{nullptr, size};
            vector->data = data.release();
        });
        return vector;
    }

    // Writes one element; an invalid vector or index is recorded as an error.
    void vector_set(Vector* vector, int index, double value) {
        guarded("vector_set", [&] { vector->data[checked_offset(vector, index)] = value; });
    }

    // Reads one element; returns NaN (with the error recorded) on failure.
    double vector_get(Vector* vector, int index) {
        double value = std::numeric_limits<double>::quiet_NaN();
        guarded("vector_get", [&] { value = vector->data[checked_offset(vector, index)]; });
        return value;
    }

    // Releases a vector obtained from vector_new(); null is accepted.
    void vector_delete(Vector* vector) {
        if (vector == nullptr) {
            return;
        }
        delete[] vector->data;
        delete vector;
    }
}

在 Python 中捕获异常：

需要注意的是，ffi.cdef 只能描述 C 接口，并没有办法声明某个函数“可能抛出异常”，因此下面的声明与之前完全相同；这里只是在 Python 一侧调用 linear_regression_fit 和 linear_regression_predict 的地方加上了错误处理：

from cffi import FFI
import numpy as np

# One FFI instance holds every C declaration used by this module.
ffi = FFI()

# Declarations of the C interface exported by linear_regression_wrapper.cpp.
# cdef() can only describe C types and signatures; it has no way to express
# that a function may throw a C++ exception.
ffi.cdef("""
    typedef struct {
        double* data;
        int rows;
        int cols;
    } Matrix;

    typedef struct {
        double* data;
        int size;
    } Vector;

    typedef struct LinearRegression LinearRegression;
    LinearRegression* linear_regression_new();
    void linear_regression_fit(LinearRegression* lr, Matrix* X, Vector* y);
    double linear_regression_predict(LinearRegression* lr, Vector* x);
    void linear_regression_delete(LinearRegression* lr);

    Matrix* matrix_new(int rows, int cols);
    void matrix_set(Matrix* matrix, int row, int col, double value);
    double matrix_get(Matrix* matrix, int row, int col);
    void matrix_delete(Matrix* matrix);

    Vector* vector_new(int size);
    void vector_set(Vector* vector, int index, double value);
    double vector_get(Vector* vector, int index);
    void vector_delete(Vector* vector);
""")

# Open the shared library from the current working directory; the functions
# declared above are then reachable as attributes of `lib`.
lib = ffi.dlopen("./liblinear_regression.so")

class LinearRegression:
    """Python owner of one C++ ``LinearRegression`` object reached through cffi.

    Error handling: cffi cannot turn a C++ exception into a Python exception
    (``ffi.error`` only reports cffi's own problems), so a ``try/except``
    around the native call would never fire. Instead, every input is validated
    here and rejected with ``ValueError`` before the native code, which trusts
    the sizes it is given, can run into trouble.
    """

    def __init__(self):
        # Assign the attributes before calling into C so that __del__ stays
        # safe even if the allocation below raises.
        self._lr = None
        self._n_features = None
        handle = lib.linear_regression_new()
        if handle == ffi.NULL:
            raise MemoryError("linear_regression_new() returned NULL")
        self._lr = handle

    def close(self):
        """Destroy the native object now. Calling it more than once is harmless."""
        handle = getattr(self, "_lr", None)
        if handle is None:
            return
        # Forget the handle first so a failure below can never cause a second delete.
        self._lr = None
        lib.linear_regression_delete(handle)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _handle(self):
        """Return the live native handle or raise if the object was closed."""
        if self._lr is None:
            raise RuntimeError("LinearRegression has already been closed")
        return self._lr

    @staticmethod
    def _vector_view(values):
        """Describe a 1-D float64 array as a C ``Vector`` without copying.

        The struct only borrows the array's buffer, so the caller must keep
        ``values`` alive for as long as the returned struct is in use.
        """
        view = ffi.new("Vector*")
        view.data = ffi.cast("double*", values.ctypes.data)
        view.size = values.shape[0]
        return view

    def fit(self, X, y):
        """Train the model.

        X: array-like of shape (n_samples, n_features)
        y: array-like of shape (n_samples,)

        Raises ValueError when the shapes do not fit together.
        """
        # The wrapper indexes X as row-major contiguous memory, which
        # np.asarray alone does not guarantee (e.g. for transposed views).
        X = np.ascontiguousarray(X, dtype=np.float64)
        y = np.ascontiguousarray(y, dtype=np.float64)

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if y.ndim != 1:
            raise ValueError("y must be 1-dimensional (n_samples,)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        X_c = ffi.new("Matrix*")
        X_c.data = ffi.cast("double*", X.ctypes.data)
        X_c.rows = X.shape[0]
        X_c.cols = X.shape[1]
        y_c = self._vector_view(y)

        # X and y stay referenced by the local variables until the call
        # returns, which keeps the borrowed buffers valid.
        lib.linear_regression_fit(self._handle(), X_c, y_c)
        self._n_features = X.shape[1]

    def predict(self, x):
        """Predict the target for one sample.

        x: array-like of shape (n_features,)

        Raises ValueError if x is not 1-dimensional or, once the model has been
        fitted, if its length differs from the number of training features.
        """
        x = np.ascontiguousarray(x, dtype=np.float64)
        if x.ndim != 1:
            raise ValueError("x must be 1-dimensional (n_features,)")
        if self._n_features is not None and x.shape[0] != self._n_features:
            raise ValueError("x must contain exactly one value per training feature")

        x_c = self._vector_view(x)
        return lib.linear_regression_predict(self._handle(), x_c)

    def __del__(self):
        # Last-resort cleanup; close() copes with a partially constructed
        # object and with having been called before.
        self.close()

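需要特别注意：cffi 并不会捕获 C++ 异常，也不会把它转换为 ffi.error（ffi.error 只用于 cffi 自身的错误，例如 cdef 声明解析失败）。C++ 异常一旦越过 extern "C" 边界进入 C/Python 的调用栈，行为是未定义的，通常会触发 std::terminate 使整个进程终止。因此可靠的做法是前面列出的第一种策略：在包装函数内部捕获所有异常（包括 catch (...)），把它们转换为错误码或错误信息返回，再由 Python 代码检查返回值并抛出相应的 Python 异常。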

注意事项：

在 C 接口上约定好错误的表示方式（例如返回错误码，并提供一个获取错误信息的函数），再由 Python 一侧把它们映射为合适的 Python 异常类型（如 ValueError、MemoryError、RuntimeError）。
避免在 C++ 异常处理代码中执行复杂的操作，以避免出现新的异常。

7. 数据类型转换

在 FFI 调用中，数据类型转换是一个重要的环节。我们需要将 Python 数据类型转换为 C++ 数据类型，并将 C++ 数据类型转换为 Python 数据类型。

常用的数据类型转换：

Python 类型	C++ 类型	转换方法
`int`	`int`	直接转换
`float`	`double`	直接转换
`str`	`char*`	使用 `ffi.new("char[]", string.encode('utf-8'))`
`bytes`	`char*`	使用 `ffi.new("char[]", bytes)`
`list`	`std::vector`	需要手动转换，例如使用循环将 Python 列表中的元素复制到 C++ 向量中
`numpy.ndarray`	`double*`	使用 `ffi.cast("double*", array.ctypes.data)` 和 numpy 的 `ctypes` 接口.

**示例：numpy.ndarray 到 `double*` 的转换**

import numpy as np

# Create a 1-D float64 numpy array; its element type matches C `double`.
array = np.array([1.0, 2.0, 3.0], dtype=np.float64)

# Reinterpret the address of the array's buffer as a C `double*`.
# No data is copied and the pointer does not own the memory, so `array` must
# stay alive (and must not be reallocated) while `array_ptr` is in use.
array_ptr = ffi.cast("double*", array.ctypes.data)

8. 总结：FFI 调用 C++ ML 库的关键点

本文详细介绍了如何使用 Python 的 FFI 调用 C++ 编写的机器学习库。我们强调了对象生命周期管理和异常安全机制的重要性，并提供了代码示例。希望通过本文，你能更好地理解 FFI 的使用，并在实际项目中应用 FFI 技术。

9. 后续学习方向

更复杂的 C++ 数据结构： 学习如何使用 FFI 调用涉及更复杂 C++ 数据结构（例如，自定义类、STL 容器）的函数。
性能优化： 学习如何优化 FFI 调用的性能，例如使用零拷贝技术、减少数据类型转换的开销。
更高级的异常处理： 学习如何使用更高级的异常处理技术，例如将 C++ 异常转换为 Python 异常。
使用其他 FFI 库： 了解其他的 FFI 库，例如 ctypes，并比较它们的优缺点。

更多IT精英技术系列讲座，到智猿学院