简要说明
common文件定义了一个Caffe类,Caffe类里面定义了一个RNG类(random number generator ),RNG类里面定义了一个Generator类,Generator类里面有一个指向rng_t的指针。
rng_t其实就是boost::mt19937。(这是因为在rng.hpp中有这么一句话:typedef boost::mt19937 rng_t;)
Caffe对象中有一个指向RNG对象的指针,RNG对象中有一个指向Generator对象的指针,Generator中有一个指向rng_t的指针,并且可以用相应的函数返回这些指针。
主要成员变量的结构
class Caffe
{
shared_ptr<RNG> random_generator_;// owning pointer to an RNG object (RNG is a class nested in Caffe, not a subclass)
Brew mode_; // enum Brew { CPU, GPU };
int solver_count_;
bool root_solver_;
// After Caffe() runs, solver_count_ is 1 and root_solver_ is true; the
// CPU-only and the GPU+CPU constructors in common.cpp both initialize them so.
class RNG //random number generator
{
shared_ptr<Generator> generator_;// owning pointer to the Generator object
class Generator
{
private:
shared_ptr<caffe::rng_t> rng_;// owning pointer to the rng_t engine
// rng_t is boost::mt19937 (per the typedef in rng.hpp quoted in these notes)
};
}
}
函数结构
class Caffe
{
Caffe& Caffe::Get() // returns a reference to the calling thread's Caffe object, creating it with new on first use
inline static RNG& rng_stream(); // returns *random_generator_ (an RNG), creating the RNG first if it is not set
class RNG //random number generator
{
void* generator();// returns the Generator's rng_t pointer cast to void*
class Generator
{
};
}
}
caffe_rng()最后返回的是boost::mt19937的指针
// Returns the RNG owned by the current Caffe instance, creating a default
// RNG the first time it is requested.
inline static RNG& rng_stream() {
  Caffe& self = Get();
  if (!self.random_generator_) {
    self.random_generator_.reset(new RNG());
  }
  return *self.random_generator_;
}
void* Caffe::RNG::generator() {//返回(void*)rng_t
return static_cast<void*>(generator_->rng());
}
// Returns the rng_t engine behind Caffe::rng_stream().
inline rng_t* caffe_rng() {
  // RNG::generator() exposes the engine as void*; cast it back to its type.
  void* opaque_engine = Caffe::rng_stream().generator();
  return static_cast<caffe::rng_t*>(opaque_engine);
}
Caffe::rng_stream()的返回类型是RNG&,Caffe::RNG::generator()返回的是转换成void*的rng_t指针,所以caffe_rng()最后返回的是rng_t*,而且是用static_cast把void*转换回rng_t*。
RNG类
// Wrapper around a Generator; callers receive the engine only as a void*.
class RNG {
public:
// Creates an RNG whose Generator seeds itself via cluster_seedgen().
RNG();
// Creates an RNG whose Generator is seeded with the given value.
explicit RNG(unsigned int seed);
// NOTE(review): declared here, but no definition of this copy constructor
// appears in the common.cpp listing in this file.
explicit RNG(const RNG&);
// Copy assignment; defined in common.cpp.
RNG& operator=(const RNG&);
// Returns the Generator's rng_t pointer cast to void*.
void* generator();
private:
// Only forward-declared here; the class body is in common.cpp.
class Generator;
shared_ptr<Generator> generator_;
};
cluster_seedgen()的功能就是获取一个int64_t类型的随机种子
// Produces a seed for the random number generators.
//
// Preferred source: sizeof(int64_t) bytes read from /dev/urandom. If that
// file cannot be opened or read in full, a message is logged and a fallback
// value in [0, 104729) is derived from the current time and the process id.
int64_t cluster_seedgen(void) {
  int64_t seed;
  FILE* f = fopen("/dev/urandom", "rb");
  if (f) {
    const bool got_entropy = fread(&seed, 1, sizeof(seed), f) == sizeof(seed);
    fclose(f);
    if (got_entropy) {
      return seed;
    }
  }

  LOG(INFO) << "System entropy source not available, "
              "using fallback algorithm to generate seed instead.";

  // Fallback value: |((s * 181) * ((pid - 83) * 359)) % 104729|.
  // Each factor is reduced modulo 104729 before multiplying. The result is
  // the same as the direct formula whenever that formula does not overflow,
  // but the intermediate product can no longer exceed the int64_t range
  // (signed overflow is undefined behaviour) for large process ids.
  const int64_t kModulus = 104729;
  const int64_t pid = getpid();
  const int64_t s = time(NULL);
  const int64_t time_term = ((s % kModulus) * 181) % kModulus;
  const int64_t pid_term = (((pid - 83) % kModulus) * 359) % kModulus;
  seed = (time_term * pid_term) % kModulus;
  return seed < 0 ? -seed : seed;
}
Generator类
// Owns the rng_t engine used by Caffe::RNG.
class Caffe::RNG::Generator {
public:
// Two constructors: the default one seeds the engine with the int64_t
// returned by cluster_seedgen(); the other uses the caller's unsigned int seed.
Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
// Returns the raw rng_t pointer; ownership stays with this Generator.
caffe::rng_t* rng() { return rng_.get(); }
private:
shared_ptr<caffe::rng_t> rng_;
};
common.hpp:
#ifndef CAFFE_COMMON_HPP_
#define CAFFE_COMMON_HPP_
#include <boost/shared_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <climits>
#include <cmath>
#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility> // pair
#include <vector>
#include "caffe/util/device_alternate.hpp"
// Convert macro to string
// STRINGIFY(m) stringizes its argument exactly as written. AS_STRING(m) adds
// one level of indirection so that an argument which is itself a macro is
// expanded before it is stringized.
#define STRINGIFY(m) #m
#define AS_STRING(m) STRINGIFY(m)
// gflags 2.1 issue: namespace google was changed to gflags without warning.
// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
// 2.1. If yes, we will add a temporary solution to redirect the namespace.
// TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's
// remove the following hack.
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif // GFLAGS_GFLAGS_H_
// Disable the copy and assignment operator for a class.
// Expands to a `private:` label followed by declarations (without
// definitions) of the copy constructor and the copy assignment operator.
// The last declaration has no trailing semicolon, so the use site supplies
// it. Members written after the macro inside the class are private as well,
// because the access label stays in effect.
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:\
classname(const classname&);\
classname& operator=(const classname&)
// Instantiate a class with float and double specifications.
// Also defines a namespace-scope char named gInstantiationGuard<classname>.
// NOTE(review): presumably the guard variable makes a second
// INSTANTIATE_CLASS of the same class fail as a duplicate definition --
// confirm. The use site supplies the final semicolon.
#define INSTANTIATE_CLASS(classname) \
char gInstantiationGuard##classname; \
template class classname<float>; \
template class classname<double>
// Explicitly instantiates classname<float>::Forward_gpu and
// classname<double>::Forward_gpu.
// Note: unlike the BACKWARD macro below, this expansion already ends with a
// semicolon, so `INSTANTIATE_LAYER_GPU_FORWARD(X);` yields an extra empty
// declaration.
#define INSTANTIATE_LAYER_GPU_FORWARD(classname) \
template void classname<float>::Forward_gpu( \
const std::vector<Blob<float>*>& bottom, \
const std::vector<Blob<float>*>& top); \
template void classname<double>::Forward_gpu( \
const std::vector<Blob<double>*>& bottom, \
const std::vector<Blob<double>*>& top);
// Explicitly instantiates classname<float>::Backward_gpu and
// classname<double>::Backward_gpu. The use site supplies the final semicolon.
#define INSTANTIATE_LAYER_GPU_BACKWARD(classname) \
template void classname<float>::Backward_gpu( \
const std::vector<Blob<float>*>& top, \
const std::vector<bool>& propagate_down, \
const std::vector<Blob<float>*>& bottom); \
template void classname<double>::Backward_gpu( \
const std::vector<Blob<double>*>& top, \
const std::vector<bool>& propagate_down, \
const std::vector<Blob<double>*>& bottom)
// Instantiates both the forward and the backward GPU functions of a layer.
// The use site supplies the final semicolon.
#define INSTANTIATE_LAYER_GPU_FUNCS(classname) \
INSTANTIATE_LAYER_GPU_FORWARD(classname); \
INSTANTIATE_LAYER_GPU_BACKWARD(classname)
// A simple macro to mark codes that are not implemented, so that when the code
// is executed we will see a fatal log.
// The expansion is an unterminated LOG(FATAL) stream expression: the use site
// supplies the semicolon and may append more text with operator<<.
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"
// See PR #1236
namespace cv { class Mat; }
namespace caffe {
// We will use the boost shared_ptr instead of the new C++11 one mainly
// because cuda does not work (at least now) well with C++11 features.
using boost::shared_ptr;
// Common functions and classes from std that caffe often uses.
using std::fstream;
using std::ios;
using std::isnan;
using std::isinf;
using std::iterator;
using std::make_pair;
using std::map;
using std::ostringstream;
using std::pair;
using std::set;
using std::string;
using std::stringstream;
using std::vector;
// A global initialization function that you should call in your main function.
// The definition in common.cpp parses the command line with gflags,
// initializes google logging with (*pargv)[0] as the program name, and
// installs glog's failure signal handler.
// pargc / pargv point at main's argc / argv and are handed to gflags.
void GlobalInit(int* pargc, char*** pargv);
// A singleton class to hold common caffe stuff, such as the handler that
// caffe is going to use for cublas, curand, etc.
// The instance is kept per thread: Get() in common.cpp stores it in a
// boost::thread_specific_ptr, so each thread lazily gets its own Caffe object.
// All public accessors are static and forward to Get().
class Caffe {
public:
~Caffe();
// Thread local context for Caffe. Moved to common.cpp instead of
// including boost/thread.hpp to avoid a boost/NVCC issues (#1009, #1010)
// on OSX. Also fails on Linux with CUDA 7.0.18.
// Returns the calling thread's instance, creating it on first use.
static Caffe& Get();
// Execution mode selected through set_mode(); constructors start in CPU.
enum Brew { CPU, GPU };
// This random number generator facade hides boost and CUDA rng
// implementation from one another (for cross-platform compatibility).
class RNG {
public:
// Seeds the underlying Generator via cluster_seedgen().
RNG();
// Seeds the underlying Generator with the given value.
explicit RNG(unsigned int seed);
// NOTE(review): declared, but no definition is visible in common.cpp.
explicit RNG(const RNG&);
RNG& operator=(const RNG&);
// Returns the Generator's rng_t pointer cast to void*.
void* generator();
private:
// Defined in common.cpp so that this header does not expose the engine type.
class Generator;
shared_ptr<Generator> generator_;
};
// Getters for boost rng, curand, and cublas handles
// rng_stream() creates a default-seeded RNG on first use if none is set.
inline static RNG& rng_stream() {
if (!Get().random_generator_) {
Get().random_generator_.reset(new RNG());
}
return *(Get().random_generator_);
}
#ifndef CPU_ONLY
// These handles may be NULL: the constructor only logs an error when their
// creation fails.
inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; }
inline static curandGenerator_t curand_generator() {
return Get().curand_generator_;
}
#endif
// Returns the mode: running on CPU or GPU.
inline static Brew mode() { return Get().mode_; }
// The setters for the variables
// Sets the mode. It is recommended that you don't change the mode halfway
// into the program since that may cause allocation of pinned memory being
// freed in a non-pinned way, which may cause problems - I haven't verified
// it personally but better to note it here in the header file.
inline static void set_mode(Brew mode) { Get().mode_ = mode; }
// Sets the random seed of both boost and curand
// (the CPU_ONLY build reseeds only the boost RNG).
static void set_random_seed(const unsigned int seed);
// Sets the device. Since we have cublas and curand stuff, set device also
// requires us to reset those values.
static void SetDevice(const int device_id);
// Prints the current GPU status.
static void DeviceQuery();
// Check if specified device is available
static bool CheckDevice(const int device_id);
// Search from start_id to the highest possible device ordinal,
// return the ordinal of the first available device.
// Returns -1 when no device is available.
static int FindDevice(const int start_id = 0);
// Parallel training info
// Both constructors initialize solver_count_ to 1 and root_solver_ to true.
inline static int solver_count() { return Get().solver_count_; }
inline static void set_solver_count(int val) { Get().solver_count_ = val; }
inline static bool root_solver() { return Get().root_solver_; }
inline static void set_root_solver(bool val) { Get().root_solver_ = val; }
protected:
#ifndef CPU_ONLY
// CUDA library handles; created in the constructor, recreated by SetDevice,
// destroyed in the destructor.
cublasHandle_t cublas_handle_;
curandGenerator_t curand_generator_;
#endif
// Empty until rng_stream() or set_random_seed() creates the RNG.
shared_ptr<RNG> random_generator_;
Brew mode_;
int solver_count_;
bool root_solver_;
private:
// The private constructor to avoid duplicate instantiation.
Caffe();
DISABLE_COPY_AND_ASSIGN(Caffe);
};
} // namespace caffe
#endif // CAFFE_COMMON_HPP_
common.cpp:
#include <boost/thread.hpp>
#include <glog/logging.h>
#include <cmath>
#include <cstdio>
#include <ctime>
#include "caffe/common.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
// Make sure each thread can have different values.
static boost::thread_specific_ptr<Caffe> thread_instance_;
// Returns the calling thread's Caffe instance, creating it on first use.
Caffe& Caffe::Get() {
  Caffe* instance = thread_instance_.get();
  if (!instance) {
    // First call on this thread: hand a fresh object to the thread-specific
    // slot, then read it back from there.
    thread_instance_.reset(new Caffe());
    instance = thread_instance_.get();
  }
  return *instance;
}
// Produces a seed for the random number generators.
//
// Preferred source: sizeof(int64_t) bytes read from /dev/urandom. If that
// file cannot be opened or read in full, a message is logged and a fallback
// value in [0, 104729) is derived from the current time and the process id.
int64_t cluster_seedgen(void) {
  int64_t seed;
  FILE* f = fopen("/dev/urandom", "rb");
  if (f) {
    const bool got_entropy = fread(&seed, 1, sizeof(seed), f) == sizeof(seed);
    fclose(f);
    if (got_entropy) {
      return seed;
    }
  }

  LOG(INFO) << "System entropy source not available, "
              "using fallback algorithm to generate seed instead.";

  // Fallback value: |((s * 181) * ((pid - 83) * 359)) % 104729|.
  // Each factor is reduced modulo 104729 before multiplying. The result is
  // the same as the direct formula whenever that formula does not overflow,
  // but the intermediate product can no longer exceed the int64_t range
  // (signed overflow is undefined behaviour) for large process ids.
  const int64_t kModulus = 104729;
  const int64_t pid = getpid();
  const int64_t s = time(NULL);
  const int64_t time_term = ((s % kModulus) * 181) % kModulus;
  const int64_t pid_term = (((pid - 83) % kModulus) * 359) % kModulus;
  seed = (time_term * pid_term) % kModulus;
  return seed < 0 ? -seed : seed;
}
// Process-wide setup: command-line flags first, then logging.
void GlobalInit(int* pargc, char*** pargv) {
  // gflags parses the command line; the last argument is its remove_flags
  // switch.
  ::gflags::ParseCommandLineFlags(pargc, pargv, true);

  // glog is initialized with the first argv entry as the program name.
  const char* program_name = (*pargv)[0];
  ::google::InitGoogleLogging(program_name);

  // Have glog print a backtrace on segfault.
  ::google::InstallFailureSignalHandler();
}
#ifdef CPU_ONLY // CPU-only Caffe.
// CPU-only construction: no RNG yet, CPU mode, one solver that is the root.
Caffe::Caffe()
    : random_generator_(),
      mode_(Caffe::CPU),
      solver_count_(1),
      root_solver_(true) {
}

// Nothing to release in the CPU-only build.
Caffe::~Caffe() {
}
void Caffe::set_random_seed(const unsigned int seed) {
// RNG seed
Get().random_generator_.reset(new RNG(seed));
}
// CPU-only stubs for the device functions. Each one just invokes NO_GPU,
// which is defined outside this file.
// NOTE(review): NO_GPU presumably logs a fatal "no GPU in CPU-only build"
// error -- confirm in caffe/util/device_alternate.hpp.
void Caffe::SetDevice(const int device_id) {
NO_GPU;
}
void Caffe::DeviceQuery() {
NO_GPU;
}
// Reports "not available" (false) after NO_GPU.
bool Caffe::CheckDevice(const int device_id) {
NO_GPU;
return false;
}
// Reports "no device found" (-1) after NO_GPU.
int Caffe::FindDevice(const int start_id) {
NO_GPU;
return -1;
}
// Owns the rng_t engine used by Caffe::RNG (CPU-only build).
class Caffe::RNG::Generator {
public:
// Seeds the engine with the value returned by cluster_seedgen().
Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
// Seeds the engine with the caller's value.
explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
// Returns the raw engine pointer; ownership stays with this Generator.
caffe::rng_t* rng() { return rng_.get(); }
private:
shared_ptr<caffe::rng_t> rng_;
};
Caffe::RNG::RNG() : generator_(new Generator()) { }
Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }
Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
generator_ = other.generator_;
return *this;
}
void* Caffe::RNG::generator() {
return static_cast<void*>(generator_->rng());
}
#else // Normal GPU + CPU Caffe.
// GPU + CPU construction. Starts in CPU mode with one solver that is the
// root, then tries to create the cublas and curand handles. A failure is
// only logged, so that CPU code can still run.
Caffe::Caffe()
    : cublas_handle_(NULL),
      curand_generator_(NULL),
      random_generator_(),
      mode_(Caffe::CPU),
      solver_count_(1),
      root_solver_(true) {
  const bool cublas_ready =
      cublasCreate(&cublas_handle_) == CUBLAS_STATUS_SUCCESS;
  if (!cublas_ready) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }

  // The curand generator is seeded only if it could be created.
  bool curand_ready =
      curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT) ==
      CURAND_STATUS_SUCCESS;
  if (curand_ready) {
    curand_ready =
        curandSetPseudoRandomGeneratorSeed(curand_generator_,
                                           cluster_seedgen()) ==
        CURAND_STATUS_SUCCESS;
  }
  if (!curand_ready) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}
// Destroys whichever CUDA library handles were created.
Caffe::~Caffe() {
  if (cublas_handle_) {
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  }
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
//设定随机数种子
void Caffe::set_random_seed(const unsigned int seed) {
// Curand seed
static bool g_curand_availability_logged = false;
if (Get().curand_generator_) {
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
seed));
CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0));
} else {
if (!g_curand_availability_logged) {
LOG(ERROR) <<
"Curand not available. Skipping setting the curand seed.";
g_curand_availability_logged = true;
}
}
// RNG seed
Get().random_generator_.reset(new RNG(seed));
}
// Switches the calling thread to GPU `device_id`.
//
// Does nothing when that device is already current. Otherwise selects the
// device, destroys the existing cublas / curand handles (if any) and creates
// new ones, seeding the new curand generator with cluster_seedgen().
// Every CUDA / cublas / curand call goes through the corresponding *_CHECK
// macro.
void Caffe::SetDevice(const int device_id) {
  int current_device;
  // Fixed: this argument was garbled to "¤t_device" (an HTML-entity
  // artifact of "&current_device") and did not compile.
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
//没看
void Caffe::DeviceQuery() {
cudaDeviceProp prop;
int device;
if (cudaSuccess != cudaGetDevice(&device)) {
printf("No cuda device present.\n");
return;
}
CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
LOG(INFO) << "Device id: " << device;
LOG(INFO) << "Major revision number: " << prop.major;
LOG(INFO) << "Minor revision number: " << prop.minor;
LOG(INFO) << "Name: " << prop.name;
LOG(INFO) << "Total global memory: " << prop.totalGlobalMem;
LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
LOG(INFO) << "Total registers per block: " << prop.regsPerBlock;
LOG(INFO) << "Warp size: " << prop.warpSize;
LOG(INFO) << "Maximum memory pitch: " << prop.memPitch;
LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock;
LOG(INFO) << "Maximum dimension of block: "
<< prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
<< prop.maxThreadsDim[2];
LOG(INFO) << "Maximum dimension of grid: "
<< prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
<< prop.maxGridSize[2];
LOG(INFO) << "Clock rate: " << prop.clockRate;
LOG(INFO) << "Total constant memory: " << prop.totalConstMem;
LOG(INFO) << "Texture alignment: " << prop.textureAlignment;
LOG(INFO) << "Concurrent copy and execution: "
<< (prop.deviceOverlap ? "Yes" : "No");
LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount;
LOG(INFO) << "Kernel execution timeout: "
<< (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
return;
}
// Reports whether GPU #device_id can actually be used by this thread.
//
// Selecting the device with cudaSetDevice() is not enough: it only records
// the id lazily and does not create a context, so it cannot tell whether the
// host thread is permitted to use the device. In a shared environment with
// devices in EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD mode it returns
// cudaSuccess even when another process or thread holds the device.
// A call that initializes the context is needed; cudaFree(0) is one with no
// other side effect.
bool Caffe::CheckDevice(const int device_id) {
  bool usable = false;
  if (cudaSetDevice(device_id) == cudaSuccess) {
    usable = (cudaFree(0) == cudaSuccess);
  }
  // Clear any error the probing above may have left behind.
  cudaGetLastError();
  return usable;
}
// Returns the ordinal of the first available device at or after start_id,
// or -1 if none is available. In the EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD
// mode a successful check also claims the device, because the check
// initializes a context on it.
int Caffe::FindDevice(const int start_id) {
  int device_count = 0;
  CUDA_CHECK(cudaGetDeviceCount(&device_count));

  int candidate = start_id;
  while (candidate < device_count) {
    if (CheckDevice(candidate)) {
      return candidate;
    }
    candidate++;
  }
  return -1;
}
// Owns the rng_t engine used by Caffe::RNG (GPU + CPU build).
class Caffe::RNG::Generator {
public:
// Two constructors: the default one seeds the engine with the int64_t
// returned by cluster_seedgen(); the other uses the caller's unsigned int seed.
Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
// Returns the raw rng_t pointer; ownership stays with this Generator.
caffe::rng_t* rng() { return rng_.get(); }
private:
shared_ptr<caffe::rng_t> rng_;
};
// Creates an RNG with a new Generator that seeds itself via cluster_seedgen().
Caffe::RNG::RNG() : generator_(new Generator()) { }

// Creates an RNG with a new Generator seeded with `seed`.
Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }

// Copy assignment: afterwards both RNG objects share the same Generator.
//
// The shared_ptr itself is assigned so that both objects share one reference
// count. Resetting from other.generator_.get() would create a second,
// independent owner of the same raw pointer and delete the Generator twice.
// This matches the CPU_ONLY implementation.
Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_ = other.generator_;
  return *this;
}

// Returns the Generator's rng_t pointer cast to void*.
void* Caffe::RNG::generator() {
  return static_cast<void*>(generator_->rng());
}
// Maps a cublas status code to the name of its enumerator. Codes not listed
// below map to "Unknown cublas status".
const char* cublasGetErrorString(cublasStatus_t error) {
  const char* name = "Unknown cublas status";
  switch (error) {
    case CUBLAS_STATUS_SUCCESS:
      name = "CUBLAS_STATUS_SUCCESS";
      break;
    case CUBLAS_STATUS_NOT_INITIALIZED:
      name = "CUBLAS_STATUS_NOT_INITIALIZED";
      break;
    case CUBLAS_STATUS_ALLOC_FAILED:
      name = "CUBLAS_STATUS_ALLOC_FAILED";
      break;
    case CUBLAS_STATUS_INVALID_VALUE:
      name = "CUBLAS_STATUS_INVALID_VALUE";
      break;
    case CUBLAS_STATUS_ARCH_MISMATCH:
      name = "CUBLAS_STATUS_ARCH_MISMATCH";
      break;
    case CUBLAS_STATUS_MAPPING_ERROR:
      name = "CUBLAS_STATUS_MAPPING_ERROR";
      break;
    case CUBLAS_STATUS_EXECUTION_FAILED:
      name = "CUBLAS_STATUS_EXECUTION_FAILED";
      break;
    case CUBLAS_STATUS_INTERNAL_ERROR:
      name = "CUBLAS_STATUS_INTERNAL_ERROR";
      break;
#if CUDA_VERSION >= 6000
    case CUBLAS_STATUS_NOT_SUPPORTED:
      name = "CUBLAS_STATUS_NOT_SUPPORTED";
      break;
#endif
#if CUDA_VERSION >= 6050
    case CUBLAS_STATUS_LICENSE_ERROR:
      name = "CUBLAS_STATUS_LICENSE_ERROR";
      break;
#endif
  }
  return name;
}
// Maps a curand status code to the name of its enumerator. Codes not listed
// below map to "Unknown curand status".
const char* curandGetErrorString(curandStatus_t error) {
  const char* name = "Unknown curand status";
  switch (error) {
    case CURAND_STATUS_SUCCESS:
      name = "CURAND_STATUS_SUCCESS";
      break;
    case CURAND_STATUS_VERSION_MISMATCH:
      name = "CURAND_STATUS_VERSION_MISMATCH";
      break;
    case CURAND_STATUS_NOT_INITIALIZED:
      name = "CURAND_STATUS_NOT_INITIALIZED";
      break;
    case CURAND_STATUS_ALLOCATION_FAILED:
      name = "CURAND_STATUS_ALLOCATION_FAILED";
      break;
    case CURAND_STATUS_TYPE_ERROR:
      name = "CURAND_STATUS_TYPE_ERROR";
      break;
    case CURAND_STATUS_OUT_OF_RANGE:
      name = "CURAND_STATUS_OUT_OF_RANGE";
      break;
    case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
      name = "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
      break;
    case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
      name = "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
      break;
    case CURAND_STATUS_LAUNCH_FAILURE:
      name = "CURAND_STATUS_LAUNCH_FAILURE";
      break;
    case CURAND_STATUS_PREEXISTING_FAILURE:
      name = "CURAND_STATUS_PREEXISTING_FAILURE";
      break;
    case CURAND_STATUS_INITIALIZATION_FAILED:
      name = "CURAND_STATUS_INITIALIZATION_FAILED";
      break;
    case CURAND_STATUS_ARCH_MISMATCH:
      name = "CURAND_STATUS_ARCH_MISMATCH";
      break;
    case CURAND_STATUS_INTERNAL_ERROR:
      name = "CURAND_STATUS_INTERNAL_ERROR";
      break;
  }
  return name;
}
#endif // CPU_ONLY
} // namespace caffe