Etcd源码分析-数据字典

channel名称	写入	读取	作用
EtcdServer->readych chan struct{}		文件：etcdmain/etcd.go 方法：startEtcd 变量：e.Server.ReadyNotify	表示加入集群
EtcdServer->done		文件：etcdmain/etcd.go 方法：startEtcdOrProxyV2 变量：stopped	表示退出进程
node->tickc	文件：node.go 方法：Tick() n.tickc <- struct{}{}	文件：node.go 方法：(n *node) run select case <-n.tickc:	当读取到数据则表示定时器超时。例如：当选举定时器超时后执行回调函数，进行选举
node->readyc	文件：node.go 方法：(n *node) run select case readyc <- rd:	文件：etcdserver/raft.go 方法：start select case rd := <- r.Ready()	用于leader、candidate向follower发送消息，此外还有一些其他作用。
raftNode->ticker.C	golang sleep.go sendTime()	文件：etcdserver/raft.go 方法：start case <-r.ticker.C:	心跳时间超时

包含当前EtcdServer以及监听器
// Etcd bundles the running EtcdServer together with its peer and client
// listeners.
type Etcd struct {
	Peers   []*peerListener  /* listeners for cluster members (peers) */
	Clients []net.Listener    /* listeners for clients */
	Server  *etcdserver.EtcdServer

	cfg   Config
	stopc chan struct{}
	errc  chan error
	sctxs map[string]*serveCtx /* serve contexts; the author reads these as sessions and notes that the Clients listeners above come from this field */

	closeOnce sync.Once
}

// EtcdServer is the production implementation of the Server interface.
// Author's note: it implements the Raft interface.
type EtcdServer struct {
// inflightSnapshots holds count the number of snapshots currently inflight.
inflightSnapshots int64 // must use atomic operations to access; keep 64-bit aligned.
appliedIndex uint64 // must use atomic operations to access; keep 64-bit aligned.
committedIndex uint64 // must use atomic operations to access; keep 64-bit aligned.
// consistIndex used to hold the offset of current executing entry
// It is initialized to 0 before executing any entry.
consistIndex consistentIndex // must use atomic operations to access; keep 64-bit aligned.
Cfg *ServerConfig

readych chan struct{} /* signals that this server has joined the cluster */
r raftNode /* represents the cluster (raft) node */

snapCount uint64

w wait.Wait

readMu sync.RWMutex
// read routine notifies etcd server that it waits for reading by sending an empty struct to
// readwaitC
readwaitc chan struct{}
// readNotifier is used to notify the read routine that it can process the request
// when there is no error
readNotifier *notifier

// stop signals the run goroutine should shutdown.
stop chan struct{}
// stopping is closed by run goroutine on shutdown.
stopping chan struct{}
// done is closed when all goroutines from start() complete.
// Author's note: used to exit the process.
done chan struct{}

errorc chan error
id types.ID
attributes membership.Attributes

cluster *membership.RaftCluster

store store.Store
snapshotter *snap.Snapshotter

applyV2 ApplierV2

// applyV3 is the applier with auth and quotas
applyV3 applierV3
// applyV3Base is the core applier without auth or quotas
applyV3Base applierV3
applyWait wait.WaitTime

kv mvcc.ConsistentWatchableKV
lessor lease.Lessor
bemu sync.Mutex
be backend.Backend
authStore auth.AuthStore
alarmStore *alarm.AlarmStore

stats *stats.ServerStats
lstats *stats.LeaderStats

SyncTicker *time.Ticker
// compactor is used to auto-compact the KV.
compactor *compactor.Periodic

// peerRt used to send requests (version, lease) to peers.
peerRt http.RoundTripper
reqIDGen *idutil.Generator

// forceVersionC is used to force the version monitor loop
// to detect the cluster version immediately.
forceVersionC chan struct{}

// wgMu blocks concurrent waitgroup mutation while server stopping
wgMu sync.RWMutex
// wg is used to wait for the go routines that depends on the server state
// to exit when stopping the server.
wg sync.WaitGroup

// ctx is used for etcd-initiated requests that may need to be canceled
// on etcd server shutdown.
ctx context.Context
cancel context.CancelFunc

leadTimeMu sync.RWMutex
leadElectedTime time.Time
}

// raftNode groups the server-side raft state: the cached index/term/lead,
// the embedded raftNodeConfig, the channels used to hand off snapshots,
// applies and read states, and the ticker.
type raftNode struct {
	// Cache of the latest raft index and raft term the server has seen.
	// These three uint64 fields must be the first elements to keep 64-bit
	// alignment for atomic access to the fields.
	index uint64
	term  uint64
	lead  uint64

	raftNodeConfig /* embedded (anonymous composition) */

	// a chan to send/receive snapshot
	msgSnapC chan raftpb.Message

	// a chan to send out apply
	applyc chan apply

	// a chan to send out readState
	readStateC chan raft.ReadState

	// utility
	ticker *time.Ticker
	// contention detectors for raft heartbeat message
	td *contention.TimeoutDetector

	stopped chan struct{}
	done    chan struct{}
}

// raftNodeConfig carries the dependencies embedded into raftNode: the
// raft.Node itself, its storage layers, and the transport.
type raftNodeConfig struct {
	// to check if msg receiver is removed from cluster
	isIDRemoved func(id uint64) bool
	raft.Node  /* embedded (anonymous composition) */
	raftStorage *raft.MemoryStorage  /* dynamic storage, held in memory */
	storage     Storage            /* static storage on disk, including WAL files and snapshot files */
	heartbeat   time.Duration // for logging
	// transport specifies the transport to send and receive msgs to members.
	// Sending messages MUST NOT block. It is okay to drop messages, since
	// clients should timeout and reissue their messages.
	// If transport is nil, server will panic.
	transport rafthttp.Transporter
}

// Node represents a node in a raft cluster.
type Node interface {
// Tick increments the internal logical clock for the Node by a single tick. Election
// timeouts and heartbeat timeouts are in units of ticks.
Tick()
// Campaign causes the Node to transition to candidate state and start campaigning to become leader.
Campaign(ctx context.Context) error
// Propose proposes that data be appended to the log.
Propose(ctx context.Context, data []byte) error
// ProposeConfChange proposes config change.
// At most one ConfChange can be in the process of going through consensus.
// Application needs to call ApplyConfChange when applying EntryConfChange type entry.
ProposeConfChange(ctx context.Context, cc pb.ConfChange) error
// Step advances the state machine using the given message. ctx.Err() will be returned, if any.
// Author's note: called when a message is received; it processes the message
// and drives the state machine transition.
Step(ctx context.Context, msg pb.Message) error

// Ready returns a channel that returns the current point-in-time state.
// Users of the Node must call Advance after retrieving the state returned by Ready.
//
// NOTE: No committed entries from the next Ready may be applied until all committed entries
// and snapshots from the previous one have finished.
Ready() <-chan Ready

// Advance notifies the Node that the application has saved progress up to the last Ready.
// It prepares the node to return the next available Ready.
//
// The application should generally call Advance after it applies the entries in last Ready.
//
// However, as an optimization, the application may call Advance while it is applying the
// commands. For example, when the last Ready contains a snapshot, the application might take
// a long time to apply the snapshot data. To continue receiving Ready without blocking raft
// progress, it can call Advance before finishing applying the last ready.
Advance()
// ApplyConfChange applies config change to the local node.
// Returns an opaque ConfState protobuf which must be recorded
// in snapshots. Will never return nil; it returns a pointer only
// to match MemoryStorage.Compact.
ApplyConfChange(cc pb.ConfChange) *pb.ConfState

// TransferLeadership attempts to transfer leadership to the given transferee.
TransferLeadership(ctx context.Context, lead, transferee uint64)

// ReadIndex requests a read state. The read state will be set in the ready.
// Read state has a read index. Once the application advances further than the read
// index, any linearizable read requests issued before the read request can be
// processed safely. The read state will have the same rctx attached.
ReadIndex(ctx context.Context, rctx []byte) error

// Status returns the current status of the raft state machine.
Status() Status
// ReportUnreachable reports the given node is not reachable for the last send.
ReportUnreachable(id uint64)
// ReportSnapshot reports the status of the sent snapshot.
ReportSnapshot(id uint64, status SnapshotStatus)
// Stop performs any necessary termination of the Node.
Stop()
}

集群对象：保存已加入集群的成员，以及已从集群中移除的成员ID
// RaftCluster is the cluster object: it keeps the current members and the
// ids of members that have been removed from the cluster.
type RaftCluster struct {
	id    types.ID
	token string   // unique identifier of the cluster (author's note)

	store store.Store
	be    backend.Backend

	sync.Mutex // guards the fields below
	version    *semver.Version
	members    map[types.ID]*Member
	// removed contains the ids of removed members in the cluster.
	// removed id cannot be reused.
	removed map[types.ID]bool
}

// node is the canonical implementation of the Node interface.
type node struct {
	propc      chan pb.Message
	recvc      chan pb.Message
	confc      chan pb.ConfChange
	confstatec chan pb.ConfState
	readyc     chan Ready   // channel of Ready values; the author's original note reads "indicates completion"
	advancec   chan struct{}
	tickc      chan struct{}  // timer ticks; e.g. for the election timer, an election starts once it times out (author's note)
	done       chan struct{}
	stop       chan struct{}
	status     chan chan Status

	logger Logger
}

// raft holds the state of one raft participant: its id, term and vote, the
// log, its role, the outgoing message queue, and the election/heartbeat
// tick counters and timeouts.
type raft struct {
id uint64 // id of this cluster node; unique identifier

Term uint64 // term
Vote uint64 // author's guess: holds a node id, meaning the node this one votes for — TODO confirm

readStates []ReadState

// the log
raftLog *raftLog

maxInflight int
maxMsgSize uint64
prs map[uint64]*Progress

state StateType /* raft role */

votes map[uint64]bool /* key: peer raft id; value: true means that peer voted for this node, false means it did not (author's note) */

msgs []pb.Message /* message queue: all outgoing messages are kept here */

// the leader id
lead uint64
// leadTransferee is id of the leader transfer target when its value is not zero.
// Follow the procedure defined in raft thesis 3.10.
leadTransferee uint64
// New configuration is ignored if there exists unapplied configuration.
pendingConf bool

readOnly *readOnly

// number of ticks since it reached last electionTimeout when it is leader
// or candidate.
// number of ticks since it reached last electionTimeout or received a
// valid message from current leader when it is a follower.
electionElapsed int

// number of ticks since it reached last heartbeatTimeout.
// only leader keeps heartbeatElapsed.
heartbeatElapsed int

checkQuorum bool
preVote bool

heartbeatTimeout int
electionTimeout int
// randomizedElectionTimeout is a random number between
// [electiontimeout, 2 * electiontimeout - 1]. It gets reset
// when raft changes its state to follower or candidate.
randomizedElectionTimeout int

/* Timer callback. Author's note: e.g. as the election timer it starts an election on timeout; after becoming leader it becomes the heartbeat timer. */
tick func()
step stepFunc

logger Logger
}

// unstable.entries[i] has raft log position i+unstable.offset.
// Note that unstable.offset may be less than the highest log
// position in storage; this means that the next write to storage
// might need to truncate the log before persisting unstable.entries.
//
// In short: unstable holds the entries (and snapshot) that have not yet
// been written to storage; entry i sits at log position i+unstable.offset.
type unstable struct {
	// the incoming unstable snapshot, if any.
	snapshot *pb.Snapshot
	// all entries that have not yet been written to storage.
	entries []pb.Entry
	offset  uint64

	logger Logger
}

// raftLog combines the stable storage, the not-yet-persisted unstable part,
// and the committed/applied positions of the log.
type raftLog struct {
	// storage contains all stable entries since the last snapshot.
	// Author's note: in practice this is a MemoryStorage.
	storage Storage

	// unstable contains all unstable entries and snapshot.
	// they will be saved into storage.
	// Author's note: the storage they end up in is the MemoryStorage.
	unstable unstable

	// committed is the highest log position that is known to be in
	// stable storage on a quorum of nodes.
	// I.e. the index of the last committed entry.
	committed uint64

	// applied is the highest log position that the application has
	// been instructed to apply to its state machine.
	// Invariant: applied <= committed
	// I.e. the index of the last entry applied to the state machine.
	applied uint64

	logger Logger
}

这篇基本上没有什么技术含量，只是把一些数据结构总结一下，用于方便查找与理解。

Etcd源码分析-数据字典

猜你喜欢