keepalived source code analysis -- vrrp_dispatcher_read()

The VRRP thread repeatedly calls vrrp_dispatcher_read() to receive VRRP advertisements:
1. Call recvmsg() to receive a VRRP advertisement;
2. Extract the VRRP header;
3. Use the VRID in the VRRP header to determine whether the local VRRP instance and the sender belong to the same virtual router;
4. If they do not belong to the same virtual router, the advertisement is discarded;
5. Process the advertisement according to the current state of the local VRRP instance (master/backup). If the current state is master, decide whether to switch to backup.

/*
 * Read-side dispatcher thread for a VRRP socket.
 *
 * Runs the timeout handler when the thread fired as a read timeout or the
 * socket has no input fd (fd_in == -1); otherwise reads the pending packets.
 * Unless the handler returned -1, the thread re-registers itself on the
 * returned fd using the timer from vrrp_compute_timer(). Always returns 0.
 */
static int
vrrp_read_dispatcher_thread(thread_ref_t thread)
{
	/* The socket this thread serves is carried as the thread argument */
	sock_t *sock = THREAD_ARG(thread);
	int next_fd;

	/* Pick the handler: packet reception or timeout processing */
	if (thread->type != THREAD_READ_TIMEOUT && sock->fd_in != -1)
		next_fd = vrrp_dispatcher_read(sock);
	else
		next_fd = vrrp_dispatcher_read_timeout(sock);

	/* No fd to wait on, so there is nothing to re-register */
	if (next_fd == -1)
		return 0;

	/* Queue the next run of this dispatcher on the returned fd */
	sock->thread = thread_add_read_sands(thread->master, vrrp_read_dispatcher_thread,
					     sock, next_fd, vrrp_compute_timer(sock), false);

	return 0;
}
/*
 * Handle dispatcher read packet.
 *
 * Drains packets pending on sock->fd_in with recvmsg(). Each packet that
 * carries a valid VRRP header whose VRID matches an instance registered in
 * sock->rb_vrid is passed to the handler for that instance's current state
 * (backup or master). Receiving stops when recvmsg() fails (including
 * EAGAIN), when vrrp_get_header() rejects a packet, or once the packet
 * carrying the second advert seen for any single VRID has been handled.
 *
 * Returns sock->fd_in.
 */
static int
vrrp_dispatcher_read(sock_t *sock)
{
	vrrp_t *vrrp;
	const vrrphdr_t *hd;
	ssize_t len = 0;
	int prev_state = 0;
	struct sockaddr_storage src_addr = { .ss_family = AF_UNSPEC };
	vrrp_t vrrp_lookup;
#ifdef _NETWORK_TIMESTAMP_
	char control_buf[128];
#else
	char control_buf[64];
#endif
	struct iovec iovec = { .iov_base = vrrp_buffer, .iov_len = vrrp_buffer_len };
	struct msghdr msghdr = { .msg_name = &src_addr, .msg_namelen = sizeof(src_addr),
				 .msg_iov = &iovec, .msg_iovlen = 1,
				 .msg_control = control_buf, .msg_controllen = sizeof(control_buf) };
	struct cmsghdr *cmsg;
	bool expected_cmsg;
	unsigned eintr_count;
	/* One bit per possible VRID, set once an advert for it has been seen */
	unsigned long rx_vrid_map[BIT_WORD(256 + BIT_PER_LONG - 1)] = { 0 };
	bool terminate_receiving = false;
#ifdef DEBUG_RECVMSG
	unsigned recv_data_count = 0;
#endif

	/* Strategy here is to handle incoming adverts pending into socket recvq
	 * but stop if receive 2nd advert for a VRID on socket (this applies to
	 * both configured and unconfigured VRIDs).
	 * Seems a good tradeoff while simulating */
	while (!terminate_receiving) {
		/* read & affect received buffer */
		eintr_count = 0;

		/* recvmsg() treats msg_namelen and msg_controllen as value-result
		 * fields and overwrites them with the lengths actually returned;
		 * msg_controllen is additionally zeroed below after a truncated
		 * control message. Restore both to the full buffer sizes before
		 * every call so that later packets in this loop are not handed a
		 * shrunken (or empty) buffer. */
		msghdr.msg_namelen = sizeof(src_addr);
		msghdr.msg_controllen = sizeof(control_buf);

		/* MSG_TRUNC as an input flag makes recvmsg() return the real
		 * length of the datagram even when it is longer than the buffer,
		 * which lets the truncation log below report the original size.
		 * Retry a bounded number of times when interrupted by a signal. */
		while ((len = recvmsg(sock->fd_in, &msghdr, MSG_TRUNC | MSG_CTRUNC)) == -1 &&
		       check_EINTR(errno) && eintr_count++ < 10);

		/* Error reading data, errno indicates the reason */
		if (len < 0) {
#ifdef DEBUG_RECVMSG
			if (check_EINTR(errno))
				log_message(LOG_INFO, "recvmsg(%d) looped %u times due to EINTR before terminating loop"
						    , sock->fd_in, eintr_count);
#endif
			/* EAGAIN/EWOULDBLOCK is not logged as an error; presumably it
			 * just means nothing more is queued on a non-blocking socket. */
			if (!check_EAGAIN(errno))
				log_message(LOG_INFO, "recvmsg(%d) returned %d (%m)"
						    , sock->fd_in, errno);
#ifdef DEBUG_RECVMSG
			else if (recv_data_count == 0)
				log_message(LOG_INFO, "recvmsg(%d) returned EAGAIN without any data being received"
						    , sock->fd_in);

			if (recv_data_count != 1)
				log_message(LOG_INFO, "recvmsg(%d) loop received %u packets"
						    , sock->fd_in, recv_data_count);
#endif
			break;
		}

#ifdef DEBUG_RECVMSG
		/* len is an ssize_t, so it is printed with %zd */
		if (eintr_count)
			log_message(LOG_INFO, "recvmsg(%d) looped %u times due to EINTR before returning %zd"
					    , sock->fd_in, eintr_count, len);
#endif

		/* Empty read: log it and try the next packet */
		if (len == 0) {
			log_message(LOG_INFO, "recvmsg(%d) returned data length 0", sock->fd_in);
			continue;
		}

		/* Some data was received */
#ifdef DEBUG_RECVMSG
		recv_data_count++;
#endif

		/* Packet did not fit in vrrp_buffer: ignore it */
		if (msghdr.msg_flags & MSG_TRUNC) {
			log_message(LOG_INFO, "recvmsg(%d) message truncated from %zd to %zu bytes"
					    , sock->fd_in, len, vrrp_buffer_len);
			continue;
		}

		/* Control data did not fit in control_buf: keep the packet but
		 * make the cmsg walk below see no control messages at all */
		if (msghdr.msg_flags & MSG_CTRUNC) {
			log_message(LOG_INFO, "recvmsg(%d), control message truncated from %zu to %" PRI_MSG_CONTROLLEN " bytes"
					    , sock->fd_in, sizeof(control_buf), msghdr.msg_controllen);
			msghdr.msg_controllen = 0;
		}

		/* Get VRRP header; stop receiving if none can be obtained */
		if (!(hd = vrrp_get_header(sock->family, vrrp_buffer, len)))
			break;

		/* Defense strategy here is to handle no more than one advert
		 * per VRID in order to flush socket rcvq...
		 * This is a best effort mitigation.
		 * The current packet is still processed; only the loop ends. */
		if (__test_and_set_bit(hd->vrid, rx_vrid_map))
			terminate_receiving = true;

		/* Look up the instance configured on this socket for the
		 * VRID carried in the header */
		vrrp_lookup.vrid = hd->vrid;
		vrrp = rb_search(&sock->rb_vrid, &vrrp_lookup, rb_vrid, vrrp_vrid_cmp);

		/* No instance found => ignore the advert */
		if (!vrrp) {
			if (global_data->log_unknown_vrids)
				log_message(LOG_INFO, "Unknown VRID(%d) received on interface(%s). ignoring..."
						    , hd->vrid, IF_NAME(sock->ifp));
			continue;
		}

		if (vrrp->state == VRRP_STATE_FAULT || vrrp->state == VRRP_STATE_INIT) {
			/* We just ignore a message received when we are in fault state or
			 * not yet fully initialised */
			continue;
		}

		/* Save non packet data */
		vrrp->pkt_saddr = src_addr;
		vrrp->hop_limit = -1;           /* Default to not received */
		vrrp->multicast_pkt = false;

		/* Walk the ancillary (control) messages delivered with the packet */
		for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
			expected_cmsg = false;
			if (cmsg->cmsg_level == IPPROTO_IPV6) {
				expected_cmsg = true;

				/* Record the hop limit and whether the destination address
				 * was multicast; any other IPv6 cmsg is unexpected */
#ifdef IPV6_RECVHOPLIMIT
				if (cmsg->cmsg_type == IPV6_HOPLIMIT &&
				    cmsg->cmsg_len - sizeof(struct cmsghdr) == sizeof(unsigned int))
					vrrp->hop_limit = *(unsigned int *)CMSG_DATA(cmsg);
				else
#endif
#ifdef IPV6_RECVPKTINFO
				if (cmsg->cmsg_type == IPV6_PKTINFO &&
				    cmsg->cmsg_len - sizeof(struct cmsghdr) == sizeof(struct in6_pktinfo))
					vrrp->multicast_pkt = IN6_IS_ADDR_MULTICAST(&((struct in6_pktinfo *)CMSG_DATA(cmsg))->ipi6_addr);
				else
#endif
					expected_cmsg = false;
			}
#ifdef _NETWORK_TIMESTAMP_
			else if (do_network_timestamp && cmsg->cmsg_level == SOL_SOCKET) {
				struct timespec *ts = (void *)CMSG_DATA(cmsg);
				char time_buf[9];

				expected_cmsg = true;
				if (cmsg->cmsg_type == SO_TIMESTAMPNS) {
					strftime(time_buf, sizeof time_buf, "%T", localtime(&ts->tv_sec));
					log_message(LOG_INFO, "TIMESTAMPNS (socket %d - VRID %u) %s.%9.9ld"
							    , sock->fd_in, hd->vrid, time_buf, ts->tv_nsec);
				}
#if 0
				if (cmsg->cmsg_type == SO_TIMESTAMP) {
					struct timeval *tv = (void *)CMSG_DATA(cmsg);
					log_message(LOG_INFO, "TIMESTAMP message (%d - %u)  %ld.%9.9ld"
							    , sock->fd_in, hd->vrid, tv->tv_sec, tv->tv_usec);
				}
				else if (cmsg->cmsg_type == SO_TIMESTAMPING) {
					struct timespec *ts = (void *)CMSG_DATA(cmsg);
					log_message(LOG_INFO, "TIMESTAMPING message (%d - %u)  %ld.%9.9ld, raw %ld.%9.9ld"
							    , sock->fd_in, hd->vrid, ts->tv_sec, ts->tv_nsec, (ts+2)->tv_sec, (ts+2)->tv_nsec);
				}
#endif
				else
					expected_cmsg = false;
			}
#endif

			if (!expected_cmsg)
				log_message(LOG_INFO, "fd %d, unexpected control msg len %" PRI_MSG_CONTROLLEN ", level %d, type %d"
						    , sock->fd_in, cmsg->cmsg_len
						    , cmsg->cmsg_level, cmsg->cmsg_type);
		}

		/* Remember the state before the advert is processed so that the
		 * transition can be reported/handled afterwards */
		prev_state = vrrp->state;

		if (vrrp->state == VRRP_STATE_BACK)
			/* Local instance is backup */
			vrrp_state_backup(vrrp, hd, vrrp_buffer, len);
		else if (vrrp->state == VRRP_STATE_MAST) {
			/* Local instance is master: leave master state when the
			 * receive handler returns non-zero */
			if (vrrp_state_master_rx(vrrp, hd, vrrp_buffer, len))
				vrrp_state_leave_master(vrrp, false);
		} else
			log_message(LOG_INFO, "(%s) In dispatcher_read with state %d"
					    , vrrp->iname, vrrp->state);


		/* handle instance synchronization */
#ifdef _TSM_DEBUG_
		if (do_tsm_debug)
			log_message(LOG_INFO, "Read [%s] TSM transition : [%d,%d] Wantstate = [%d]"
					    , vrrp->iname, prev_state, vrrp->state, vrrp->wantstate);
#endif
		VRRP_TSM_HANDLE(prev_state, vrrp);

		/* If we have sent an advert, reset the timer */
		if (vrrp->state != VRRP_STATE_MAST || !vrrp->lower_prio_no_advert)
			vrrp_init_instance_sands(vrrp);
	}

	return sock->fd_in;
}
59 original articles published, praised 19, visited 50000+
Private letter follow

Tags: socket

Posted on Fri, 07 Feb 2020 11:28:54 -0500 by phrygius