banner
ekko

ekko's blog

时间不在于你拥有多少,而在于你怎样使用
github
xbox
email

ivshmem虛擬設備通信原理

本篇主要講解 ivshmem 虛擬設備後端與前端利用共享內存和中斷進行通信的原理及過程分析。
Pasted image 20250124101752
上圖中左側為 OEE 側,右側為 RTOS,通信過程主要通過中斷及共享內存實現,中斷處理及共享內存的初始化部分由 ivshmem pcie 驅動完成,本文後續內容建立在中斷可互相通知,共享內存可直接使用的基礎上。

後續內容中,device 端指 OEE,driver 端指 RTOS。

一、virtio_ring 結構體介紹#

#include <openamp/virtio_ring.h>
/**
 * @brief The virtqueue layout structure
 *
 * Each virtqueue consists of; descriptor table, available ring, used ring,
 * where each part is physically contiguous in guest memory.
 *
 * (Article note: in the ivshmem setup described here, each of the three parts
 * is contiguous in the shared memory region.)
 *
 * When the driver wants to send a buffer to the device, it fills in a slot in
 * the descriptor table (or chains several together), and writes the descriptor
 * index into the available ring. It then notifies the device. When the device
 * has finished a buffer, it writes the descriptor index into the used ring,
 * and sends an interrupt.
 *
 * (Article note: in this setup the notification in both directions is an
 * interrupt: the driver interrupts the device after filling the available
 * ring, and the device interrupts the driver after filling the used ring.)
 *
 * The standard layout for the ring is a continuous chunk of memory which
 * looks like this.  We assume num is a power of 2.
 * The vring data is laid out in shared memory as follows:
 * struct vring {
 *      // The actual descriptors (16 bytes each)
 *      struct vring_desc desc[num];
 *
 *      // A ring of available descriptor heads with free-running index.
 *      __u16 avail_flags;
 *      __u16 avail_idx;
 *      __u16 available[num];
 *      __u16 used_event_idx;
 *
 *      // Padding to the next align boundary.
 *      char pad[];
 *
 *      // A ring of used descriptor heads with free-running index.
 *      __u16 used_flags;
 *      __u16 used_idx;
 *      struct vring_used_elem used[num];
 *      __u16 avail_event_idx;
 * };
 *
 * NOTE: for VirtIO PCI, align is 4096.
 */
struct vring {
	/**
	 * The maximum number of buffer descriptors in the virtqueue.
	 * The value is always a power of 2.
	 */
	unsigned int num;// descriptor count; roughly the maximum number of messages the shared memory can hold

	/** The actual buffer descriptors, 16 bytes each */
	struct vring_desc *desc;// descriptor ring: points at the num descriptors in shared memory

	/** A ring of available descriptor heads with free-running index */
	struct vring_avail *avail;// available ring: written by the driver, read by the device

	/** A ring of used descriptor heads with free-running index */
	struct vring_used *used;// used ring: written by the device, read by the driver
};

struct vring是實際通信過程中控制數據傳輸的一個數據結構,後端前端的數據通信都由該數據結構實現。
其中三個指針在初始化時都會指向共享內存區域。

/**
 * @brief VirtIO ring descriptors.
 *
 * The descriptor table refers to the buffers the driver is using for the
 * device. addr is a physical address, and the buffers can be chained via \ref next.
 * Each descriptor describes a buffer which is read-only for the device
 * (“device-readable”) or write-only for the device (“device-writable”), but a
 * chain of descriptors can contain both device-readable and device-writable
 * buffers.
 */
METAL_PACKED_BEGIN
struct vring_desc {
	/** Address (guest-physical) */
	uint64_t addr;// location of the actual message data, inside shared memory

	/** Length */
	uint32_t len;// length of the actual message data

	/** Flags relevant to the descriptors */
	uint16_t flags;

	/** We chain unused descriptors via this, too */
	uint16_t next;// index of the next descriptor (0->1, 1->2, ...), forming a ring
} METAL_PACKED_END;
/**
 * @brief Used to offer buffers to the device.
 *
 * Each ring entry refers to the head of a descriptor chain. It is only
 * written by the driver and read by the device.
 */
METAL_PACKED_BEGIN
struct vring_avail {
	/** Flag which determines whether device notifications are required */
	uint16_t flags;

	/**
	 * Indicates where the driver puts the next descriptor entry in the
	 * ring (modulo the queue size)
	 */
	uint16_t idx;// advanced by the driver each time it publishes a descriptor index

	/** The ring of descriptors */
	uint16_t ring[0];// array of descriptor indices (trailing variable-length array)
} METAL_PACKED_END;
/*
 * One entry of the used ring.
 *
 * uint32_t is used here for ids for padding reasons.
 */
METAL_PACKED_BEGIN
struct vring_used_elem {
	union {
		/* 16-bit view sharing storage with id. */
		uint16_t event;
		/* Index of start of used descriptor chain. */
		uint32_t id;
	};
	/* Total length of the descriptor chain which was written to. */
	uint32_t len;
} METAL_PACKED_END;
/**
 * @brief The device returns buffers to this structure when done with them
 *
 * The structure is only written to by the device, and read by the driver.
 */
METAL_PACKED_BEGIN
struct vring_used {
	/** Flag which determines whether device notifications are required */
	uint16_t flags;

	/**
	 * Indicates where the device puts the next used entry in the
	 * ring (modulo the queue size)
	 */
	uint16_t idx;// advanced by the device each time it returns a descriptor chain

	/** The ring of used elements (trailing variable-length array) */
	struct vring_used_elem ring[0];
} METAL_PACKED_END;

vring 初始化代碼:

/**
 * @brief Lay a vring out over one contiguous memory region.
 *
 * The descriptor table starts at @p p, the available ring follows it
 * directly, and the used ring starts at the first @p align boundary at or
 * after the end of the available ring (its @p num entries plus one trailing
 * uint16_t). Only the fields of @p vr are written; the memory at @p p is not
 * touched.
 *
 * @param vr	vring whose num/desc/avail/used fields are filled in
 * @param num	number of descriptors in the ring
 * @param p	start address of the memory backing the ring
 * @param align	alignment of the used ring; must be a power of two, since the
 *		rounding below is done with a bit mask
 */
static inline void
vring_init(struct vring *vr, unsigned int num, uint8_t *p, unsigned long align)
{
	/*
	 * Do the rounding in uintptr_t: going through unsigned long would
	 * truncate the address (and the inverted mask would clear its upper
	 * bits) on targets where unsigned long is narrower than a pointer.
	 */
	const uintptr_t mask = (uintptr_t)align - 1;
	uintptr_t avail_end;

	vr->num = num;
	vr->desc = (struct vring_desc *)p;
	/* The available ring begins right after the last descriptor. */
	vr->avail = (struct vring_avail *)(p + num * sizeof(struct vring_desc));

	/* End of the available ring: ring[num] plus the trailing uint16_t. */
	avail_end = (uintptr_t)&vr->avail->ring[num] + sizeof(uint16_t);
	vr->used = (struct vring_used *)((avail_end + mask) & ~mask);
}

二、virtio_ring 通信流程#

1. 驅動端發送消息#

1.1 填入消息內容#

// Index of the currently free descriptor
u16 desc_index;
// Pointer to that free descriptor; desc is an array of structs, so the
// element's address has to be taken explicitly
struct vring_desc *curr_desc = &vring.desc[desc_index];
// The address stored here is an offset relative to the shared memory base;
// how a descriptor index maps to an address is up to the implementation.
// The payload transferred later lives in this part of shared memory.
curr_desc->addr = OFFSET_SHMEM + MSG_LENGTH * desc_index;
// Length of the data to transfer
curr_desc->len = msg_len;
// 0: this is the last descriptor of the message, 1: more descriptors follow
curr_desc->flags = 0;
// Copy the message payload into shared memory
memcpy(shmem + curr_desc->addr, msg_buff, msg_len);

1.2 發送消息#

以下所有與索引相關的變量,在實際使用時都需要對描述符數量(vring.num)取餘,後續不再單獨說明

// Put the index of the descriptor just filled in into the current avail slot
vring.avail->ring[vring.avail->idx] = desc_index;
// Advance the free descriptor index; N is the number of descriptors used by
// the message written in the previous step
desc_index += N;
// The descriptor, payload and ring entry must be visible to the device before
// the new idx is, otherwise the device may read a stale ring slot
__sync_synchronize();
// Publish: advance the available index
vring.avail->idx++;
// Notify the device by interrupt
ivshmem_notify();

2. 設備端接收消息#

//接收中斷通知
waitfor_notify();
//當前可用描述符索引
u16 avail_index;
//可用描述符索引未更新,說明沒有新消息
if(avail_index == vring.avail->idx)
	return;
//獲取消息描述符索引
u16 desc_index = vring.avail->ring[avail_index];
//獲取消息描述符
get:
struct vring_desc *curr_desc = vring.desc[desc_index];
//讀取消息內容
memcpy(msg_buff, shmem + curr_desc->addr, curr_desc->len);
//後續是否還有消息
if(curr_desc->flags != 0)
{
	desc_index++;
	goto get;
}

3. 設備端發送回覆#

// Record the consumed message in the used ring. id has to be the head of the
// descriptor chain, i.e. the entry taken from the available ring (avail_index
// has not been advanced yet), not the last descriptor that was walked.
vring.used->ring[vring.used->idx].id = vring.avail->ring[avail_index];
vring.used->ring[vring.used->idx].len = curr_desc->len;
avail_index++;
// The used entry must be visible to the driver before the new idx is
__sync_synchronize();
vring.used->idx++;
// Notify the driver by interrupt
ivshmem_notify();

4. 驅動端接收回覆#

// Wait for the interrupt notification
waitfor_notify();
// Index of the next used-ring entry this side will consume
u16 used_index;
// The used index has not moved: there is no reply
if (used_index == vring.used->idx)
	return;
// Fetch the reply. Descriptor indices fit in 16 bits, but len is a 32-bit
// field and must not be truncated.
u16 id = vring.used->ring[used_index].id;
uint32_t len = vring.used->ring[used_index].len;
used_index++;

三、兩端初始化 virtio_ring#

以下結構體為 linux 端通用,在共享內存中存放於起始地址

/*
 * Configuration header placed at the start of the shared memory region and
 * used on the Linux side to exchange vring parameters and status.
 *
 * NOTE(review): process_write_transaction() in this file reaches the queue
 * fields through a nested member (vc->queue_config.size, .enable, .desc, ...),
 * so the flat queue_* fields below are presumably a simplified view of the
 * real layout - confirm against the actual header.
 */
struct virtio_ivshmem_common_header {
	uint32_t revision;
	uint32_t size;

	uint32_t write_transaction;// offset of the field that was updated; 0 when nothing is pending

	uint32_t device_features;
	uint32_t device_features_sel;
	uint32_t driver_features;
	uint32_t driver_features_sel;

	uint32_t queue_sel;// index of the vring being configured

	uint16_t queue_size;// vring.num
	uint16_t queue_device_vector;
	uint16_t queue_driver_vector;
	uint16_t queue_enable;
	uint64_t queue_desc; // vring.desc
	uint64_t queue_driver;// vring.avail
	uint64_t queue_device;// vring.used

	uint8_t config_event;
	uint8_t queue_event;
	uint8_t __reserved[2];
	uint32_t device_status;// initialization is complete once this is set to 0xf

	uint32_t config_generation;
};

以下程序為 linux 端虛擬串口後端應用中的初始化部分,利用該結構體即可將 RTOS 端初始化好的 vring 的參數傳遞給 linux 端:

/**
 * Handle one pending register write announced through vc->write_transaction.
 *
 * write_transaction holds the VI_REG_OFFSET() of the header field that was
 * written. Each case logs the new value and reacts where needed:
 *  - device_features_sel: expose the matching word of device feature bits
 *  - queue_sel: save the configuration of the previously selected queue and
 *    load the one for the newly selected queue (only queues 0 and 1 exist)
 *  - queue_config.enable: when set, bind vring[current_queue] to the offsets
 *    the other side wrote into queue_config
 *  - device_status: once it reads 0xf, raise config_event and write to the
 *    doorbell register
 *
 * @return 0 if write_transaction was 0 (nothing pending); otherwise 1, after
 *         the transaction has been acknowledged by resetting
 *         write_transaction to 0.
 */
static int process_write_transaction(void)
{
	unsigned int new_queue;

	switch (vc->write_transaction) {
	case 0:
		return 0;
	case VI_REG_OFFSET(device_features_sel):
		/* Unsigned values are printed with %u; %d mismatched the type. */
		printf("device_features_sel: %u\n", vc->device_features_sel);
		if (vc->device_features_sel == 1) {
			/* Feature word 1 carries bits 32 and up, hence the -32. */
			vc->device_features =
				(1 << (VIRTIO_F_VERSION_1 - 32)) |
				(1 << (VIRTIO_F_IOMMU_PLATFORM - 32)) |
				(1 << (VIRTIO_F_ORDER_PLATFORM - 32));
		} else {
			vc->device_features = 1 << VIRTIO_CONSOLE_F_SIZE;
		}
		break;
	case VI_REG_OFFSET(driver_features_sel):
		printf("driver_features_sel: %u\n", vc->driver_features_sel);
		break;
	case VI_REG_OFFSET(driver_features):
		printf("driver_features[%u]: 0x%x\n", vc->driver_features_sel,
		       vc->driver_features);
		break;
	case VI_REG_OFFSET(queue_sel):
		new_queue = vc->queue_sel;
		printf("queue_sel: %u\n", new_queue);
		/* Only queue 0 and queue 1 exist; ignore anything else. */
		if (new_queue > 1)
			break;

		/* Stash the settings of the queue that was selected so far. */
		if (current_queue >= 0)
			memcpy(&queue_config[current_queue], &vc->queue_config,
			    sizeof(struct virtio_queue_config));

		/* Expose the saved settings of the newly selected queue. */
		current_queue = new_queue;
		memcpy(&vc->queue_config, &queue_config[current_queue],
		       sizeof(struct virtio_queue_config));
		break;
	case VI_REG_OFFSET(queue_config.size):
		printf("queue size: %d\n", vc->queue_config.size);
		break;
	case VI_REG_OFFSET(queue_config.driver_vector):
		printf("queue driver vector: %d\n",
		       vc->queue_config.driver_vector);
		break;
	case VI_REG_OFFSET(queue_config.enable):
		printf("queue enable: %d\n", vc->queue_config.enable);
		if (current_queue >= 0 && vc->queue_config.enable) {
			memcpy(&queue_config[current_queue], &vc->queue_config,
			    sizeof(struct virtio_queue_config));
			/*
			 * desc/driver/device are offsets into shared memory;
			 * turn them into local pointers for this queue.
			 */
			vring[current_queue].num = vc->queue_config.size;
			vring[current_queue].desc =
				shmem + vc->queue_config.desc;
			vring[current_queue].avail =
				shmem + vc->queue_config.driver;
			vring[current_queue].used =
				shmem + vc->queue_config.device;
			next_idx[current_queue] = 0;
		}
		break;
	case VI_REG_OFFSET(queue_config.desc):
		printf("queue desc: 0x%llx\n",
		       (unsigned long long)vc->queue_config.desc);
		break;
	case VI_REG_OFFSET(queue_config.driver):
		printf("queue driver: 0x%llx\n",
		       (unsigned long long)vc->queue_config.driver);
		break;
	case VI_REG_OFFSET(queue_config.device):
		printf("queue device: 0x%llx\n",
		       (unsigned long long)vc->queue_config.device);
		break;
	case VI_REG_OFFSET(device_status):
		printf("device_status: 0x%x\n", vc->device_status);
		if (vc->device_status == 0xf) {
			vc->config_event = 1;
			/* Make config_event visible before ringing the doorbell. */
			__sync_synchronize();
			mmio_write32(&regs->doorbell, peer_id << 16);
		}
		break;
	default:
		printf("unknown write transaction for %x\n",
		       vc->write_transaction);
		break;
	}

	/* Finish all updates above before acknowledging the transaction. */
	__sync_synchronize();
	vc->write_transaction = 0;

	return 1;
}

/* Main loop of the device side: runs while this peer's state is READY. */
while (state[peer_id] == VIRTIO_STATE_READY) {
	event = process_write_transaction();

	/* The queues are only serviced once device_status reads 0xf. */
	if (vc->device_status == 0xf) {
		event |= process_rx_queue();
		event |= process_tx_queue();
	}

	/* Something was handled this round: go again without blocking. */
	if (event)
		continue;

	/* Nothing to do: block in poll() until one of the two fds is ready. */
	ret = poll(pollfd, 2, -1);
	if (ret < 0)
		error(1, errno, "poll failed");
	if (pollfd[1].revents & POLLIN)
		wait_for_interrupt(regs);
}

四、共享內存數據排列#

虛擬串口為例

| 地址 | 數據 | 功能 |
| --- | --- | --- |
| 低地址 | struct virtio_ivshmem_header | 用於配置初始化及參數同步 |
| | vring[0] | 用於數據發送 |
| | vring[1] | 用於數據接收 |
| 高地址 | 後續地址可用於 vring 描述符消息數據存儲 | 用於實際數據存放 |

五、參考資料#

  1. https://kvm-forum.qemu.org/2019/KVM-Forum19_ivshmem2.pdf
  2. 深入淺出 vhostuser 傳輸模型 | REXROCK
載入中......
此文章數據所有權由區塊鏈加密技術和智能合約保障僅歸創作者所有。