This article mainly explains the principles and process analysis of communication between the ivshmem virtual device backend and frontend using shared memory and interrupts.
In the above image, the left side represents the OEE side, and the right side represents the RTOS. The communication process is mainly achieved through interrupts and shared memory. The interrupt handling and shared memory initialization parts are completed by the ivshmem PCIe driver. The subsequent content of this article is based on the assumption that interrupts can notify each other and that shared memory can be used directly.
In the subsequent content, the device side refers to OEE, and the driver side refers to RTOS.
I. Introduction to virtio_ring Structure#
#include <openamp/virtio_ring.h>
/**
 * @brief The virtqueue layout structure
 *
 * Each virtqueue consists of: descriptor table, available ring, used ring,
 * where each part is physically contiguous in guest memory (in this article:
 * contiguous in the shared memory region).
 *
 * When the driver wants to send a buffer to the device, it fills in a slot in
 * the descriptor table (or chains several together), and writes the descriptor
 * index into the available ring. It then notifies the device (in this article:
 * by sending an interrupt). When the device has finished a buffer, it writes
 * the descriptor index into the used ring, and sends an interrupt to notify
 * the driver.
 *
 * The standard layout for the ring is a continuous chunk of memory which
 * looks like this. We assume num is a power of 2.
 * The data arrangement of the vring in shared memory is as follows:
 *
 * struct vring {
 *	// The actual descriptors (16 bytes each)
 *	struct vring_desc desc[num];
 *
 *	// A ring of available descriptor heads with free-running index.
 *	__u16 avail_flags;
 *	__u16 avail_idx;
 *	__u16 available[num];
 *	__u16 used_event_idx;
 *
 *	// Padding to the next align boundary.
 *	char pad[];
 *
 *	// A ring of used descriptor heads with free-running index.
 *	__u16 used_flags;
 *	__u16 used_idx;
 *	struct vring_used_elem used[num];
 *	__u16 avail_event_idx;
 * };
 *
 * NOTE: for VirtIO PCI, align is 4096.
 */
struct vring {
/**
 * The maximum number of buffer descriptors in the virtqueue.
 * The value is always a power of 2.
 */
unsigned int num; // Number of descriptors, i.e. the maximum number of messages the shared memory can hold at once
/** The actual buffer descriptors, 16 bytes each */
struct vring_desc *desc; // Descriptor table: points at the num descriptors in shared memory
/** A ring of available descriptor heads with free-running index */
struct vring_avail *avail; // Available ring: written by the driver, read by the device
/** A ring of used descriptor heads with free-running index */
struct vring_used *used; // Used ring: written by the device, read by the driver
};
struct vring is the data structure that controls data transmission during actual communication; all data exchange between the backend and the frontend goes through it.
All three pointers (desc, avail and used) are set to point into the shared memory area during initialization.
/**
 * @brief VirtIO ring descriptors.
 *
 * The descriptor table refers to the buffers the driver is using for the
 * device. addr is a physical address, and the buffers can be chained via \ref next.
 * Each descriptor describes a buffer which is read-only for the device
 * ("device-readable") or write-only for the device ("device-writable"), but a
 * chain of descriptors can contain both device-readable and device-writable
 * buffers.
 */
METAL_PACKED_BEGIN
struct vring_desc {
/** Address (guest-physical) */
uint64_t addr; // Location of the message data; the examples in this article store an offset relative to the shared memory base
/** Length */
uint32_t len; // Length of the message data in bytes
/** Flags relevant to the descriptors */
uint16_t flags; // The examples in this article use 0 for "last descriptor of the message" and 1 for "more descriptors follow"
/** We chain unused descriptors via this, too */
uint16_t next; // Index of the next descriptor in the chain (e.g. 0->1, 1->2), wrapping around the table
} METAL_PACKED_END;
/**
 * @brief Used to offer buffers to the device.
 *
 * Each ring entry refers to the head of a descriptor chain. It is only
 * written by the driver and read by the device.
 */
METAL_PACKED_BEGIN
struct vring_avail {
/** Flag which determines whether device notifications are required */
uint16_t flags;
/**
 * Indicates where the driver puts the next descriptor entry in the
 * ring (modulo the queue size)
 */
uint16_t idx; // Advanced by the driver after it has filled a ring slot
/** The ring of descriptors */
uint16_t ring[0]; // Array of descriptor-chain head indices; the element count is not fixed by this declaration
} METAL_PACKED_END;
/**
 * @brief One entry of the used ring.
 *
 * uint32_t is used here for ids for padding reasons.
 */
METAL_PACKED_BEGIN
struct vring_used_elem {
union {
/* 16-bit view sharing storage with id. */
uint16_t event;
/* Index of start of used descriptor chain. */
uint32_t id;
};
/* Total length of the descriptor chain which was written to. */
uint32_t len;
} METAL_PACKED_END;
/**
 * @brief The device returns buffers to this structure when done with them
 *
 * The structure is only written to by the device, and read by the driver.
 */
METAL_PACKED_BEGIN
struct vring_used {
/** Flag which determines whether device notifications are required */
uint16_t flags;
/**
 * Indicates where the device puts the next used entry in the
 * ring (modulo the queue size)
 */
uint16_t idx; // Advanced by the device after it has filled a ring slot
/** The ring of used elements, one per returned descriptor chain */
struct vring_used_elem ring[0];
} METAL_PACKED_END;
vring initialization code:
/**
 * @brief Lay a vring out over one contiguous memory region.
 *
 * The descriptor table starts at @p p, the available ring follows it
 * directly, and the used ring starts at the next @p align boundary after the
 * available ring plus one trailing uint16_t.
 *
 * @param vr    vring to fill in; num, desc, avail and used are all overwritten
 * @param num   number of descriptors in the ring
 * @param p     start address of the region (the shared memory in this article)
 * @param align alignment of the used ring; must be a power of two, otherwise
 *              the rounding mask below is meaningless
 */
static inline void
vring_init(struct vring *vr, unsigned int num, uint8_t *p, unsigned long align)
{
	/*
	 * Do the address arithmetic in uintptr_t: unsigned long is narrower
	 * than a pointer on LLP64 ABIs, which would truncate the address and
	 * zero the upper bits when the inverted mask is applied.
	 */
	const uintptr_t mask = (uintptr_t)align - 1;
	uintptr_t avail_end;

	vr->num = num;
	/* Descriptor table sits at the very start of the region. */
	vr->desc = (struct vring_desc *)p;
	/* Available ring begins right after the num descriptors. */
	vr->avail = (struct vring_avail *)(p + num * sizeof(struct vring_desc));

	/* End of avail: ring[num] plus the trailing uint16_t event index. */
	avail_end = (uintptr_t)&vr->avail->ring[num] + sizeof(uint16_t);
	/* Round up to the requested alignment to find the used ring. */
	vr->used = (struct vring_used *)((avail_end + mask) & ~mask);
}
II. virtio_ring Communication Process#
1. Driver Sends Message#
1.1 Fill in Message Content#
// Current free descriptor index
u16 desc_index;
// Get pointer to the free descriptor (vring.desc is the descriptor array, so take the element's address)
struct vring_desc *curr_desc = &vring.desc[desc_index];
// The filled address is the offset relative to shared memory, the address corresponding to each index descriptor can be confirmed by its own method
// The subsequent transmitted data is placed in this block of shared memory
curr_desc->addr = OFFSET_SHMEM + MSG_LENGTH * desc_index;
// Length of data to be transmitted
curr_desc->len = msg_len;
// 0 means this is the last descriptor of the message, 1 means there are more message descriptors
curr_desc->flags = 0;
// Fill in message content
memcpy(shmem + curr_desc->addr, msg_buff, msg_len);
1.2 Send Message#
All index variables below are implicitly taken modulo the number of descriptors (vring.num); the modulo operations are omitted from the snippets and are not pointed out again.
// Current available index points to the just filled message index
vring.avail->ring[vring.avail->idx] = desc_index;
// Update free descriptor index, N is the number of messages transmitted in the previous step
desc_index += N;
// The descriptor and the ring entry must be visible to the device before the index that publishes them
__sync_synchronize();
// Update available index
vring.avail->idx++;
// The new index must be visible before the device is interrupted
__sync_synchronize();
// Interrupt notification
ivshmem_notify();
2. Device Receives Message#
// Receive interrupt notification
waitfor_notify();
// Current available descriptor index
u16 avail_index;
// If the available descriptor index has not been updated, it means there are no new messages
if(avail_index == vring.avail->idx)
return;
// Get message descriptor index
u16 desc_index = vring.avail->ring[avail_index];
// Get message descriptor
get:
struct vring_desc *curr_desc = vring.desc[desc_index];
// Read message content
memcpy(msg_buff, shmem + curr_desc->addr, curr_desc->len);
// Check if there are more messages
if(curr_desc->flags != 0)
{
desc_index++;
goto get;
}
3. Device Sends Reply#
// Update used content
vring.used->ring[vring.used->idx].id = desc_index;
vring.used->ring[vring.used->idx].len = curr_desc->len;
// This available entry has been consumed
avail_index++;
// The used element must be visible to the driver before the index that publishes it
__sync_synchronize();
vring.used->idx++;
// The new index must be visible before the driver is interrupted
__sync_synchronize();
// Interrupt notification
ivshmem_notify();
4. Driver Receives Reply#
// Receive interrupt notification
waitfor_notify();
// Current used descriptor index
u16 used_index;
// If the used descriptor index has not been updated, it means there is no reply content
if (used_index == vring.used->idx)
	return;
// Get reply content
// id is stored as 32 bits only for padding reasons; a descriptor index fits in 16 bits
u16 id = vring.used->ring[used_index].id;
// len is a 32-bit field; keep the full width so large lengths are not truncated
uint32_t len = vring.used->ring[used_index].len;
used_index++;
III. Both Ends Initialize virtio_ring#
The following structure is common on the Linux side and is stored at the starting address in shared memory.
/*
 * Configuration header placed at the starting address of the shared memory.
 * It carries the parameters of the initialized vrings from the RTOS side to
 * the Linux side.
 */
struct virtio_ivshmem_common_header {
uint32_t revision;
uint32_t size;
uint32_t write_transaction; // Offset of the field that was just updated; NOTE(review): presumably 0 means no update is pending, as in process_write_transaction() - confirm
uint32_t device_features;
uint32_t device_features_sel;
uint32_t driver_features;
uint32_t driver_features_sel;
uint32_t queue_sel; // Index of the vring the queue_* fields below refer to
uint16_t queue_size; // vring.num
uint16_t queue_device_vector;
uint16_t queue_driver_vector;
uint16_t queue_enable;
uint64_t queue_desc; // vring.desc
uint64_t queue_driver; // vring.avail
uint64_t queue_device; // vring.used
uint8_t config_event;
uint8_t queue_event;
uint8_t __reserved[2];
uint32_t device_status; // Set to 0xf to indicate initialization is complete
uint32_t config_generation;
};
The following program is the initialization part in the Linux side virtual serial port backend application, which uses this structure to pass the parameters of the initialized vring from the RTOS side to the Linux side:
static int process_write_transaction(void)
{
unsigned int new_queue;
switch (vc->write_transaction) {
case 0:
return 0;
case VI_REG_OFFSET(device_features_sel):
printf("device_features_sel: %d\n", vc->device_features_sel);
if (vc->device_features_sel == 1) {
vc->device_features =
(1 << (VIRTIO_F_VERSION_1 - 32)) |
(1 << (VIRTIO_F_IOMMU_PLATFORM - 32)) |
(1 << (VIRTIO_F_ORDER_PLATFORM - 32));
} else {
vc->device_features = 1 << VIRTIO_CONSOLE_F_SIZE;
}
break;
case VI_REG_OFFSET(driver_features_sel):
printf("driver_features_sel: %d\n", vc->driver_features_sel);
break;
case VI_REG_OFFSET(driver_features):
printf("driver_features[%d]: 0x%x\n", vc->driver_features_sel,
vc->driver_features);
break;
case VI_REG_OFFSET(queue_sel):
new_queue = vc->queue_sel;
printf("queue_sel: %d\n", new_queue);
if (new_queue > 1)
break;
if (current_queue >= 0)
memcpy(&queue_config[current_queue], &vc->queue_config,
sizeof(struct virtio_queue_config));
current_queue = new_queue;
memcpy(&vc->queue_config, &queue_config[current_queue],
sizeof(struct virtio_queue_config));
break;
case VI_REG_OFFSET(queue_config.size):
printf("queue size: %d\n", vc->queue_config.size);
break;
case VI_REG_OFFSET(queue_config.driver_vector):
printf("queue driver vector: %d\n",
vc->queue_config.driver_vector);
break;
case VI_REG_OFFSET(queue_config.enable):
printf("queue enable: %d\n", vc->queue_config.enable);
if (current_queue >= 0 && vc->queue_config.enable) {
memcpy(&queue_config[current_queue], &vc->queue_config,
sizeof(struct virtio_queue_config));
vring[current_queue].num = vc->queue_config.size;
vring[current_queue].desc =
shmem + vc->queue_config.desc;
vring[current_queue].avail =
shmem + vc->queue_config.driver;
vring[current_queue].used =
shmem + vc->queue_config.device;
next_idx[current_queue] = 0;
}
break;
case VI_REG_OFFSET(queue_config.desc):
printf("queue desc: 0x%llx\n",
(unsigned long long)vc->queue_config.desc);
break;
case VI_REG_OFFSET(queue_config.driver):
printf("queue driver: 0x%llx\n",
(unsigned long long)vc->queue_config.driver);
break;
case VI_REG_OFFSET(queue_config.device):
printf("queue device: 0x%llx\n",
(unsigned long long)vc->queue_config.device);
break;
case VI_REG_OFFSET(device_status):
printf("device_status: 0x%x\n", vc->device_status);
if (vc->device_status == 0xf) {
vc->config_event = 1;
__sync_synchronize();
mmio_write32(®s->doorbell, peer_id << 16);
}
break;
default:
printf("unknown write transaction for %x\n",
vc->write_transaction);
break;
}
__sync_synchronize();
vc->write_transaction = 0;
return 1;
}
// Device side main program loop: runs for as long as the peer stays in the READY state
while (state[peer_id] == VIRTIO_STATE_READY) {
	// Handle a pending configuration write first
	event = process_write_transaction();
	// The queues are only serviced once device_status reads 0xf
	if (vc->device_status == 0xf) {
		event |= process_rx_queue();
		event |= process_tx_queue();
	}
	// Something was processed: go round again without blocking
	if (event)
		continue;
	// Nothing to do: block until one of the two descriptors becomes ready
	ret = poll(pollfd, 2, -1);
	if (ret < 0)
		error(1, errno, "poll failed");
	if (pollfd[1].revents & POLLIN)
		wait_for_interrupt(regs);
}
IV. Shared Memory Data Arrangement#
Taking the virtual serial port as an example
| Address | Data | Function |
|---|---|---|
| Low Address | struct virtio_ivshmem_common_header | Used for configuration initialization and parameter synchronization |
| | vring[0] | Used for data sending |
| | vring[1] | Used for data receiving |
| High Address | Subsequent addresses can be used for vring descriptor message data storage | Used for actual data storage |