CS-develop-qemu-device-入门

qemu device demo

device code

首先下载qemu的源码.

1
2
3
# Fetch the QEMU 7.1.0 release tarball, unpack it, and enter the source tree.
wget https://download.qemu.org/qemu-7.1.0.tar.xz
tar -xvJf qemu-7.1.0.tar.xz
cd qemu-7.1.0

在hw/misc里添加如下device代码.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
// hw/misc/w4rd3n.c
#include "qemu/osdep.h"
#include "hw/pci/pci.h"

/*
 * Per-instance state of the w4rd3n demo PCI device.
 */
struct W4rd3nState {
    /* Parent PCI device object; kept as the first member because the
     * handlers cast the same opaque pointer to PCIDevice *. */
    PCIDevice parent_obj;

    /* Each address window is a MemoryRegion whose behaviour is described
     * by the matching MemoryRegionOps table. */
    MemoryRegion io;    /* port-I/O window */
    MemoryRegion mmio;  /* memory-mapped window */

    qemu_irq irq;
    unsigned int dma_size;  /* size of dma_buf in bytes */
    char *dma_buf;          /* staging buffer that is DMA-written to the guest */
    int threw_irq;          /* 1 while the interrupt is asserted, 0 otherwise */
    int id;                 /* device id, readable from register offset 4 */
};

/* QOM type name; this is the string the type is registered under. */
#define TYPE_PCI_W4RD3N_DEV "pci-w4rd3ndev"
typedef struct W4rd3nState W4rd3nState;
/* Generates W4RD3N(obj), the checked cast to W4rd3nState *. */
DECLARE_INSTANCE_CHECKER(W4rd3nState, W4RD3N, TYPE_PCI_W4RD3N_DEV)

/*
 * Write handler for the port-I/O region.
 *
 * Register map (byte offset inside the region):
 *   0 - non-zero asserts the PCI interrupt, zero de-asserts it;
 *       threw_irq mirrors the current state.
 *   4 - value is a guest DMA address: the device fills its buffer with
 *       rand() bytes and DMA-writes dma_size bytes to that address.
 * Any other offset is only logged.
 *
 * opaque is the W4rd3nState registered with memory_region_init_io().
 */
static void w4rd3n_iowrite(void *opaque, hwaddr addr, uint64_t value, unsigned size)
{
    W4rd3nState *d = (W4rd3nState *)opaque;
    PCIDevice *pci_dev = &d->parent_obj;
    unsigned int i;

    /* PRIu64 / %u match the argument types on every host ABI. */
    printf("Write Ordered, addr = %x, value = %" PRIu64 ", size = %u\n",
           (unsigned)addr, value, size);

    switch (addr) {
    case 0:
        if (value) {
            printf("irq assert\n");
            d->threw_irq = 1;
            pci_irq_assert(pci_dev);
        } else {
            printf("irq deassert\n");
            pci_irq_deassert(pci_dev);
            d->threw_irq = 0;
        }
        break;
    case 4:
        if (!d->dma_buf) {
            /* Never dereference a buffer that was not allocated. */
            printf("dma buffer not allocated\n");
            break;
        }
        for (i = 0; i < d->dma_size; ++i) {
            d->dma_buf[i] = rand();
        }
        pci_dma_write(pci_dev, value, (void *)d->dma_buf, d->dma_size);
        break;
    default:
        printf("Io not used\n");
        break;
    }
}

/*
 * Read handler for the port-I/O region.
 *
 * Offset 0 returns the interrupt flag (threw_irq), offset 4 returns the
 * device id; every other offset is logged and reads as 0.
 */
static uint64_t w4rd3n_ioread(void *opaque, hwaddr addr, unsigned size)
{
    W4rd3nState *dev = opaque;
    uint64_t result = 0x0;

    printf("Read Ordered, addr = %x, size = %d\n", (unsigned)addr, size);

    if (addr == 0) {
        result = dev->threw_irq;
    } else if (addr == 4) {
        printf("id\n");
        result = dev->id;
    } else {
        printf("Io not used\n");
    }
    return result;
}

/*
 * Read handler for the MMIO region.
 *
 * Offset 0 returns the interrupt flag (threw_irq), offset 4 returns the
 * device id; every other offset is logged and reads as 0.
 */
static uint64_t w4rd3n_mmioread(void *opaque, hwaddr addr, unsigned size)
{
    W4rd3nState *dev = opaque;
    uint64_t result = 0x0;

    printf("MMIO Read Ordered, addr = %x, size = %d\n", (unsigned)addr, size);

    if (addr == 0) {
        printf("irq_status\n");
        result = dev->threw_irq;
    } else if (addr == 4) {
        printf("id\n");
        result = dev->id;
    } else {
        printf("MMIO not used\n");
    }
    return result;
}

/*
 * Write handler for the MMIO region.
 *
 * Only offset 4 is writable: it replaces the device id with value.
 * Writes to any other offset are logged and ignored.
 */
static void w4rd3n_mmiowrite(void *opaque, hwaddr addr, uint64_t value, unsigned size)
{
    W4rd3nState *d = (W4rd3nState *)opaque;

    /* PRIu64 / %u match the argument types on every host ABI. */
    printf("MMIO write Ordered, addr = %x, value = %" PRIu64 ", size = %u\n",
           (unsigned)addr, value, size);

    switch (addr) {
    case 4:
        d->id = value;
        break;
    default:
        printf("MMIO not writable or not used\n");
        break;
    }
}

/* Callback table for the MMIO region; valid accesses are exactly 4 bytes. */
static const MemoryRegionOps w4rd3n_mmio_ops = {
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid.min_access_size = 4,
    .valid.max_access_size = 4,
    .read = w4rd3n_mmioread,
    .write = w4rd3n_mmiowrite,
};

/* Callback table for the port-I/O region; valid accesses are exactly 4 bytes. */
static const MemoryRegionOps w4rd3n_io_ops = {
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid.min_access_size = 4,
    .valid.max_access_size = 4,
    .read = w4rd3n_ioread,
    .write = w4rd3n_iowrite,
};

static void pci_w4rd3n_realize(PCIDevice * pdev, Error ** errp) {
W4rd3nState * pw4rd3n = W4RD3N(pdev);
// W4RD3N方法实现了在继承链之间的强制转换
pw4rd3n->dma_size = 0x1ffff * sizeof(char);
pw4rd3n->dma_buf = malloc(pw4rd3n->dma_size);
pw4rd3n->id = 0x1337;
pw4rd3n->threw_irq = 0;

memory_region_init_io(&pw4rd3n->mmio, OBJECT(pw4rd3n), &w4rd3n_mmio_ops, pw4rd3n, "w4rd3n-mmio", 1 << 6);
memory_region_init_io(&pw4rd3n->io, OBJECT(pw4rd3n), &w4rd3n_io_ops, pw4rd3n, "w4rd3n-io", 1 << 4);
pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pw4rd3n->io);
pci_register_bar(pdev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &pw4rd3n->mmio);
// 调用pci_register_bar来注册BAR等信息
// pci设备会在内部检测共享内存空间读写是否越界
uint8_t *pci_conf = pdev->config;
pci_config_set_interrupt_pin(pci_conf, 2);
printf("W4rd3n Device loaded\n");
}

/*
 * Exit callback: releases the DMA staging buffer allocated at realize time.
 */
static void pci_w4rd3n_uninit(PCIDevice *pdev)
{
    char *buf = W4RD3N(pdev)->dma_buf;

    free(buf);
    printf("W4rd3n Device unloaded\n");
}

/*
 * Class initializer: fills in the PCI identity of the device and hooks up
 * its realize/exit callbacks.
 */
static void w4rd3n_class_init(ObjectClass *class, void *data)
{
    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(class);

    /* PCI identity reported in configuration space. */
    pci_class->vendor_id = PCI_VENDOR_ID_QEMU;
    pci_class->device_id = 0x300;
    pci_class->revision = 0x10;
    pci_class->class_id = PCI_CLASS_OTHERS;

    /* Constructor and destructor of the device instance. */
    pci_class->realize = pci_w4rd3n_realize;
    pci_class->exit = pci_w4rd3n_uninit;
}

/*
 * Registers the w4rd3n type with QOM as a conventional PCI device.
 */
static void w4rd3n_init(void)
{
    /* Interface list, terminated by an empty entry. */
    static InterfaceInfo interfaces[] = {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    };
    static const TypeInfo w4rd3n_info = {
        .name          = TYPE_PCI_W4RD3N_DEV,
        .parent        = TYPE_PCI_DEVICE,
        .interfaces    = interfaces,
        .class_init    = w4rd3n_class_init,
        .instance_size = sizeof(W4rd3nState),
    };

    type_register_static(&w4rd3n_info);
}

/* Hook w4rd3n_init into the type-initialization list. */
type_init(w4rd3n_init)

编译与运行

添加编译选项.

1
2
3
# Compile w4rd3n.c into system emulators whenever CONFIG_W4RD3N is enabled.
echo "softmmu_ss.add(when: 'CONFIG_W4RD3N', if_true: files('w4rd3n.c'))" >> ./hw/misc/meson.build
# Declare the Kconfig symbol; printf is used because `echo -e` is not
# portable (shells such as dash print the "-e" literally).
printf 'config W4RD3N\nbool\ndepends on PCI\n' >> ./hw/misc/Kconfig
# Enable the symbol in the default i386 softmmu device configuration.
echo "CONFIG_W4RD3N=y" >> ./configs/devices/i386-softmmu/default.mak

编译.

1
2
3
# Install the build dependencies, configure an x86_64 system emulator with
# KVM support and debug info, then build using all available cores.
sudo apt install ninja-build libglib2.0-dev libpixman-1-dev -y
./configure --enable-kvm --enable-debug --target-list=x86_64-softmmu
make -j $(nproc)

检查是否编译成功.

1
# Look for the device's handler name inside the built binary.
strings ./build/qemu-system-x86_64 | grep w4rd3n_iowrite

运行.

1
2
3
4
5
6
7
8
9
./build/qemu-system-x86_64 \
    -m 1G \
    -kernel bzImage \
    -append "console=ttyS0 root=/dev/sda rw" \
    -drive "file=./rootfs.ext2,format=raw" \
    -enable-kvm \
    -nographic \
    -device pci-w4rd3ndev
# Attach the custom device: -device takes the registered type name,
# i.e. TYPE_PCI_W4RD3N_DEV ("pci-w4rd3ndev"), not the source file name.

设备信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
lspci
00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02)
# xx:yy.z => 对应总线:设备.功能
# 查看设备号,可以通过代码中声明的device_id和vendor_id索引
lspci -v -s 00:03.0
# 查看设备详细信息
ls -la /sys/devices/pci0000:00/0000:00:03.0/
# ...
-r--r--r-- 1 root root 4096 Jan 19 11:51 resource
-rw------- 1 root root 256 Jan 19 14:36 resource0
-rw------- 1 root root 8 Jan 19 14:36 resource1
# ...
# 可以通过linux kernel提供的sysfs查看相关信息
# resourceN对应第N个BAR;按上文device代码,BAR0注册的是PMIO空间,BAR1注册的是MMIO空间,即resource0对应PMIO空间,resource1对应MMIO空间;resource中数据格式是start end flags

cat /proc/ioports
# 查看设备ioport
cat /proc/iomem
# 查看设备mmio地址

与设备通讯

可以通过LKM来与设备通讯.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// w4rd3ndrv.c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/io.h>

MODULE_LICENSE("Dual BSD/GPL");

/*
 * Write value to an x86 I/O port.
 *
 * len selects the access width in bytes (4, 2 or 1); any other width is
 * silently ignored.
 */
void write_io(unsigned int value, unsigned short io_port, unsigned char len)
{
    switch (len) {
    case 4:
        outl(value, io_port);
        break;
    case 2:
        outw(value, io_port);
        break;
    case 1:
        outb(value, io_port);
        break;
    default:
        break;
    }
}

/*
 * Read from an x86 I/O port.
 *
 * len selects the access width in bytes (4, 2 or 1). For any other width
 * nothing is read and -1 (converted to unsigned int) is returned.
 */
unsigned int read_io(unsigned short io_port, unsigned char len)
{
    switch (len) {
    case 4:
        return (unsigned int)inl(io_port);
    case 2:
        return (unsigned int)inw(io_port);
    case 1:
        return (unsigned int)inb(io_port);
    default:
        return -1;
    }
}

/*
 * Copy len bytes from physical address phy_addr into dest.
 *
 * The range is mapped with ioremap() for the duration of the call. Lengths
 * of 8, 4, 2 and 1 use the widest matching accessors; any other length is
 * copied byte by byte. If the mapping fails, dest is left untouched.
 */
void read_phy_mem(unsigned long phy_addr, unsigned int len, unsigned char *dest)
{
    char *mapped = ioremap(phy_addr, len);
    int idx;

    if (mapped == NULL)
        return;

    switch (len) {
    case 8:
        /* Two 32-bit reads, low word first. */
        *(unsigned int *)dest = readl(mapped);
        *(unsigned int *)(dest + 4) = readl(mapped + 4);
        break;
    case 4:
        *(unsigned int *)dest = readl(mapped);
        break;
    case 2:
        *(unsigned short *)dest = readw(mapped);
        break;
    case 1:
        *dest = readb(mapped);
        break;
    default:
        for (idx = 0; idx < len; idx++)
            dest[idx] = readb(mapped + idx);
        break;
    }

    iounmap(mapped);
}

void write_phy_mem(unsigned long phy_addr, unsigned int len, unsigned char * src) {
char * regs = NULL;
int i = 0;
regs = ioremap(phy_addr, len);
if(len == 8) {
writel(*(unsigned int *)src, regs);
writel(*(unsigned int *)(src + 4), regs + 4);
} else if(len == 4) {
writel(*(unsigned int *)src, regs);
} else if(len == 2) {
writew(*(unsigned short *)src, regs);
} else if(len == 1) {
writeb(*src, regs);
} else {
for(i = 0; i < len; i++) {
writeb(*(src + i), regs + i);
}
}
iounmap(regs);
}

/*
 * Read one 32-bit PCI configuration register through I/O ports
 * 0xcf8 (address) and 0xcfc (data).
 *
 * The address word is: enable bit 31 | bus << 16 | device << 11 |
 * function << 8 | dword-aligned register offset.
 */
unsigned int read_pci_reg(unsigned int bus_num, unsigned int dev_num, unsigned int func_num, unsigned int reg_num)
{
    /* 1U: shifting a signed 1 into bit 31 is undefined behaviour. */
    unsigned int addr = (1U << 31)
                      | ((bus_num & 0xff) << 16)
                      | ((dev_num & 0x1f) << 11)
                      | ((func_num & 0x7) << 8)
                      | (reg_num & 0xfc);

    write_io(addr, 0xcf8, 4);
    return read_io(0xcfc, 4);
}

/*
 * Write one 32-bit PCI configuration register through I/O ports
 * 0xcf8 (address) and 0xcfc (data).
 *
 * The address word is: enable bit 31 | bus << 16 | device << 11 |
 * function << 8 | dword-aligned register offset.
 */
void write_pci_reg(unsigned int bus_num, unsigned int dev_num, unsigned int func_num, unsigned int reg_num, unsigned int val)
{
    /* 1U: shifting a signed 1 into bit 31 is undefined behaviour. */
    unsigned int addr = (1U << 31)
                      | ((bus_num & 0xff) << 16)
                      | ((dev_num & 0x1f) << 11)
                      | ((func_num & 0x7) << 8)
                      | (reg_num & 0xfc);

    write_io(addr, 0xcf8, 4);
    write_io(val, 0xcfc, 4);
}

static int w4rd3n_init(void){
unsigned int OFF_BAR0 = 0x10;
unsigned int OFF_BAR1 = 0x14;
unsigned short io_port = 0;
unsigned long mmio_addr = 0;
unsigned char * dmabuf = NULL;
unsigned long dmabuf_phyaddr = 0;
int i = 0;
unsigned int id = 0xcccccccc;

io_port = read_pci_reg(0, 4, 0, OFF_BAR0) & (~0xf);
mmio_addr = read_pci_reg(0, 4, 0, OFF_BAR1) & (~0xf);
printk("w4rd3n ioport: 0x%x\n", io_port);
printk("w4rd3n mmio: 0x%lx\n", mmio_addr);

write_phy_mem(mmio_addr + 4, sizeof(id), (unsigned char *)&id);
id = read_io(io_port + 4, sizeof(id));
printk("w4rd3n id: 0x%x\n", id);

dmabuf = (unsigned char*)kmalloc(0x20000, GFP_KERNEL);
memset(dmabuf, 0, 0x20000);
dmabuf_phyaddr = virt_to_phys(dmabuf);
printk("dma addr: 0x%lx\n", dmabuf_phyaddr);
write_io((unsigned int)dmabuf_phyaddr, io_port + 4, 4);

for(i = 0; i < 0x10; i += 4){
printk("w4rd3n dma[%x]: 0x%x\n", i, *(unsigned int *)(dmabuf + i));
}

return 0;
}

/* Module exit hook; performs no teardown. */
static void w4rd3n_exit(void)
{
}

/* Register the load and unload entry points with the kernel. */
module_init(w4rd3n_init);
module_exit(w4rd3n_exit);

Makefile.

1
2
3
4
5
6
7
8
9
10
# Out-of-tree build of the w4rd3ndrv kernel module through the kernel's
# own build system. Recipe lines must start with a TAB character.
PWD         := $(shell pwd)
KERNEL_DIR = /home/vm/ctf/linux-5.15/

MODULE_NAME = w4rd3ndrv
obj-m := $(MODULE_NAME).o

.PHONY: all clean

# $(MAKE) (not a literal "make") passes -j, -n, etc. to the sub-make.
all:
	$(MAKE) -C $(KERNEL_DIR) M=$(PWD) modules
clean:
	$(MAKE) -C $(KERNEL_DIR) M=$(PWD) clean

隐藏IO

基于pci的设备可以通过lspci,/proc/ioports,/proc/iomem等查看设备的io port和mmio,但是如果不用pci,操作系统就无法自动识别.

device代码进行如下修改.

1
2
3
4
// Instead of exposing the regions as PCI BARs, attach them at fixed offsets:
// the I/O region at 0x100 and the MMIO region at 0x2000.
memory_region_add_subregion(get_system_io(), 0x100, &pw4rd3n->io);
memory_region_add_subregion(get_system_memory(), 0x2000, &pw4rd3n->mmio);
// Replaced BAR registration, kept for comparison:
// pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pw4rd3n->io);
// pci_register_bar(pdev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &pw4rd3n->mmio);

LKM代码进行如下修改.

1
2
3
4
// Use the fixed addresses chosen in the device code instead of reading
// them from the PCI BARs.
io_port = 0x100;
mmio_addr = 0x2000;
// Replaced BAR lookup, kept for comparison:
// io_port = read_pci_reg(0, 4, 0, OFF_BAR0) & (~0xf);
// mmio_addr = read_pci_reg(0, 4, 0, OFF_BAR1) & (~0xf);

相关基础

PCIDevice

每个PCI设备都有一个配置空间(configuration space)记录设备的详细信息,大小为256字节,前64字节是PCI标准规定的.

比较关键的是其中的6个Base Address Registers(BAR),BAR记录了设备所需要的地址空间的类型,基址以及其他属性.

设备可以申请两类地址空间,memory space和I/O space,用BAR的最低位(bit 0)标识:为0表示memory space,为1表示I/O space.

memory space:bit 2:1表示地址类型,00表示采用32位地址,10表示采用64位地址(01在旧规范中表示基址须位于1M以下);bit 3表示是否支持可预取.

通过memory space访问设备I/O的方式称为MMIO,这种情况下,CPU直接使用普通访存指令即可访问设备I/O.

通过I/O space访问设备I/O的方式称为PMIO,这种情况下CPU需要使用专门的I/O指令如IN/OUT访问I/O端口.

QOM

QOM是qemu实现的基于c的一套面向对象机制,负责将device,bus等设备都抽象成为对象.

https://github.com/qemu/qemu/blob/master/include/qom/object.h.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// Static description of a QOM type, as passed to type_register_static().
struct TypeInfo {
const char *name;
const char *parent; // name of the parent type
size_t instance_size;
// instance_size bytes are allocated for each device Object
size_t instance_align;
void (*instance_init)(Object *obj); // instance constructor
void (*instance_post_init)(Object *obj);
void (*instance_finalize)(Object *obj); // instance destructor
bool abstract; // whether this is an abstract class
size_t class_size;
void (*class_init)(ObjectClass *klass, void *data); // class initializer; may override virtual methods
void (*class_base_init)(ObjectClass *klass, void *data);
void *class_data;
InterfaceInfo *interfaces;
};

// Internal record of a registered type. It carries the same fields as
// TypeInfo plus the resolved parent type, the class object and the
// interface table.
struct TypeImpl {
const char *name;
size_t class_size;
size_t instance_size;
size_t instance_align;
void (*class_init)(ObjectClass *klass, void *data);
void (*class_base_init)(ObjectClass *klass, void *data);
void *class_data;
void (*instance_init)(Object *obj);
void (*instance_post_init)(Object *obj);
void (*instance_finalize)(Object *obj);
bool abstract;
const char *parent;
TypeImpl *parent_type;
ObjectClass *class;
int num_interfaces;
InterfaceImpl interfaces[MAX_INTERFACES];
};

// Base class of every class object.
struct ObjectClass {
Type type;
GSList *interfaces;
const char *object_cast_cache[OBJECT_CLASS_CAST_CACHE];
const char *class_cast_cache[OBJECT_CLASS_CAST_CACHE];
ObjectUnparent *unparent;
GHashTable *properties;
};

// Base type of every class instance.
struct Object {
ObjectClass *class;
ObjectFree *free;
GHashTable *properties;
uint32_t ref;
Object *parent;
};

type_init会将w4rd3n_init注册为一个MODULE_INIT_QOM类型的module init函数.

1
2
3
4
5
// type_init(fn) is module_init(fn, MODULE_INIT_QOM).
#define type_init(function) module_init(function, MODULE_INIT_QOM)
// module_init() emits a constructor function named do_qemu_init_<function>
// whose only job is to call register_module_init(function, type).
#define module_init(function, type) \
static void __attribute__((constructor)) do_qemu_init_ ## function(void) { \
register_module_init(function,type); \
}

qemu在真正启动之前,会将所有初始化函数插入init_type_list;接下来,在qemu启动阶段的main函数中,会对init_type_list链表中的每一个初始化函数依次进行调用.

type_register_static最终调用type_register_internal(&w4rd3n_info),根据TypeInfo信息创建一个TypeImpl对象,将其注册到全局哈希表type_table中.

1
2
3
4
5
6
/* Builds a TypeImpl from info, adds it to the type table and returns it. */
static TypeImpl *type_register_internal(const TypeInfo *info)
{
    TypeImpl *impl = type_new(info);

    type_table_add(impl);
    return impl;
}

当所有qemu总线,设备等的type_register_static执行完成后,qemu就会调用type_initialize函数来根据其TypeImpl对象完成ObjectClass的初始化.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// Lazily builds the class object (ti->class) for a registered type:
// initializes the parent first, copies the parent's class data, sets up
// interfaces, runs every ancestor's class_base_init and finally the type's
// own class_init. Returns immediately if the class already exists.
static void type_initialize(TypeImpl *ti) {
TypeImpl *parent;
if (ti->class) {
// already initialized
return;
}
ti->class_size = type_class_get_size(ti);
ti->instance_size = type_object_get_size(ti);
if (ti->instance_size == 0) {
// mark as an abstract class
ti->abstract = true;
}
if (type_is_ancestor(ti, type_interface)) {
//...
}
ti->class = g_malloc0(ti->class_size);
parent = type_get_parent(ti);
if (parent) {
// In a class definition the parent class is always the first field, so
// parent and child can be converted into each other directly.
// A class first initializes its parent, copies the parent's fields into
// the child (the child's remaining fields are zero), then assigns them further.
type_initialize(parent);
// initialize the parent class
GSList *e;
int i;
g_assert(parent->class_size <= ti->class_size);
g_assert(parent->instance_size <= ti->instance_size);
memcpy(ti->class, parent->class, parent->class_size);
// seed the class with the parent's data
ti->class->interfaces = NULL;
for (e = parent->class->interfaces; e; e = e->next) {
InterfaceClass *iface = e->data;
ObjectClass *klass = OBJECT_CLASS(iface);
type_initialize_interface(ti, iface->interface_type, klass->type);
// initialize the corresponding interface
}
for (i = 0; i < ti->num_interfaces; i++) {
TypeImpl *t = type_get_by_name(ti->interfaces[i].typename);
if (!t) {
error_report("missing interface '%s' for object '%s'", ti->interfaces[i].typename, parent->name);
abort();
}
// Skip interfaces already provided through the parent's interface list.
for (e = ti->class->interfaces; e; e = e->next) {
TypeImpl *target_type = OBJECT_CLASS(e->data)->type;
if (type_is_ancestor(target_type, t)) {
break;
}
}
if (e) {
continue;
}
type_initialize_interface(ti, t, t);
}
}
ti->class->properties = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, object_property_free);
ti->class->type = ti;
// Walk up the ancestor chain and run each class_base_init on this class.
while (parent) {
if (parent->class_base_init) {
parent->class_base_init(ti->class, ti->class_data);
}
parent = type_get_parent(parent);
}
if (ti->class_init) {
ti->class_init(ti->class, ti->class_data);
// for the w4rd3n device this is w4rd3n_class_init
}
}

加载设备的时候会调用对应的构造函数(realize),卸载设备的时候会调用对应的析构函数(exit),二者都是在class_init(即ObjectClass的构造函数)中注册的.

0%
;