本文所用qemu为1.5版本的,不是android emulator的。
之前几篇文章介绍的都是android emulator中的设备模拟。一些是android自己加的platform bus上的虚拟设备;一些是qemu自己的虚拟设备,但是这部分代码很旧,没有使用到QOM模型。
qemu1.1以及之后的qemu开始有了QOM模型。QOM很大一部分代码是为了在C语言中实现类似C++的继承:公用的东西放到ObjectClass里,只有一个实例;其他的放Object里,可以有多个实例。
PS:android emulator的代码对应了qemu 0.1x的代码,但是又有一些新版本的qemu的代码porting上去了。
QOM设备模型可以看:
1、Qemu中的设备注册:http://ytliu.info/blog/2015/01/10/qemushe-bei-chu-shi-hua/
第二篇pdf讲得非常详细了,但是最后关于PMIO地址和读写函数如何对应起来,还是有些没讲清楚的地方。
本文针对这个问题进行一些补充。
初始化内存空间
在memory_map_init(main->cpu_exec_init_all->memory_map_init)中,会设置MemoryRegion改变时的回调函数,memory_map_init是在设备注册之前调用的:
/*
 * Create the two root memory regions ("system" and "io"), attach an
 * AddressSpace to each of them, and register the memory listeners that
 * are notified when the layout of those address spaces changes.
 */
static void memory_map_init(void)
{
    /* Root region and address space for ordinary memory. */
    system_memory = g_malloc(sizeof *system_memory);
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    /* Root region and address space for port I/O (size 65536). */
    system_io = g_malloc(sizeof *system_io);
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    /*
     * Each listener is filtered to a single address space; only
     * io_memory_listener watches the I/O space.
     */
    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}
这个是PMIO的listener,PMIO的MemoryRegion改变后,会调用io_region_add函数,映射PMIO地址和设备读写函数。
普通内存是其他的listener。
/*
 * Listener for port-I/O layout changes: io_region_add() runs when a
 * section appears and io_region_del() when one goes away.
 */
static MemoryListener io_memory_listener = {
    .priority   = 0,
    .region_add = io_region_add,
    .region_del = io_region_del,
};
把注册的listener添加到全局的memory_listeners链表中:
/*
 * Register @listener in the global memory_listeners list and replay the
 * current layout of every address space to it.
 *
 * The list is kept sorted by ascending priority; a new listener is placed
 * after any existing listeners of equal priority.
 *
 * @listener: listener to register
 * @filter:   the address space this listener is restricted to
 */
void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
{
    MemoryListener *successor = NULL;
    AddressSpace *space;

    /* Remember which address space this listener handles. */
    listener->address_space_filter = filter;

    if (!QTAILQ_EMPTY(&memory_listeners)
        && listener->priority < QTAILQ_LAST(&memory_listeners,
                                            memory_listeners)->priority) {
        /* Insert in front of the first listener with a higher priority. */
        QTAILQ_FOREACH(successor, &memory_listeners, link) {
            if (successor->priority > listener->priority) {
                break;
            }
        }
        QTAILQ_INSERT_BEFORE(successor, listener, link);
    } else {
        /* Empty list, or nothing outranks the new listener: append. */
        QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
    }

    QTAILQ_FOREACH(space, &address_spaces, address_spaces_link) {
        listener_add_address_space(listener, space);
    }
}
对AddressSpace(也就是根MemoryRegion)当前FlatView(as->current_map)中的每一个FlatRange构造一个MemoryRegionSection,并调用一次listener->region_add:
/*
 * Replay the current flat view of @as to a newly registered @listener.
 *
 * Nothing is done when the listener has an address-space filter that
 * names a different address space. Otherwise, if global dirty logging is
 * active, the listener's log_global_start hook (if any) is called, and
 * then region_add (if any) is called once for every FlatRange in
 * as->current_map.
 *
 * @listener: listener being brought up to date
 * @as:       address space whose current layout is replayed
 */
static void listener_add_address_space(MemoryListener *listener,
                                       AddressSpace *as)
{
    FlatRange *fr;

    if (listener->address_space_filter
        && listener->address_space_filter != as) {
        return;
    }

    if (global_dirty_log) {
        if (listener->log_global_start) {
            listener->log_global_start(listener);
        }
    }

    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
        MemoryRegionSection section = {
            .mr = fr->mr,
            .address_space = as,
            .offset_within_region = fr->offset_in_region,
            .size = int128_get64(fr->addr.size),
            .offset_within_address_space = int128_get64(fr->addr.start),
            .readonly = fr->readonly,
        };
        if (listener->region_add) {
            /* The callback takes a pointer to the section. */
            listener->region_add(listener, &section);
        }
    }
}
添加PIT设备
MemoryRegion有修改,更新,调用io_region_add函数
添加pit设备时,会调用到memory_region_add_subregion函数,MemoryRegion被修改了,然后会调用到memory_region_transaction_commit函数,更新地址空间,调用listener,是在这里映射PMIO地址和设备读写函数的。
注意memory_region_transaction_depth的使用,保证多层调用时,只需要更新一次。
/*
 * Close one nesting level of a memory-region transaction.
 *
 * Only when the outermost level is closed and an update is pending is the
 * topology of every address space rebuilt, bracketed by the global
 * "begin" and "commit" listener notifications.
 */
void memory_region_transaction_commit(void)
{
    AddressSpace *space;

    assert(memory_region_transaction_depth);
    --memory_region_transaction_depth;

    /* Still nested, or nothing changed: leave the work to a later commit. */
    if (memory_region_transaction_depth || !memory_region_update_pending) {
        return;
    }

    memory_region_update_pending = false;
    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);

    QTAILQ_FOREACH(space, &address_spaces, address_spaces_link) {
        address_space_update_topology(space);
    }

    MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
}
真正的更新操作:
/*
 * Regenerate the flat view of @as from its root region, notify listeners
 * about the differences to the previous view, and install the new view.
 */
static void address_space_update_topology(AddressSpace *as)
{
    FlatView previous = *as->current_map;
    FlatView rebuilt = generate_memory_topology(as->root);

    /* First pass with adding == false, second pass with adding == true. */
    address_space_update_topology_pass(as, previous, rebuilt, false);
    address_space_update_topology_pass(as, previous, rebuilt, true);

    *as->current_map = rebuilt;
    flatview_destroy(&previous);
    address_space_update_ioeventfds(as);
}
/*
 * Build a FlatView for the region tree rooted at @mr.
 *
 * The tree is rendered starting at address zero, clipped to the range
 * [0, 2^64), and the result is passed through flatview_simplify().
 * When @mr is NULL the view is returned without any rendering.
 */
static FlatView generate_memory_topology(MemoryRegion *mr)
{
    FlatView flat;

    flatview_init(&flat);

    if (mr) {
        render_memory_region(&flat, mr, int128_zero(),
                             addrrange_make(int128_zero(), int128_2_64()),
                             false);
    }

    flatview_simplify(&flat);
    return flat;
}
FlatView有点像把链表描述的MemoryRegion搞成了FlatRange数组(FlatRange中记录了MemoryRegion,自然可以获得MemoryRegion中的pit_ioport_ops):
/*
 * Recursively flatten @mr and everything below it into @view.
 *
 * @view:     flat view being built; new ranges go in via flatview_insert()
 * @mr:       region to render; ignored entirely when it is not enabled
 * @base:     absolute address that mr->addr is relative to
 * @clip:     address window the rendering is restricted to
 * @readonly: read-only flag accumulated from the enclosing regions
 *
 * Subregions are rendered before the region itself, so the region only
 * fills the gaps that are still free in @view.
 */
static void render_memory_region(FlatView *view,
MemoryRegion *mr,
Int128 base,
AddrRange clip,
bool readonly)
{
MemoryRegion *subregion;
unsigned i;
hwaddr offset_in_region;
Int128 remain;
Int128 now;
FlatRange fr;
AddrRange tmp;
if (!mr->enabled) {
return;
}
/* Turn base into the absolute start address of this region. */
int128_addto(&base, int128_make64(mr->addr));
/* A read-only parent makes everything below it read-only as well. */
readonly |= mr->readonly;
tmp = addrrange_make(base, mr->size);
/* Nothing to render if the region lies completely outside the window. */
if (!addrrange_intersects(tmp, clip)) {
return;
}
clip = addrrange_intersection(tmp, clip);
if (mr->alias) {
/*
 * Render the alias target in place of this region. The recursive call
 * adds mr->alias->addr to base again, so it is subtracted here together
 * with alias_offset.
 */
int128_subfrom(&base, int128_make64(mr->alias->addr));
int128_subfrom(&base, int128_make64(mr->alias_offset));
render_memory_region(view, mr->alias, base, clip, readonly);
return;
}
/* Render subregions in priority order. */
QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
render_memory_region(view, subregion, base, clip, readonly);
}
/* A region that does not terminate contributes no range of its own. */
if (!mr->terminates) {
return;
}
offset_in_region = int128_get64(int128_sub(clip.start, base));
/* From here on base is a cursor moving through the clipped window. */
base = clip.start;
remain = clip.size;
/* Render the region itself into any gaps left by the current view. */
for (i = 0; i < view->nr && int128_nz(remain); ++i) {
/* Range i ends at or before the cursor: it cannot overlap. */
if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
continue;
}
if (int128_lt(base, view->ranges[i].addr.start)) {
/* There is a gap in front of range i: fill it with this region. */
now = int128_min(remain,
int128_sub(view->ranges[i].addr.start, base));
fr.mr = mr;
fr.offset_in_region = offset_in_region;
fr.addr = addrrange_make(base, now);
fr.dirty_log_mask = mr->dirty_log_mask;
fr.readable = mr->readable;
fr.readonly = readonly;
flatview_insert(view, i, &fr);
/* Step over the range just inserted; i is the old range i again. */
++i;
int128_addto(&base, now);
offset_in_region += int128_get64(now);
int128_subfrom(&remain, now);
}
/* Skip the part of the window already covered by range i. */
now = int128_sub(int128_min(int128_add(base, remain),
addrrange_end(view->ranges[i].addr)),
base);
int128_addto(&base, now);
offset_in_region += int128_get64(now);
int128_subfrom(&remain, now);
}
/* Whatever is left was not covered by any existing range: insert it at i. */
if (int128_nz(remain)) {
fr.mr = mr;
fr.offset_in_region = offset_in_region;
fr.addr = addrrange_make(base, remain);
fr.dirty_log_mask = mr->dirty_log_mask;
fr.readable = mr->readable;
fr.readonly = readonly;
flatview_insert(view, i, &fr);
}
}
MemoryRegion被修改的话,如果是有添加,那么会调用到MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add),添加PMIO地址和设备的映射关系。
/*
 * Walk @old_view and @new_view in parallel and tell the listeners of @as
 * what changed.
 *
 * @as:       address space the two views belong to
 * @old_view: flat view before the update
 * @new_view: flat view after the update
 * @adding:   false - only report ranges that disappeared or changed
 *                    (region_del);
 *            true  - report unchanged ranges (region_nop, plus
 *                    log_start/log_stop when the dirty-log mask toggled)
 *                    and new ranges (region_add)
 */
static void address_space_update_topology_pass(AddressSpace *as,
FlatView old_view,
FlatView new_view,
bool adding)
{
unsigned iold, inew;
FlatRange *frold, *frnew;
/* Generate a symmetric difference of the old and new memory maps.
* Kill ranges in the old map, and instantiate ranges in the new map.
*/
iold = inew = 0;
while (iold < old_view.nr || inew < new_view.nr) {
/* frold/frnew are NULL once the corresponding view is exhausted. */
if (iold < old_view.nr) {
frold = &old_view.ranges[iold];
} else {
frold = NULL;
}
if (inew < new_view.nr) {
frnew = &new_view.ranges[inew];
} else {
frnew = NULL;
}
if (frold
&& (!frnew
|| int128_lt(frold->addr.start, frnew->addr.start)
|| (int128_eq(frold->addr.start, frnew->addr.start)
&& !flatrange_equal(frold, frnew)))) {
/* In old, but (not in new, or in new but attributes changed). */
if (!adding) {
MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
}
++iold;
} else if (frold && frnew && flatrange_equal(frold, frnew)) {
/* In both (logging may have changed) */
if (adding) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
if (frold->dirty_log_mask && !frnew->dirty_log_mask) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop);
} else if (frnew->dirty_log_mask && !frold->dirty_log_mask) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start);
}
}
++iold;
++inew;
} else {
/* In new */
if (adding) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
}
++inew;
}
}
}
/*
 * Build a temporary MemoryRegionSection describing FlatRange @fr inside
 * address space @as and hand a pointer to it to MEMORY_LISTENER_CALL, so
 * that @callback is invoked on the listeners in direction @dir.
 */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback) \
MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) { \
.mr = (fr)->mr, \
.address_space = (as), \
.offset_within_region = (fr)->offset_in_region, \
.size = int128_get64((fr)->addr.size), \
.offset_within_address_space = int128_get64((fr)->addr.start), \
.readonly = (fr)->readonly, \
}))
调用了region_add函数,也就是io_region_add函数:
/*
 * Invoke @_callback on every listener in memory_listeners that implements
 * it and for which memory_listener_match() accepts @_section.
 *
 * The list is walked head to tail for Forward and tail to head for
 * Reverse; any other @_direction aborts. Extra @_args are forwarded to
 * the callback after @_section.
 */
#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \
do { \
MemoryListener *_listener; \
\
switch (_direction) { \
case Forward: \
QTAILQ_FOREACH(_listener, &memory_listeners, link) { \
if (_listener->_callback \
&& memory_listener_match(_listener, _section)) { \
_listener->_callback(_listener, _section, ##_args); \
} \
} \
break; \
case Reverse: \
QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, \
memory_listeners, link) { \
if (_listener->_callback \
&& memory_listener_match(_listener, _section)) { \
_listener->_callback(_listener, _section, ##_args); \
} \
} \
break; \
default: \
abort(); \
} \
} while (0)
io_region_add函数处理PMIO地址和设备读写函数的映射关系
/*
 * region_add callback of the I/O listener.
 *
 * Allocates a MemoryRegionIORange that remembers the section's
 * MemoryRegion and its offset inside that region, initialises the
 * embedded IORange with memory_region_iorange_ops over the section's
 * address range, and registers it with ioport_register().
 *
 * @listener: listener the callback belongs to (not used here)
 * @section:  section that has just been added to the I/O address space
 */
static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *range = g_new(MemoryRegionIORange, 1);

    range->offset = section->offset_within_region;
    range->mr = section->mr;

    iorange_init(&range->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&range->iorange);
}
memory_region_iorange_ops就是IORange->ops,其中读函数为memory_region_iorange_read,真正的设备读写函数保存在mr中:
/*
 * IORange read hook for port I/O that is backed by a MemoryRegion.
 *
 * @iorange: IORange embedded in a MemoryRegionIORange
 * @offset:  offset of the access inside @iorange
 * @width:   access width in bytes
 * @data:    receives the value that was read
 *
 * Regions without an old_portio table are dispatched through
 * access_with_adjusted_size(). Regions with one are looked up via
 * find_portio(); a 2-byte access with no matching entry is composed from
 * two 1-byte reads.
 */
static void memory_region_iorange_read(IORange *iorange,
                                       uint64_t offset,
                                       unsigned width,
                                       uint64_t *data)
{
    MemoryRegionIORange *mrio
        = container_of(iorange, MemoryRegionIORange, iorange);
    MemoryRegion *mr = mrio->mr;
    const MemoryRegionPortio *portio;

    /* From here on offset is relative to the MemoryRegion. */
    offset += mrio->offset;

    if (!mr->ops->old_portio) {
        *data = 0;
        access_with_adjusted_size(offset, data, width,
                                  mr->ops->impl.min_access_size,
                                  mr->ops->impl.max_access_size,
                                  memory_region_read_accessor, mr);
        return;
    }

    /* find_portio() is given the offset relative to the IORange. */
    portio = find_portio(mr, offset - mrio->offset, width, false);

    /* Default result: all bits set for the access width. */
    *data = ((uint64_t)1 << (width * 8)) - 1;

    if (portio) {
        *data = portio->read(mr->opaque, offset);
        return;
    }

    if (width == 2) {
        /* No 2-byte handler: combine two 1-byte reads, low byte first. */
        portio = find_portio(mr, offset - mrio->offset, 1, false);
        assert(portio);
        *data = portio->read(mr->opaque, offset) |
                (portio->read(mr->opaque, offset + 1) << 8);
    }
}
这里执行真正的读写函数,也就是pit_ioport_ops。
/*
 * Write one piece of an access to the device behind a MemoryRegion.
 *
 * @opaque: the MemoryRegion being written
 * @addr:   address passed on to the region's write callback
 * @value:  full value of the access
 * @size:   size passed on to the region's write callback
 * @shift:  right shift applied to *@value to select this piece
 * @mask:   mask applied after the shift
 */
static void memory_region_write_accessor(void *opaque,
                                         hwaddr addr,
                                         uint64_t *value,
                                         unsigned size,
                                         unsigned shift,
                                         uint64_t mask)
{
    MemoryRegion *mr = opaque;

    if (mr->flush_coalesced_mmio) {
        qemu_flush_coalesced_mmio_buffer();
    }

    /* Extract this piece and pass it to the device's write callback. */
    mr->ops->write(mr->opaque, addr, (*value >> shift) & mask, size);
}
注册设备的三组读写函数:
/*
 * Register @ioport for all three access widths (1, 2 and 4 bytes), for
 * both reads and writes, over the port range [base, base + len).
 * The IORange itself is passed as the opaque pointer of each thunk, and
 * a destructor thunk is recorded for the range's base port.
 */
void ioport_register(IORange *ioport)
{
    void *opaque = ioport;

    /* Read thunks. */
    register_ioport_read(ioport->base, ioport->len, 1,
                         ioport_readb_thunk, opaque);
    register_ioport_read(ioport->base, ioport->len, 2,
                         ioport_readw_thunk, opaque);
    register_ioport_read(ioport->base, ioport->len, 4,
                         ioport_readl_thunk, opaque);

    /* Write thunks. */
    register_ioport_write(ioport->base, ioport->len, 1,
                          ioport_writeb_thunk, opaque);
    register_ioport_write(ioport->base, ioport->len, 2,
                          ioport_writew_thunk, opaque);
    register_ioport_write(ioport->base, ioport->len, 4,
                          ioport_writel_thunk, opaque);

    ioport_destructor_table[ioport->base] = iorange_destructor_thunk;
}
/*
 * Install @func as the read handler of the given access size for every
 * port in [start, start + length) and record @opaque for those ports.
 *
 * @start:  first port
 * @length: number of ports
 * @size:   access size; translated to a table index by ioport_bsize()
 * @func:   read handler to install
 * @opaque: value stored in ioport_opaque[] for each port
 *
 * Returns 0 on success, or -1 after reporting an invalid @size. A port
 * that already has a different opaque value is reported via hw_error().
 */
int register_ioport_read(pio_addr_t start, int length, int size,
                         IOPortReadFunc *func, void *opaque)
{
    int port, width_index;

    if (ioport_bsize(size, &width_index)) {
        hw_error("register_ioport_read: invalid size");
        return -1;
    }

    for (port = start; port < start + length; ++port) {
        ioport_read_table[width_index][port] = func;
        if (ioport_opaque[port] != NULL && ioport_opaque[port] != opaque) {
            hw_error("register_ioport_read: invalid opaque for address 0x%x",
                     port);
        }
        ioport_opaque[port] = opaque;
    }

    return 0;
}
通过opaque可以获取读函数IORange->ops->read(也就是memory_region_iorange_read)。
/*
 * 1-byte read thunk: forwards a port read to the read hook of the
 * IORange passed as @opaque, converting the port address @addr into an
 * offset from the range's base.
 */
static uint32_t ioport_readb_thunk(void *opaque, uint32_t addr)
{
    IORange *range = opaque;
    uint64_t value;

    range->ops->read(range, addr - range->base, 1, &value);
    return value;
}
对于读PMIO,KVM_EXIT_IO之后的流程是:
kvm_handle_io
->cpu_inb
->ioport_read
->ioport_read_table[0][addr](也就是ioport_readb_thunk)
->IORange->ops->read(也就是memory_region_iorange_ops中的memory_region_iorange_read)
->access_with_adjusted_size(需要mr,保存了pit_ioport_ops)
->memory_region_read_accessor
->mr->ops->read
PS:
1、Object的parent可能是用来搞总线结构的,比如Object是bus上的设备,parent是bus。
2、ObjectProperty里面type以"child<"开头的应该就是Object用来记录子Object的,也就是bus记录上面挂的设备的。
3、注意QObject和QType(比C语言的type多了ref),用来折腾ObjectProperty的属性设置的,和之前的Object,ObjectClass不同。
4、设置属性都是通过object_property_set_qobject来设置的,会生成visitor,然后调用void object_property_set(Object *obj, Visitor *v, const char *name, Error **errp)。
5、isa_create中创建了Object,调用了pit_class_initfn等初始化函数。
6、isa bus的address_space_io就是系统的system_io:
/*
 * Machine init for the ISA-only PC.
 *
 * Defaults the CPU model to "486" when none was given and then calls
 * pc_init1() with the system memory region and the system I/O region.
 * NOTE(review): the trailing 0 and 1 are presumably pci_enabled and
 * kvmclock_enabled - confirm against pc_init1's prototype.
 */
static void pc_init_isa(QEMUMachineInitArgs *args)
{
    const char *cpu_model = args->cpu_model;
    const char *boot_device = args->boot_device;
    const char *kernel_filename = args->kernel_filename;
    const char *kernel_cmdline = args->kernel_cmdline;
    const char *initrd_filename = args->initrd_filename;
    ram_addr_t ram_size = args->ram_size;

    has_pvpanic = false;

    if (!cpu_model) {
        cpu_model = "486";
    }

    disable_kvm_pv_eoi();
    enable_compat_apic_id_mode();

    pc_init1(get_system_memory(),
             get_system_io(),
             ram_size, boot_device,
             kernel_filename, kernel_cmdline,
             initrd_filename, cpu_model, 0, 1);
}
/*
 * Excerpt (the enclosing function is not shown; presumably pc_init1).
 * With PCI enabled, i440fx_init() receives system_io and &isa_bus.
 * Without PCI, the ISA bus is created directly on system_io by
 * isa_bus_new() and HPET is disabled via no_hpet.
 */
if (pci_enabled) {
pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
system_memory, system_io, ram_size,
below_4g_mem_size,
0x100000000ULL - below_4g_mem_size,
0x100000000ULL + above_4g_mem_size,
(sizeof(hwaddr) == 4
? 0
: ((uint64_t)1 << 62)),
pci_memory, ram_memory);
} else {
pci_bus = NULL;
i440fx_state = NULL;
/* ISA-only machine: the bus's I/O space is the system I/O region. */
isa_bus = isa_bus_new(NULL, system_io);
no_hpet = 1;
}