Io_proxy设备为透传设备,因为其工厂类中将pass_thru属性设置为自身:
// Factory constructor: stores this factory instance in `pass_thru`.
F() { pass_thru = this; } |
bind_irq方法
/**
 * Connect one vbus interrupt of this IO device to an interrupt line of the
 * guest's interrupt controller.
 *
 * \param vmm     Guest whose registry receives the new Irq_svr object.
 * \param vbus    Virtual bus whose ICU is asked to bind `io_irq`.
 * \param ic      Interrupt controller the device tree node points to.
 * \param self    Device tree node of this device.
 * \param dt_idx  Index of the interrupt inside the node's interrupt list.
 * \param io_irq  Interrupt number as seen on the vbus ICU.
 *
 * \throws l4::Runtime_error(-l4_EEXIST) if `ic` already has a source for the
 *         resolved line that is not an Irq_svr for the same `io_irq`.
 */
void
Io_proxy::bind_irq(Vmm::Guest *vmm, Vmm::Virt_bus *vbus, Gic::Ic *ic,
                   Dt_node const &self, unsigned dt_idx, unsigned io_irq)
{
  // Resolve the interrupt index of the node to the controller's own number.
  auto dt_irq = ic->dt_get_interrupt(self, dt_idx);

  info.printf("IO device %p:'%s' - registering irq%d=0x%x -> 0x%x\n",
              this, self.get_name(), dt_idx, io_irq, dt_irq);

  bool const line_is_free = !ic->get_irq_source(dt_irq);

  if (!line_is_free)
    {
      warn.printf("irq%d=0x%x -> 0x%x already registered\n",
                  dt_idx, io_irq, dt_irq);

      // Ensure we have the correct binding of the currently registered
      // source. (Original note: where does that registration happen?)
      auto bound_source = ic->get_irq_source(dt_irq);
      auto bound_svr = dynamic_cast<Irq_svr const *>(bound_source.get());

      if (!bound_svr)
        Err().printf("ic:0x%x is bound to a different irq type\n", dt_irq);
      else if (io_irq == bound_svr->get_io_irq())
        return; // same 1:1 mapping is already in place
      else
        // The mapping is 1:1, several io irqs on one line is an error.
        Err().printf("bind_irq: ic:0x%x -> 0x%x -- "
                     "irq already bound to different io irq: 0x%x \n",
                     dt_irq, io_irq, bound_svr->get_io_irq());

      throw l4::Runtime_error(-l4_EEXIST);
    }

  // No source handles this line yet: create an interrupt server for io_irq
  // and register it as an irq object with the guest's registry.
  auto svr = Vdev::make_device<Vdev::Irq_svr>(io_irq);
  ZCore::chkcap(vmm->registry()->register_irq_obj(svr.get()));

  // We have a 1:1 association, so if the irq is not bound yet we
  // should be able to bind the icu irq (this ends up as an IPC to the ICU).
  ZCore::chksys(vbus->icu()->bind(io_irq, svr->obj_cap()));
  vbus->icu()->set_mode(io_irq, l4::Icu::F_level_low);

  // Point irq_svr to ic:dt_irq for upstream events (like
  // interrupt delivery)
  svr->set_sink(ic, dt_irq);

  // Point ic to irq_svr for downstream events (like eoi handling)
  ic->bind_irq_source(dt_irq, svr);

  // Remember the server in this proxy's interrupt table.
  _irqs[io_irq] = svr;

  // Original note: simulates one end-of-interrupt acknowledgement.
  svr->eoi();
}
init_device方法
/**
 * Set up interrupt forwarding for this proxied IO device.
 *
 * Does nothing when the node has no "interrupts" property. Otherwise the
 * interrupt parent of the node is looked up (walking up at most 10 levels)
 * and, if it is a Gic::Ic, every IRQ resource of the vbus device is bound to
 * it via bind_irq().
 *
 * Two paths exist, selected by the "pci_addr" property:
 *  - without "pci_addr": IRQ resources must be named 'irqX', X being the
 *    index into the node's interrupt list;
 *  - with "pci_addr": the IRQ is bound with index 0 and the resulting
 *    interrupt number is written to the irq_line of the pass-through PCI
 *    device.
 *
 * \param devs  Device lookup giving access to vmm, vbus and PCI bus.
 * \param self  Device tree node of this device.
 *
 * \throws l4::Runtime_error(-l4_ENODEV) if no device exists for any
 *         interrupt parent.
 */
void
Io_proxy::init_device(Device_lookup const *devs, Dt_node const &self)
{
  // Only interrupt handling is done here.
  if (!self.get_prop<fdt32_t>("interrupts", nullptr))
    return;

  cxx::Ref_ptr<Device> dev;
  auto try_cnt = 0;
  auto irq_ctl = self.find_irq_parent();
  do
    {
      // Walk up level by level until an interrupt parent has a device.
      if (irq_ctl.is_valid())
        dev = devs->device_from_node(irq_ctl);
      try_cnt++;
      if (!dev)
        {
          irq_ctl = irq_ctl.find_irq_parent();
          continue;
        }
      else
        {
          break;
        }
    }
  while (try_cnt < 10);

  if (!dev)
    {
      Err().printf("virtio proxy - '%s': irq parent %s not found\n",
                   self.get_name(), irq_ctl.is_valid() ? "device" : "node");
      throw l4::Runtime_error(-l4_ENODEV);
    }

  // XXX need dynamic cast for Ref_ptr here
  auto *ic = dynamic_cast<Gic::Ic *>(dev.get());

  if (!ic)
    {
      info.printf("%s: Irqs are handled by %s, ignoring irq assignments\n",
                  self.get_name(), irq_ctl.get_name());
      return;
    }

  auto const *devinfo = devs->vbus()->find_device(this);
  assert (devinfo);
  int id = 0;
  int name_len;
  int numint = ic->dt_get_num_interrupts(self); // number of device interrupts
  char const *pci_addr = self.get_prop<char>("pci_addr", &name_len);

  if (!pci_addr)
    {
      // Path without pci_addr (original note: taken on ARM).
      for (unsigned i = 0; i < devinfo->dev_info.num_resources; ++i)
        {
          l4vbus_resource_t res;
          ZCore::chksys(_dev.get_resource(i, &res));

          char const *resname = reinterpret_cast<char const *>(&res.id);

          // Interrupts: id must be 'irqX' where X is the index into
          // the device trees interrupts resource description
          if (res.type != l4VBUS_RESOURCE_IRQ)
            continue;

          if (strncmp(resname, "irq", 3))
            {
              warn.printf("IRQ resource '%s' of device '%.64s' ignored. "
                          "Should be named 'irq[0-9A-Z]'.\n",
                          resname, devinfo->dev_info.name);
              continue;
            }

          id = decode_resource_id(resname[3]);
          if (id == -1)
            {
              Err().printf("IO device '%.64s' has invalid irq resource id. "
                           "Expected 'irq[0-9A-Z]', got '%.4s'\n",
                           devinfo->dev_info.name, resname);
              ZCore::chksys(-l4_EINVAL);
            }

          auto irq = res.start;
          if (id < numint)
            bind_irq(devs->vmm(), devs->vbus().get(), ic, self, id, irq);
          else
            Err().printf("Error: IO IRQ resource id (%d) is out of bounds\n",
                         id);
        }
    }
  else
    {
      // Path with pci_addr (original note: taken on x86).
      // The fields start as all-ones so that a failed or partial sscanf is
      // caught by the range check below.
      l4_uint32_t _bus = -1, _dev_no = -1, _fn = -1;
      sscanf(pci_addr, "%x:%x.%x", &_bus, &_dev_no, &_fn);
      if ((_bus >= 256) || (_dev_no >= 32) || (_fn >= 8))
        {
          Err().printf(" pci_addr wrong, %x:%x.%x\n", _bus, _dev_no, _fn);
          return;
        }

      auto pcidev =
        devs->pci_bus()->find_pass_pci_dev((_bus << 8) | (_dev_no << 3) | _fn);
      if (pcidev == nullptr)
        {
          Err().printf("BUG init_device, pcidev invalid !!!!\n");
          return;
        }

      for (unsigned i = 0; i < devinfo->dev_info.num_resources; ++i)
        {
          l4vbus_resource_t res;
          ZCore::chksys(_dev.get_resource(i, &res));

          if (res.type != l4VBUS_RESOURCE_IRQ)
            continue;

          auto irq = res.start;
          if (_irqs[irq])
            {
              // Already bound earlier: just publish the interrupt number.
              pcidev->get_pci_hdr().irq_line = _irqs[irq]->get_irq();
              return;
            }

          // `id` is still 0 here: the interrupt is bound with index 0.
          if (id < numint)
            bind_irq(devs->vmm(), devs->vbus().get(), ic, self, id, irq);
          else
            Err().printf("Error: IO IRQ resource id (%d) is out of bounds\n",
                         id);

          // bind_irq() is skipped when id is out of bounds, and it returns
          // without filling _irqs[irq] when the line already has a matching
          // source. Dereferencing the entry unconditionally would then hit a
          // null pointer, so only publish irq_line for a populated entry.
          if (!_irqs[irq])
            {
              Err().printf("IO IRQ 0x%lx has no interrupt server, "
                           "irq_line left unchanged\n", irq);
              continue;
            }

          pcidev->get_pci_hdr().irq_line = _irqs[irq]->get_irq();
        }
    }
}
工厂类F
struct F : Factory { //处理vm_pass.vbus和io.cfg文件中设备节点 cxx::Ref_ptr<Device> create(Device_lookup const *devs, Dt_node const &node) override { // we can proxy memory and interrupts, check whether resources are // present if (!node.needs_vbus_resources()) return nullptr;
auto *vbus = devs->vbus().get();
auto *vd = vbus->find_unassigned_dev(node); //寻找未分配使用的设备,分析见下表 if (!vd) return nullptr;
auto proxy = make_device<Io_proxy>(vd->io_dev); vd->proxy = proxy; //设置代理
int name_len; char const *bus_name = node.get_prop<char>("l4vmm,vbus", &name_len); if (bus_name && !strcmp(bus_name, "pci")) { //x86虚拟化走这段(不重点分析) char const *pci_addr = node.get_prop<char>("pci_addr", &name_len); if (!pci_addr) return nullptr;
l4_uint32_t _bus = -1, _dev = -1, _fn = -1; sscanf(pci_addr, "%x:%x.%x", &_bus, &_dev, &_fn); //拆分pci_addr if ((_bus>=256) || (_dev>=32) || (_fn>=8)) { Err().printf("pci_addr wrong, %x:%x.%x\n", _bus, _dev, _fn); return nullptr; }
//填充pci设备头结构体 pci_device_header pci_hdr; if (read_spec_pci_hdr(vbus->bus().cap(), ((_bus<<8)|(_dev<<3)|_fn), pci_hdr)) return nullptr; //fill pci_hdr bar_size for (auto i = 0; i < 6; i++) { if (!pci_hdr.bar[i]) continue;
for (unsigned j = 0; j < vd->dev_info.num_resources; ++j) { l4vbus_resource_t res; ZCore::chksys(vd->io_dev.get_resource(j, &res));
if ((res.type != l4VBUS_RESOURCE_MEM) && (res.type != l4VBUS_RESOURCE_PORT)) continue;
if ((pci_hdr.bar[i]&PCI_BASE_ADDRESS_MEM_MASK) == res.start) { pci_hdr.bar_size[i] = res.end - res.start + 1; } } }
cxx::Ref_ptr<Device> dev; auto irq_ctl = node.find_irq_parent(); if (irq_ctl.is_valid()) dev = devs->device_from_node(irq_ctl); if (!dev){ char const *node_name = node.get_name(); if(node_name) Err().printf("virtio proxy - '%s': irq parent %s not found\n", node_name, irq_ctl.is_valid() ? "device" : "node"); throw l4::Runtime_error(-l4_ENODEV); }
// XXX need dynamic cast for Ref_ptr here auto pci_dev = make_device<Pci_dev>(); //创建pci设备 pci_dev->fill_pci_hdr(pci_hdr); //填充 pci_dev->set_pass_through(vd, devs, (_bus<<8)|(_dev<<3)|_fn); //透传pci设备
l4_uint32_t save_bar[6]; for (unsigned i = 0; i < 6; i++) save_bar[i] = pci_hdr.bar[i];
devs->pci_bus()->register_pci_dev(pci_dev); //注册pci设备
for (unsigned i = 0; i < 6; ++i) { if (!pci_hdr.bar_size[i]) continue;
auto bar_addr = pci_dev->get_pci_hdr().bar[i]; bar_addr &= PCI_BASE_ADDRESS_MEM_MASK;
if (save_bar[i] & 0x1) { //ioport不做处理 } else { //mmio设备,调用add_mmio_device进行处理 auto handler = Vdev::make_device<Direct_mmio_handler>(vbus->io_ds(), 0, pci_hdr.bar_size[i],save_bar[i]&PCI_BASE_ADDRESS_MEM_MASK);
auto region = Region::ss(bar_addr, pci_hdr.bar_size[i]); devs->vmm()->add_mmio_device(region, cxx::move(handler)); } } } else{ //arm走这块 for (unsigned i = 0; i < vd->dev_info.num_resources; ++i){ //逐个资源块处理 l4vbus_resource_t res;
ZCore::chksys(vd->io_dev.get_resource(i, &res));
char const *resname = reinterpret_cast<char const *>(&res.id);
// MMIO memory: id must be 'regX' where X is the index into the // device tree's 'reg' resource description if (res.type != l4VBUS_RESOURCE_MEM) continue;
if (strncmp(resname, "reg", 3)) { warn.printf("MMIO resource '%s' of device '%.64s' ignored. " "Should be named 'reg[0-9A-Z]'.\n", resname, vd->dev_info.name); continue; }
int id = Io_proxy::decode_resource_id(resname[3]); //解析regX中的“X” if (id == -1) { Err().printf("IO device '%.64s' has invalid mmio resource id. " "Expected 'reg[0-9A-Z]', got '%.4s'.\n", vd->dev_info.name, resname); ZCore::chksys(-l4_EINVAL); }
info.printf("Adding MMIO resource 0x%lx/0x%lx\n", res.start, res.end); //创建Ds_handler设备 auto handler = Vdev::make_device<Ds_handler>(vbus->io_ds(), 0, res.end - res.start + 1, res.start); //注册mmio设备,让虚拟机使能这段内存的访问,具体见generic_guest.cc中实现 devs->vmm()->register_mmio_device(handler, node, id); }
int prop_sz; auto prop_start = node.get_prop<fdt32_t>("smmu-id", &prop_sz); if(prop_start) //具有smmu属性,dma设备 { auto src_id = node.get_prop_val(prop_start, prop_sz, 0); devs->ram()->bind_kern_iommu(src_id); //iommu处理,使能 } } return proxy; }
F() { pass_thru = this; } } |
寻找未分配使用的设备find_unassigned_dev
/**
 * Find a vbus device that has no proxy yet and corresponds to `node`.
 *
 * For each "compatible" string of the node, every unassigned vbus device
 * compatible with that string is considered. The device is returned
 * directly when the node has no "reg" property, when its first reg entry
 * cannot be read, or when that entry has size 0. Otherwise the device must
 * own a MEM resource named "reg0" that starts at the node's reg base.
 *
 * \param node  Device tree node to find the vbus device for.
 *
 * \return Matching device info, or nullptr if the node has no compatible
 *         property or nothing matches.
 */
Virt_bus::Devinfo *
Virt_bus::find_unassigned_dev(Vdev::Dt_node const &node)
{
  if (!node.has_compatible())
    return nullptr;

  int num_compatible = node.stringlist_count("compatible");

  for (int c = 0; c < num_compatible; ++c)
    {
      auto *hid = node.stringlist_get("compatible", c, nullptr);
      assert(hid);

      for (auto &iodev: _devices)
        // A device without proxy is still unassigned.
        if (!iodev.proxy && iodev.io_dev.is_compatible(hid) > 0)
          {
            // Node without "reg": hand out the device right away.
            auto *regs = node.get_prop<fdt32_t>("reg", nullptr);
            if (!regs)
              return &iodev;

            // Unreadable or empty first reg entry: hand out as well.
            l4_uint64_t base, size;
            if (node.get_reg_val(0, &base, &size) < 0)
              return &iodev;

            if (size == 0)
              return &iodev;

            for (unsigned i = 0; i < iodev.dev_info.num_resources; ++i)
              {
                l4vbus_resource_t res;
                ZCore::chksys(iodev.io_dev.get_resource(i, &res));

                char const *resname = reinterpret_cast<char const *>(&res.id);

                // Only the MEM resource named "reg0" is compared.
                if (res.type != l4VBUS_RESOURCE_MEM
                    || strncmp(resname, "reg0", 4))
                  continue;

                // `base` is still the value read above. Reading reg entry 0
                // again for every resource was redundant, so that second
                // call has been dropped.
                if (base == res.start)
                  return &iodev;
              }
          }
    }

  return nullptr;
}
vbus->icu()->bind函数调用“pkg\zcore-core\l4sys\include\icu.h”中l4_icu_bind_u函数——》l4_ipc_call——》l4_ipc——》调用"svc #0x0"进入内核态,下面具体分析。
通过硬件手册得知SVC异常的中断向量表偏移为0x08:
向量表位于“kernel\fiasco\src\kern\arm\64\ivt.S”,第8项为arm_esr_entry,其定义在“kernel\fiasco\src\kern\arm\thread-arm.cpp”
void Thread::arm_esr_entry(Return_frame *rf) { …… switch (esr.ec()) { …… case 0x12: // HVC case 0x11: // SVC case 0x15: // SVC from aarch64 current_thread()->handle_svc(ts); return; …… |
handle_svc定义在“kernel\fiasco\src\kern\arm\64\thread-arm-64.cpp”
/**
 * Handle a supervisor call of this thread.
 *
 * Thread_cancel is cleared first. A thread that is neither a vCPU user nor
 * an alien goes straight to do_syscall(). Otherwise the trap ends in
 * slowtrap_entry(): with Thread_dis_alien set, that flag is cleared, the
 * system call is executed and bit 16 of the error code is set; without it
 * the pc in the trap state is moved back instead.
 *
 * \param ts  Trap state of the supervisor call.
 */
PRIVATE inline
void
Thread::handle_svc(Trap_state *ts)
{
  extern void slowtrap_entry(Trap_state *ts) asm ("slowtrap_entry");

  // Sample the flags before Thread_cancel is removed.
  Mword const flags = this->state();
  state_del(Thread_cancel);

  bool const needs_slowtrap = flags & (Thread_vcpu_user | Thread_alien);
  if (!needs_slowtrap)
    {
      do_syscall();
      return;
    }

  if (flags & Thread_dis_alien)
    {
      state_del_dirty(Thread_dis_alien);
      do_syscall();

      ts->error_code |= 1 << 16; // ts->esr().alien_after_syscall() = 1;
    }
  else
    ts->pc -= 2 << ts->esr.il();

  slowtrap_entry(ts);
}
/**
 * Run the system call handler stored in slot 0 of `sys_call_table`.
 */
PRIVATE inline
void
Thread::do_syscall()
{
  using Syscall = void (void);
  extern Syscall *sys_call_table[];

  Syscall *const handler = sys_call_table[0];
  handler();
}
|
可以看出调用链为arm_esr_entry——》handle_svc——》do_syscall——》sys_call_table,而sys_call_table定义在“kernel\fiasco\src\kern\arm\64\ivt.S”中
// Global system call table; it holds one 8-byte entry, sys_ipc_wrapper.
.align 3
.global sys_call_table
sys_call_table:
.8byte sys_ipc_wrapper |
fiasco系统调用只有一个,ipc底层也是走的这个系统调用——sys_ipc_wrapper,其定义在“kernel\fiasco\src\kern\syscalls.cpp”
/**
 * System call entry: dispatch the call to the kernel object it targets.
 *
 * The object reference is read from the current thread's system call
 * frame and dereferenced. If an object is found its invoke() is called,
 * otherwise the frame's tag is set to a Not_existent error.
 */
IMPLEMENT void FIASCO_FLATTEN
sys_ipc_wrapper()
{
  assert (!(current()->state() & Thread_drq_ready));

  Thread *caller = current_thread();
  Syscall_frame *frame = caller->regs()->syscall_frame();

#ifndef NDEBUG
  // Debug builds warn when a vCPU with IRQs enabled issues a call that
  // receives or carries message items or words.
  if ((current()->state() & Thread_vcpu_enabled)
      && (current()->vcpu_state().access()->state & Vcpu_state::F_irqs)
      && (frame->ref().have_recv() || frame->tag().items()
          || frame->tag().words()))
    WARN("VCPU makes syscall with IRQs enabled: PC=%lx\n",
         current()->regs()->ip());
#endif

  Obj_cap target = frame->ref();              // capability of the callee
  Utcb *utcb = caller->utcb().access(true);
  L4_fpage::Rights rights;
  Kobject_iface *kobj = target.deref(&rights); // kernel object behind it
  L4_msg_tag e;                                // not used in this function

  if (EXPECT_FALSE(kobj == 0))
    {
      frame->tag(caller->commit_error(utcb, L4_error::Not_existent));
      return;
    }

  kobj->invoke(target, rights, frame, utcb);
}
Syscall_frame::ref()定义
/**
 * Object reference of this system call, built from the raw value in r[2].
 *
 * Original note: r[2] is register x2, where l4_ipc stores its destination
 * (`register l4_umword_t _dest __asm__("x2") = dest | flags;`).
 */
IMPLEMENT inline
L4_obj_ref
Syscall_frame::ref() const
{
  auto const raw_ref = r[2];
  return L4_obj_ref::from_raw(raw_ref);
}
我们这里(中断绑定)传入的cap为“Icu+l4_PROTO_IRQ”,Icu定义在“pkg\zcore-core\l4sys\include\irq”
class Icu : public Kobject_t<Icu, Irq_eoi, l4_PROTO_IRQ, Type_info::Demand_t<1> > { …… |
具体到内核对象Icu(kernel\fiasco\src\kern\irq_controller.cpp),继承自Icu_h,然后调用其父类Kobject_h的invoke函数
/**
 * Invoke this kernel object on behalf of a system call.
 *
 * If the operation has a send part, T::kinvoke() is called; its reply goes
 * to `u` when the caller also receives and to the dummy UTCB otherwise.
 * An error result is stored in the frame's tag. Without an error and with
 * a receive part, either do_ipc() is entered (result does not ask for a
 * switch) or the result is stored in the frame's tag.
 *
 * \param self    Object reference used for the call.
 * \param rights  Rights of the caller on this object.
 * \param f       System call frame of the caller.
 * \param u       UTCB of the caller.
 */
void invoke(L4_obj_ref self, L4_fpage::Rights rights, Syscall_frame *f, Utcb *u)
{
  L4_msg_tag res(no_reply());

  if (EXPECT_TRUE(self.op() & L4_obj_ref::Ipc_send))
    {
      Utcb *const reply_utcb =
        self.have_recv() ? u : Kobject_helper_base::utcb_dummy();

      // kinvoke of the concrete object type T
      res = static_cast<T*>(this)->T::kinvoke(self, rights, f,
                                              static_cast<Utcb const *>(u),
                                              reply_utcb);
    }

  if (EXPECT_FALSE(res.has_error()))
    {
      f->tag(res);
      return;
    }

  // Nothing more to do unless the caller wants to receive.
  if (!self.have_recv())
    return;

  if (res.do_switch())
    {
      f->tag(res);
      return;
    }

  // Receive phase is handled by do_ipc(); no sender for an open wait.
  Thread *t = current_thread();
  Sender *s = nullptr;
  if (!(self.op() & L4_obj_ref::Ipc_open_wait))
    s = _sender(t, static_cast<T*>(this));

  t->do_ipc(f->tag(), 0, 0, true, s, f->timeout(), f, rights);
}
kinvoke函数
/**
 * Entry for invocations of an ICU object.
 *
 * Messages whose tag does not carry the IRQ protocol label are rejected
 * with -EBadproto; everything else is forwarded to icu_invoke().
 */
PUBLIC
template< typename REAL_ICU >
L4_msg_tag
Icu_h<REAL_ICU>::kinvoke(L4_obj_ref ref, L4_fpage::Rights rights,
                         Syscall_frame *f, Utcb const *in, Utcb *out)
{
  L4_msg_tag const request = f->tag();
  bool const is_irq_proto = request.proto() == L4_msg_tag::Label_irq;

  if (EXPECT_TRUE(is_irq_proto))
    return icu_invoke(ref, rights, f, in, out);

  return Kobject_iface::commit_result(-L4_err::EBadproto);
}
/**
 * Dispatch an ICU request by the opcode in the first message word.
 *
 * bind, unbind, info, msi_info and set_mode go to their Msg_icu_* handler.
 * mask/unmask call icu_mask_irq() when at least two message words are
 * present and never send a reply. Unknown opcodes yield -ENosys.
 *
 * \param f     System call frame providing the message tag.
 * \param utcb  Input UTCB with opcode and arguments.
 * \param out   UTCB for the reply.
 */
PUBLIC
template< typename REAL_ICU >
inline
L4_msg_tag
Icu_h<REAL_ICU>::icu_invoke(L4_obj_ref, L4_fpage::Rights /*rights*/,
                            Syscall_frame *f,
                            Utcb const *utcb, Utcb *out)
{
  L4_msg_tag tag = f->tag();
  auto const opcode = utcb->values[0];

  switch (opcode)
    {
    // Original note: l4_icu_bind_u sends l4_ICU_OP_BIND (= 0) here.
    case Op_bind:
      return Msg_icu_bind::call(this_icu(), tag, utcb, out);

    case Op_unbind:
      return Msg_icu_unbind::call(this_icu(), tag, utcb, out);

    case Op_info:
      return Msg_icu_get_info::call(this_icu(), tag, utcb, out);

    case Op_msi_info:
      return Msg_icu_msi_info::call(this_icu(), tag, utcb, out);

    case Op_set_mode:
      return Msg_icu_set_mode::call(this_icu(), tag, utcb, out);

    case Op_unmask:
    case Op_mask:
      // The second word is the irq; skip silently if it is missing.
      if (tag.words() >= 2)
        this_icu()->icu_mask_irq(opcode == Op_mask, utcb->values[1]);
      return Kobject_h<REAL_ICU>::no_reply();

    default:
      return Kobject_iface::commit_result(-L4_err::ENosys);
    }
}
Msg_icu_bind::call函数
Msg_icu_bind是通过宏定义的拼接类,见RPC定义:
// Declares the message class Msg_icu_bind with opcode Op_bind and the
// argument list (Mword irqnum, Ko::Cap<Irq> irq).
L4_RPC(Op_bind, icu_bind, (Mword irqnum, Ko::Cap<Irq> irq));
/// RPC Message for the function arguments of the given signature type template<typename SIG> struct Sig_msg; template<typename ...ARGS> struct Sig_msg<L4_msg_tag (ARGS...)> : Msg<ARGS...> {}; /** * Define a message class `Msg_##name` for the given arguments. */ #define L4_RPC(opcode, name, fargs...) \ struct Msg_##name : Ko::Sig_msg<L4_msg_tag fargs> \ { \ enum : Mword { Op = opcode }; \ template<typename OBJ> struct Fwd \ { \ OBJ *o; \ constexpr Fwd(OBJ *o) : o(o) {} \ template<typename ...ARGS> \ L4_msg_tag operator () (ARGS ...a) const \ { return o->op_##name(a...); } \ }; \ template<typename OBJ, typename ...ARGS> \ static L4_msg_tag call(OBJ *o, L4_msg_tag tag, \ Utcb const *in, Utcb *out, \ ARGS &&...args) \ { \ typedef Ko::Sig_msg<L4_msg_tag fargs> Self; \ return Self::call(tag, in, out, Fwd<OBJ>(o), \ cxx::forward<ARGS>(args)...); \ } \ }; |
可以看出,Msg_icu_bind::call函数会调用Ko::Sig_msg<L4_msg_tag fargs>的call函数;而Sig_msg继承Msg类,调用其call函数,并将 Fwd<OBJ>(o)作为func传入,如下:
/**
 * Decode a message, run `func` on it and build the reply tag.
 *
 * \param tag   Tag of the incoming message; returned as is when reading
 *              the message fails.
 * \param in    UTCB holding the incoming message.
 * \param out   UTCB for the reply.
 * \param func  Callable that gets the decoded message.
 * \param args  Extra arguments forwarded to the call.
 *
 * \return The tag produced by `func` if it has an error, asks for no
 *         switch or carries a negative protocol; otherwise a tag built from
 *         the message's output size and item count with the flags and
 *         protocol of that result.
 */
template<typename F, typename ...EXTRA>
static L4_msg_tag call(L4_msg_tag tag, Utcb const *in, Utcb *out,
                       F &&func, EXTRA &&...args)
{
  Self request;
  bool const decoded = request.read(&tag, in, out);
  if (!decoded)
    return tag;

  L4_msg_tag result =
    Detail::Call<Msg>::call(request, func, cxx::forward<EXTRA>(args)...);

  // !do_switch means do not send an answer...
  // A negative protocol is passed on unchanged as well.
  if (EXPECT_FALSE(result.has_error() || !result.do_switch())
      || EXPECT_FALSE(result.proto() < 0))
    return result;

  return L4_msg_tag(message_words(request.out_total_size),
                    request.out_total_items,
                    result.flags(), result.proto());
}
最终会调用func函数,下面看一下传入的func函数——Fwd<OBJ>(o),其中OBJ被实例化为this_icu()(见前面icu_invoke函数),即Icu对象;而struct Fwd从前面的宏定义,可以看出,会调用一个拼接函数——o->op_##name(a...),即:Icu::op_icu_bind函数
template<typename OBJ> struct Fwd \ { \ OBJ *o; \ constexpr Fwd(OBJ *o) : o(o) {} \ template<typename ...ARGS> \ L4_msg_tag operator () (ARGS ...a) const \ { return o->op_##name(a...); } \ }; |
Icu::op_icu_bind函数
/**
 * Bind an IRQ object to interrupt `irqnum`.
 *
 * The capability needs the rights checked against Ko::Rights::CW(). With
 * the object's irq lock held, the object is unbound and then allocated for
 * `irqnum` via the IRQ manager.
 *
 * \param irqnum  Interrupt number to bind to.
 * \param irq     Capability of the IRQ object.
 *
 * \return Result 0 on success, -EPerm if the rights are insufficient or
 *         the allocation fails.
 */
L4_msg_tag
Icu::op_icu_bind(unsigned irqnum, Ko::Cap<Irq> const &irq)
{
  bool const may_bind = Ko::check_rights(irq.rights, Ko::Rights::CW());
  if (!may_bind)
    return commit_result(-L4_err::EPerm);

  auto g = lock_guard(irq.obj->irq_lock());

  // Drop any previous binding before attaching to the new interrupt.
  irq.obj->unbind();

  bool const attached = Irq_mgr::mgr->alloc(irq.obj, irqnum);
  if (attached)
    return commit_result(0);

  return commit_result(-L4_err::EPerm);
}
Irq_mgr::alloc函数
/**
 * Attach `irq` to the chip pin behind the global interrupt number.
 *
 * \param irq         IRQ object to attach.
 * \param global_irq  Global interrupt number.
 *
 * \return true if a chip exists for `global_irq` and its alloc() accepted
 *         the object; the pin is then assigned to the boot CPU. false
 *         otherwise.
 */
PUBLIC inline
bool
Irq_mgr::alloc(Irq_base *irq, Mword global_irq)
{
  Irq const target = chip(global_irq);

  if (!target.chip || !target.chip->alloc(irq, target.pin))
    return false;

  target.chip->set_cpu(target.pin, Cpu_number::boot_cpu());
  return true;
}
该函数的主要代码如上所示:首先调用Irq_mgr_multi_chip<Bits_per_entry>::chip()获取Irq_mgr::Irq对象,其维护了中断号与Irq_chip_icu的关联关系;再调用Irq_chip_gen::alloc()设置Irq_chip_gen的_irqs数组,将中断号与对应的Irq_base进行关联;最后调用Gic::set_cpu()在GIC中对中断进行设置,并将中断绑定到启动CPU(CPU0)上。
通过如上操作后就已经将外设中断设置好了,当发生外设中断时会先在fiasco内核中处理,然后再向Irq_sender的_irq_thread指定的线程发IPC消息。