[转载]qemu-kvm virtio 虚拟化-----Linux客户机 virtio设备初始化

virtio设备物理上连接在pci总线上,逻辑上连接在virtio虚拟总线上。作为pci设备,便于资源分配与配置;在逻辑设备模型中挂接到virtio总线,则便于管理与组织。
1.qemu-kvm提供的virtio pci设备
virtio-blk(硬盘),virtio-net(网络),virtio-balloon(气球)等pci设备,这些设备连接在pci总线上。代码位于qemu: hw/virtio-pci.c
/*
 * Table of the virtio PCI device models (abridged: only the qdev name of
 * each entry is shown).
 *
 * The table is handed to pci_qdev_register_many() as a bare pointer with no
 * element count, so it ends with an empty entry whose qdev.name is NULL to
 * mark the end of the list.
 */
static PCIDeviceInfo virtio_info[] = {
    {
        .qdev.name = "virtio-blk-pci",
    },{
        .qdev.name = "virtio-net-pci",
    },{
        .qdev.name = "virtio-serial-pci",
    },{
        .qdev.name = "virtio-balloon-pci",
    },{
        /* end of list */
    }
};
/* Pass the virtio_info table to pci_qdev_register_many() for registration. */
static void virtio_pci_register_devices(void)
{
    pci_qdev_register_many(virtio_info);
}

2.客户机PCI设备进行枚举和资源分配
当Linux客户机系统启动时,对PCI设备进行枚举和资源分配(配置PCI的配置空间),通常由BIOS完成。不过Linux系统提供了两种方式:一种由BIOS实现,另一种由内核自己实现枚举和资源分配功能。代码位于kernel:arch/x86/pci/init.c
/*
 * Architecture-level PCI initialisation (abridged excerpt).
 *
 * With CONFIG_PCI_DIRECT the result of pci_direct_probe() is kept in `type`;
 * with CONFIG_PCI_BIOS pci_pcbios_init() is called.
 *
 * NOTE(review): the code that consumes `type` is not part of this excerpt.
 *
 * Returns 0.
 */
static __init int pci_arch_init(void)
{
#ifdef CONFIG_PCI_DIRECT
        int type = 0;

        type = pci_direct_probe();
#endif

#ifdef CONFIG_PCI_BIOS
        pci_pcbios_init();
#endif

        return 0;
}
真正设备枚举和资源分配由这里开始
/*
 * Legacy PCI initialisation: log a message, scan the root bus via
 * pcibios_scan_root(0) and, if a root bus was returned, add its devices.
 *
 * Always returns 0.
 */
static int __init pci_legacy_init(void)
{
        printk("PCI: Probing PCI hardware\n");

        pci_root_bus = pcibios_scan_root(0);
        if (!pci_root_bus)
                return 0;

        pci_bus_add_devices(pci_root_bus);
        return 0;
}
pcibios_scan_root()---->pci_scan_bus_parented()---->pci_scan_child_bus()--->pci_scan_slot()--->pci_scan_single_device()----->pci_device_add()
将PCI总线上的设备添加到链表
/*
 * Append a PCI device to the device list of a bus (abridged excerpt).
 *
 * dev: device whose bus_list node is linked in.
 * bus: bus whose devices list receives the device.
 *
 * The list is modified with pci_bus_sem held for writing.
 */
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{
        /*
         * Add the device to our list of discovered devices
         * and the bus list for fixup functions, etc.
         */
        down_write(&pci_bus_sem);
        list_add_tail(&dev->bus_list, &bus->devices);
        up_write(&pci_bus_sem);
}
上述过程执行完成,在/sys/devices/pci0000:00目录下,创建virtio pci设备。并且在/sys/bus/pci/devices/目录下,创建对应pci设备的符号链接,同时在/sys/bus/pci/drivers/目录下,创建virtio-pci目录,目录下存在所支持设备的符号链接文件。

3.virtio总线定义与注册,virtio总线为虚拟的总线,目的是为了满足设备管理与组织的需要。代码位于kernel:drivers/virtio/virtio.c
/*
 * Bus type object for the virtual "virtio" bus. It is the object passed to
 * bus_register() by virtio_init(). The callbacks and attribute table named
 * below are defined outside this excerpt.
 */
static struct bus_type virtio_bus = {
        /* Bus name. */
        .name  = "virtio",
        /* Device/driver match callback. */
        .match = virtio_dev_match,
        /* Device attributes table. */
        .dev_attrs = virtio_dev_attrs,
        /* uevent callback (adds the MODALIAS variable, see virtio_uevent). */
        .uevent = virtio_uevent,
        /* Bus-level probe and remove callbacks. */
        .probe = virtio_dev_probe,
        .remove = virtio_dev_remove,
};

/*
 * Register the virtio bus type. A registration failure is treated as fatal
 * and reported through panic().
 *
 * Returns 0.
 */
static int virtio_init(void)
{
        int rc = bus_register(&virtio_bus);

        if (rc)
                panic("virtio bus registration failed");

        return 0;
}
上述注册函数调用执行完成,在/sys/bus/目录下,创建了一个新的目录virtio,在该目录下同时创建了两个文件夹devices和drivers。这表示virtio总线创建完成,该总线上的设备与驱动分别位于devices和drivers目录下。

4. virtio-pci设备驱动加载
/*
 * PCI driver object for virtio-pci; it is the object passed to
 * pci_register_driver() by virtio_pci_init(). The id table and callbacks
 * named below are defined outside this excerpt, except virtio_pci_probe.
 */
static struct pci_driver virtio_pci_driver = {
        /* Driver name. */
        .name           = "virtio-pci",
        /* Table of PCI ids this driver is offered. */
        .id_table       = virtio_pci_id_table,
        /* Probe and remove callbacks. */
        .probe          = virtio_pci_probe,
        .remove         = virtio_pci_remove,
#ifdef CONFIG_PM
        /* Suspend/resume callbacks, only present when CONFIG_PM is set. */
        .suspend        = virtio_pci_suspend,
        .resume         = virtio_pci_resume,
#endif
};
/*
 * Module init for virtio-pci: create the "virtio-pci" root device and then
 * register the PCI driver.
 *
 * Returns 0 on success or a negative errno. If the root device cannot be
 * created nothing else is attempted; if driver registration fails the root
 * device is unregistered again so nothing is left behind.
 */
static int __init virtio_pci_init(void)
{
        int err;

        virtio_pci_root = root_device_register("virtio-pci");
        if (IS_ERR(virtio_pci_root))
                return PTR_ERR(virtio_pci_root);

        err = pci_register_driver(&virtio_pci_driver);
        if (err)
                root_device_unregister(virtio_pci_root);

        return err;
}
上述注册函数调用执行完成,在/sys/bus/pci/drivers和/sys/devices目录下创建了virtio-pci文件夹

5,virtio总线子设备注册
上面步骤2(对PCI设备进行枚举和资源分配)中已经介绍,枚举到的设备已经关联到总线链表中。函数调用pci_register_driver(&virtio_pci_driver)就是对链表中的每一个pci设备进行探测,判断该驱动是否支持该设备;如果支持,则调用驱动的probe函数,完成该pci设备的启用,同时在virtio总线上注册设备。
        /*
         * Fragments of the path taken when a driver is registered (not one
         * contiguous function): __driver_attach is invoked for each device
         * on the driver's bus ...
         */
        bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
        /* ... and the driver's probe callback, if set, is called for the device. */
        if (drv->probe) {
                ret = drv->probe(dev);
        }

/*
 * Probe callback of the virtio-pci driver: enable the PCI device, claim its
 * regions, map BAR 0 and register a virtio device for it.
 *
 * pci_dev: PCI device being probed.
 * id:      matching id-table entry (not used here).
 *
 * Returns 0 on success or a negative errno. On failure every step already
 * taken is undone in reverse order through the labels at the end.
 */
static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
                                      const struct pci_device_id *id)
{
        struct virtio_pci_device *vp_dev;
        int err;

        /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
        if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
                return -ENODEV;

        /* allocate our structure and fill it out */
        vp_dev = kzalloc(sizeof(*vp_dev), GFP_KERNEL);
        if (vp_dev == NULL)
                return -ENOMEM;

        vp_dev->vdev.dev.parent = virtio_pci_root;
        vp_dev->vdev.dev.release = virtio_pci_release_dev;
        vp_dev->vdev.config = &virtio_pci_config_ops;
        vp_dev->pci_dev = pci_dev;
        INIT_LIST_HEAD(&vp_dev->virtqueues);
        spin_lock_init(&vp_dev->lock);

        /* Disable MSI/MSIX to bring device to a known good state. */
        pci_msi_off(pci_dev);

        /* enable the device */
        err = pci_enable_device(pci_dev);
        if (err)
                goto out;

        err = pci_request_regions(pci_dev, "virtio-pci");
        if (err)
                goto out_enable_device;

        vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
        if (vp_dev->ioaddr == NULL) {
                /*
                 * err is still 0 from the successful pci_request_regions()
                 * call; set it so the failure is not reported as success.
                 */
                err = -ENOMEM;
                goto out_req_regions;
        }

        pci_set_drvdata(pci_dev, vp_dev);

        /* we use the subsystem vendor/device id as the virtio vendor/device
         * id.  this allows us to use the same PCI vendor/device id for all
         * virtio devices and to identify the particular virtio driver by
         * the subsystem ids */
        vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
        vp_dev->vdev.id.device = pci_dev->subsystem_device;

        /* finally register the virtio device */
        err = register_virtio_device(&vp_dev->vdev);
        if (err)
                goto out_set_drvdata;

        return 0;

        /* Error unwinding: each label undoes the step acquired just before it. */
out_set_drvdata:
        pci_set_drvdata(pci_dev, NULL);
        pci_iounmap(pci_dev, vp_dev->ioaddr);
out_req_regions:
        pci_release_regions(pci_dev);
out_enable_device:
        pci_disable_device(pci_dev);
out:
        /* NOTE(review): assumes virtio_pci_release_dev does not itself free vp_dev - confirm. */
        kfree(vp_dev);
        return err;
}
上述注册函数调用执行完成,在/sys/devices/virtio-pci/下创建相应子设备{virtio1,virtio2,virtio3},同时在/sys/bus/virtio/devices下面创建三个符号链接文件{virtio1,virtio2,virtio3}

6. virtio总线子设备驱动注册。
当通过register_virtio_device在virtio总线上注册设备时,驱动核心会遍历总线上的驱动,找到支持该设备的驱动并与之关联,然后调用virtio总线的probe函数:virtio_dev_probe()。
   register_virtio_device()--->bus_probe_device()---->device_attach();
   /*
    * Fragments of the path taken when a device is registered (not one
    * contiguous function): __device_attach is invoked for each driver on
    * the device's bus ...
    */
   bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
      /* ... and the bus-level probe callback, if set, is called for the device. */
      if (dev->bus->probe) {
                ret = dev->bus->probe(dev);
        }

static int virtio_dev_probe(struct device *_d)
{       
        int err, i;
        struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
        struct virtio_driver *drv = container_of(dev->dev.driver,
                                                 struct virtio_driver, driver);
        u32 device_features;

        /* We have a driver! */
        add_status(dev, VIRTIO_CONFIG_S_DRIVER);

        /* Figure out what features the device supports. */
        device_features = dev->config->get_features(dev);
        
        /* Features supported by both device and driver into dev->features. */
        memset(dev->features, 0, sizeof(dev->features));
        for (i = 0; i < drv->feature_table_size; i++) {
                unsigned int f = drv->feature_table[i];
                BUG_ON(f >= 32);
                if (device_features & (1 << f))
                        set_bit(f, dev->features);
        }

        /* Transport features always preserved to pass to finalize_features. */
        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
                if (device_features & (1 << i))
                        set_bit(i, dev->features);

        dev->config->finalize_features(dev);
       err = drv->probe(dev);
        if (err)
                add_status(dev, VIRTIO_CONFIG_S_FAILED);
        else
                add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);

        return err;
}
// Example of a virtio device driver: virtio_balloon.
// This is the driver object passed to register_virtio_driver() by init();
// the tables and callbacks it names are defined outside this excerpt.
static struct virtio_driver virtio_balloon_driver = {
        // Feature list used during negotiation in virtio_dev_probe().
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
        .driver.name =  KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        // Device ids this driver is offered.
        .id_table =     id_table,
        // Driver callbacks.
        .probe =        virtballoon_probe,
        .remove =       __devexit_p(virtballoon_remove),
        .config_changed = virtballoon_changed,
};

/*
 * Module init for the balloon driver: register virtio_balloon_driver and
 * hand back whatever register_virtio_driver() returned.
 */
static int __init init(void)
{
        int rc;

        rc = register_virtio_driver(&virtio_balloon_driver);
        return rc;
}
同时在/sys/bus/virtio/drivers下面创建三个文件{virtio_balloon,virtio_blk,virtio_console},并且与设备发生关联
//////////////////////////////
热插拔事件的产生往往是由总线驱动级的逻辑处理,所以总线一般提供事件发送函数。例如virtio总线事件函数virtio_uevent。
/*
 * uevent callback of the virtio bus: append a MODALIAS variable built from
 * the virtio device id and vendor id to the event environment.
 *
 * _dv: the struct device embedded in the virtio_device.
 * env: uevent environment being filled in.
 *
 * Returns the result of add_uevent_var().
 */
static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
{
        struct virtio_device *vdev;

        vdev = container_of(_dv, struct virtio_device, dev);

        return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
                              vdev->id.device, vdev->id.vendor);
}
下面函数工作流程如下:
1.由设备对象往上查找,直到找到包含kset的kobject(总线包含着kset)
2.判断该kset的uevent_ops是否提供filter,name,uevent函数,如果提供,调用它。
3.分配一个kobj_uevent_env,并开始填充env环境变量:ACTION,DEVPATH,SUBSYSTEM,SEQNUM,MODALIAS
4.通过netlink发送到用户空间
register_virtio_device()---->device_register()---->device_add()---->kobject_uevent()---->kobject_uevent_env()
/*
 * Build a uevent for a kobject and broadcast it over netlink (abridged
 * excerpt: the local variable declarations, the "exit" label targeted by
 * the gotos below and the end of the function are not shown).
 *
 * kobj:     object the event is about.
 * action:   event type; compared against KOBJ_ADD / KOBJ_REMOVE below.
 * envp_ext: optional NULL-terminated array of extra environment strings,
 *           may be NULL.
 *
 * Visible return paths: 0 when the kset filter drops the event, -ENOMEM
 * when the environment buffer cannot be allocated. All other outcomes are
 * stored in retval and handled at the "exit" label.
 */
int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
                       char *envp_ext[])
{
        /* search the kset we belong to */
        top_kobj = kobj;
        while (!top_kobj->kset && top_kobj->parent)
                top_kobj = top_kobj->parent;
        /*
         * NOTE(review): if the walk above ends without finding a kset, kset
         * is NULL and the next line dereferences it; no guard is visible in
         * this excerpt.
         */
        kset = top_kobj->kset;
        uevent_ops = kset->uevent_ops;

        /* skip the event, if the filter returns zero. */
        /*
         * NOTE(review): the quote characters around %s in the two pr_debug
         * format strings are typographic quotes, presumably introduced when
         * the article was published - confirm against the original source.
         */
        if (uevent_ops && uevent_ops->filter)
                if (!uevent_ops->filter(kset, kobj)) {
                        pr_debug("kobject: ‘%s‘ (%p): %s: filter function "
                                 "caused the event to drop!\n",
                                 kobject_name(kobj), kobj, __func__);
                        return 0;
                }

        /* originating subsystem: from the kset's name callback, else the kset's own name */
        if (uevent_ops && uevent_ops->name)
                subsystem = uevent_ops->name(kset, kobj);
        else
                subsystem = kobject_name(&kset->kobj);
        /* environment buffer */
        env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
        if (!env)
                return -ENOMEM;

        /* complete object path */
        devpath = kobject_get_path(kobj, GFP_KERNEL);
        if (!devpath) {
                retval = -ENOENT;
                goto exit;
        }

        /* default keys */
        retval = add_uevent_var(env, "ACTION=%s", action_string);
        if (retval)
                goto exit;
        retval = add_uevent_var(env, "DEVPATH=%s", devpath);
        if (retval)
                goto exit;
        retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem);
        if (retval)
                goto exit;

        /* keys passed in from the caller */
        if (envp_ext) {
                for (i = 0; envp_ext[i]; i++) {
                        retval = add_uevent_var(env, "%s", envp_ext[i]);
                        if (retval)
                                goto exit;
                }
        }
        /* let the kset specific function add its stuff */
        if (uevent_ops && uevent_ops->uevent) {
                retval = uevent_ops->uevent(kset, kobj, env);
                if (retval) {
                        pr_debug("kobject: ‘%s‘ (%p): %s: uevent() returned "
                                 "%d\n", kobject_name(kobj), kobj,
                                 __func__, retval);
                        goto exit;
                }
        }

        /*
         * Mark "add" and "remove" events in the object to ensure proper
         * events to userspace during automatic cleanup. If the object did
         * send an "add" event, "remove" will automatically generated by
         * the core, if not already done by the caller.
         */
        if (action == KOBJ_ADD)
                kobj->state_add_uevent_sent = 1;
        else if (action == KOBJ_REMOVE)
                kobj->state_remove_uevent_sent = 1;
        /* we will send an event, so request a new sequence number */
        spin_lock(&sequence_lock);
        seq = ++uevent_seqnum;
        spin_unlock(&sequence_lock);
        retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)seq);
        if (retval)
                goto exit;
        /* send netlink message */
        if (uevent_sock) {
                struct sk_buff *skb;
                size_t len;

                /* allocate message with the maximum possible size */
                /* header is "<action>@<devpath>": +2 covers the '@' and the terminating NUL */
                len = strlen(action_string) + strlen(devpath) + 2;
                skb = alloc_skb(len + env->buflen, GFP_KERNEL);
                if (skb) {
                        char *scratch;

                        /* add header */
                        scratch = skb_put(skb, len);
                        sprintf(scratch, "%s@%s", action_string, devpath);

                        /* copy keys to our continuous event payload buffer */
                        /* each key is copied together with its terminating NUL */
                        for (i = 0; i < env->envp_idx; i++) {
                                len = strlen(env->envp[i]) + 1;
                                scratch = skb_put(skb, len);
                                strcpy(scratch, env->envp[i]);
                        }

                        NETLINK_CB(skb).dst_group = 1;
                        retval = netlink_broadcast(uevent_sock, skb, 0, 1,
                                                   GFP_KERNEL);
                        /* ENOBUFS should be handled in userspace */
                        if (retval == -ENOBUFS)
                                retval = 0;
                } else
                        retval = -ENOMEM;
        }
        /* (excerpt ends here: the "exit" label and the rest of the function are not shown) */
用户空间
    当信息到达用户空间后,用户空间的udevd守护进程接收到此信息,并在udev规则文件里匹配相应的规则。

 

文章来源:http://blog.csdn.net/zhuriyuxiao/article/details/9357823

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。