Linux内核设计的艺术-关于缓冲块的进程等待队列

浏览数：49 / 时间：2015年06月20日

进程A是一个读盘进程，目的是将hello.txt文件中的100字节读入buffer[100]。

代码如下：

/* Forward declaration of process A's worker routine, defined further down. */
void FunA();
/* Entry point of process A; the "..." lines stand for code the article omits. */
void main()
{
	...
	FunA();
	...
}

/*
 * Worker routine of process A: opens hello.txt, reads up to sizeof(buffer)
 * (100) bytes into a local buffer, closes the descriptor, and then spins in
 * a nested empty loop to burn CPU time.
 */
void FunA()
{
	char buffer[100];
	int i,j;
	int fd = open("/mnt/user/user1/user2/hello.txt",O_RDWR,0644);
	read(fd,buffer,sizeof(buffer));
	close(fd);
	
	for(i=0;i<1000000;i++)
	{
		/* The inner loop has to test its own counter j. Testing i here
		 * never becomes false while i is 0, so j would be incremented
		 * until it overflows and the outer loop would never advance. */
		for(j=0;j<100000;j++)
		{
			;
		}
	}
}

进程B也是一个读盘进程，目的是将hello.txt文件中的200字节读入buffer[200]。

/* Forward declaration of process B's worker routine, defined further down. */
void FunB();
/* Entry point of process B; the "..." lines stand for code the article omits. */
void main()
{
	...
	FunB();
	...
}

/*
 * Worker routine of process B: opens hello.txt, reads up to sizeof(buffer)
 * (200) bytes into a local buffer, closes the descriptor, and then spins in
 * a nested empty loop to burn CPU time.
 */
void FunB()
{
	char buffer[200];
	int i,j;
	int fd = open("/mnt/user/user1/user2/hello.txt",O_RDWR,0644);
	read(fd,buffer,sizeof(buffer));
	close(fd);
	
	for(i=0;i<1000000;i++)
	{
		/* The inner loop has to test its own counter j. Testing i here
		 * never becomes false while i is 0, so j would be incremented
		 * until it overflows and the outer loop would never advance. */
		for(j=0;j<100000;j++)
		{
			;
		}
	}
}

进程C是一个写盘进程，目的是往hello.txt文件中写入str1[]中的字符“ABCDE”。

代码如下：

/* Forward declaration of process C's worker routine, defined further down. */
void FunC();
/* Entry point of process C; the "..." lines stand for code the article omits. */
void main()
{
	...
	FunC();
	...
}

/*
 * Worker routine of process C: opens hello.txt, writes the strlen(str1)
 * characters of "ABCDE" to it, closes the descriptor, and then spins in a
 * nested empty loop to burn CPU time.
 */
void FunC()
{
	char str1[]="ABCDE";
	int i,j;
	int fd = open("/mnt/user/user1/user2/hello.txt",O_RDWR,0644);
	write(fd,str1,strlen(str1));
	close(fd);
	
	for(i=0;i<1000000;i++)
	{
		/* The inner loop has to test its own counter j. Testing i here
		 * never becomes false while i is 0, so j would be incremented
		 * until it overflows and the outer loop would never advance. */
		for(j=0;j<100000;j++)
		{
			;
		}
	}
}

这三个进程执行顺序为：进程A先执行，之后进程B执行，最后进程C执行。这三个进程没有父子关系。

进程A启动后，执行open函数，最终会映射到sys_open函数去执行。

代码路径：fs/open.c

/*
 * Abridged excerpt of sys_open(); "..." marks code the article leaves out.
 *
 * The visible part stores the file-table entry f in the current task's
 * filp[fd] slot and bumps its f_count, calls open_namei() to obtain the
 * inode for filename, then fills in f (mode, flags, count, inode, position)
 * and returns the descriptor number fd.
 */
int sys_open(const char * filename,int flag,int mode)
{
	... /* look for a free file entry (f) and inode */
	(current->filp[fd]=f)->f_count++;
	if ((i=open_namei(filename,flag,mode,&inode))<0) {
		/* open_namei() reported failure; handling elided */
		...
	}
	...
	f->f_mode = inode->i_mode;
	f->f_flags = flag;
	f->f_count = 1;
	f->f_inode = inode;
	f->f_pos = 0;	/* the read/write position starts at 0 */
	return (fd);
}

之后开始执行read函数，read函数最终会映射到sys_read()函数去执行。

代码路径：fs/read_write.c

/*
 * Abridged excerpt of sys_read(); "..." marks code the article leaves out.
 *
 * Validates fd and count, looks up the file entry in the current task's
 * filp[] table, and for directories and regular files clamps count to the
 * bytes remaining before i_size and hands off to file_read().
 * Returns file_read()'s result, 0 when nothing is left to read, or -EINVAL
 * for a bad argument or an inode type not handled in the visible code.
 */
int sys_read(unsigned int fd,char * buf,int count)
{
	struct file * file;
	struct m_inode * inode;

	/* reject out-of-range fd, negative count, or an empty filp[] slot */
	if (fd>=NR_OPEN || count<0 || !(file=current->filp[fd]))
		return -EINVAL;
	...
	inode = file->f_inode;
	...
	if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) {
		/* never read past the end of the file */
		if (count+file->f_pos > inode->i_size)
			count = inode->i_size - file->f_pos;
		if (count<=0)
			return 0;
		return file_read(inode,file,buf,count);
	}
	/* reached only when none of the branches above returned */
	printk("(Read)inode->i_mode=%06o\n\r",inode->i_mode);
	return -EINVAL;
}

之后sys_read函数调用file_read()来读取文件内容。

代码路径：fs/file_dev.c

/*
 * Copies up to count bytes of the file described by inode, starting at
 * filp->f_pos, into buf one block at a time, advancing filp->f_pos.
 *
 * For each block, bmap() supplies the block number and bread() the buffer;
 * when bmap() returns 0 no buffer is read and zero bytes are stored instead.
 * A failed bread() ends the loop early.
 *
 * Returns the number of bytes transferred, or -ERROR when none were.
 */
int file_read(struct m_inode * inode, struct file * filp, char * buf, int count)
{
	int left,chars,nr;
	struct buffer_head * bh;

	if ((left=count)<=0)
		return 0;
	while (left) {
		/* nr is first the block number returned by bmap() for the current position */
		if ((nr = bmap(inode,(filp->f_pos)/BLOCK_SIZE))) {
			if (!(bh=bread(inode->i_dev,nr)))
				break;
		} else
			bh = NULL;
		/* nr is reused as the byte offset inside the block */
		nr = filp->f_pos % BLOCK_SIZE;
		/* take what is left in this block, but no more than still requested */
		chars = MIN( BLOCK_SIZE-nr , left );
		filp->f_pos += chars;
		left -= chars;
		if (bh) {
			char * p = nr + bh->b_data;
			/* store the bytes into buf via put_fs_byte(), then release the buffer */
			while (chars-->0)
				put_fs_byte(*(p++),buf++);
			brelse(bh);
		} else {
			/* no buffer for this block: store zeros */
			while (chars-->0)
				put_fs_byte(0,buf++);
		}
	}
	inode->i_atime = CURRENT_TIME;
	return (count-left)?(count-left):-ERROR;
}

file_read()函数调用bread()函数从硬盘上读取数据。

代码路径：fs/buffer.c

/*
 * Returns a buffer for (dev, block) whose b_uptodate flag is set, or NULL.
 *
 * Gets a buffer from getblk(); if it is not yet up to date, queues a READ
 * through ll_rw_block() and waits in wait_on_buffer(). If the buffer is
 * still not up to date afterwards it is released and NULL is returned.
 * Panics if getblk() returns NULL.
 */
struct buffer_head * bread(int dev,int block)
{
	struct buffer_head * bh;

	if (!(bh=getblk(dev,block)))// a free buffer block has been obtained
		panic("bread: getblk returned NULL\n");
	if (bh->b_uptodate)// b_uptodate is 0 for process A in this walkthrough
		return bh;
	ll_rw_block(READ,bh);// lock the buffer, bind it to a request item, issue the disk read
	wait_on_buffer(bh);
	if (bh->b_uptodate)
		return bh;
	brelse(bh);
	return NULL;
}

getblk申请了一个空闲的缓冲块，然后调用ll_rw_block，将缓冲块加锁并与请求项绑定，发送读盘指令。
代码路径：kernel/blk_drv/ll_rw_blk.c

/*
 * Hands a read/write of buffer bh to the block device that owns it.
 *
 * The major number is taken from bh->b_dev. When it indexes a blk_dev[]
 * entry that has a request_fn, the work is passed to make_request();
 * otherwise a diagnostic is printed and nothing is queued.
 */
void ll_rw_block(int rw, struct buffer_head * bh)
{
	unsigned int major = MAJOR(bh->b_dev);

	if (major < NR_BLK_DEV && blk_dev[major].request_fn) {
		make_request(major,rw,bh);
		return;
	}
	printk("Trying to read nonexistent block-device\n\r");
}

/*
 * Abridged excerpt of make_request(); "..." marks code the article leaves out.
 *
 * Locks bh (which may sleep in lock_buffer()), returns early when there is
 * nothing to transfer, and otherwise passes a request to add_request().
 */
static void make_request(int major,int rw, struct buffer_head * bh)
{
	...
	lock_buffer(bh);// take the buffer lock
        if ((rw == WRITE && !bh->b_dirt) || (rw == READ && bh->b_uptodate)) {// a write of a non-dirty block, or a read of an up-to-date block: return right away; processes B and C hit this case 
            unlock_buffer(bh);  
            return;  
        } 
	...
	add_request(major+blk_dev,req);// issue the disk read request
}

/*
 * Acquires the lock on bh: sleeps on bh->b_wait for as long as b_lock is
 * set, then sets b_lock to 1. The check-and-set sits between cli() and
 * sti() (presumably interrupt disable/enable; confirm against the kernel
 * headers).
 */
static inline void lock_buffer(struct buffer_head * bh)
{
	cli();
	while (bh->b_lock)// already locked by someone else, so wait
		sleep_on(&bh->b_wait);
	bh->b_lock=1;
	sti();
}

加锁，发出请求后，硬盘就开始工作了，把硬盘中的数据读到硬盘缓冲区，每读完一块就会发出中断。

程序接着往下执行到wait_on_buffer。

代码路径：fs/buffer.c

/*
 * Blocks the caller until bh is unlocked: sleeps on bh->b_wait for as long
 * as b_lock is set. Unlike lock_buffer() it does not take the lock itself.
 * The loop sits between cli() and sti() (presumably interrupt
 * disable/enable; confirm against the kernel headers).
 */
static inline void wait_on_buffer(struct buffer_head * bh)
{
	cli();
	while (bh->b_lock)// the buffer is locked at this point in the walkthrough
		sleep_on(&bh->b_wait);
	sti();
}

代码路径：kernel/sched.c

/*
 * Puts the current task to sleep on the wait slot *p.
 *
 * The previous occupant of *p is saved in the local tmp, *p is pointed at
 * the current task, and the task is marked TASK_UNINTERRUPTIBLE before
 * schedule() is called. Once this task runs again, the saved previous
 * waiter (if any) has its state set to 0, so waiters are released one
 * after another. Panics when called by init_task.
 *
 * The inline notes describe the call made by process A.
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;// tmp holds NULL at this point
	*p = current;// bh->b_wait now holds process A's task_struct pointer
	current->state = TASK_UNINTERRUPTIBLE;// put process A into the uninterruptible wait state
	schedule();// switch to another process
	if (tmp)
		tmp->state=0;
}

进程A被挂起后，调用schedule，切换到进程B执行。与此同时，硬盘也正在向数据寄存器端口中传递数据。

进程B的执行流程与进程A大致一致，不过open_namei中获取的hello.txt文件的i节点有所不同，找到了现成的hello.txt文件的i节点，引用计数增加。

还有一处不同点就是getblk，申请缓冲块，此时在哈希表中可以找到指定的缓冲块，直接返回。

进程B执行到ll_rw_block时，会调用make_request，进而执行lock_buffer，代码如下：

/*
 * Acquires the lock on bh: sleeps on bh->b_wait for as long as b_lock is
 * set, then sets b_lock to 1. Repeated here because process B blocks in
 * the while loop below.
 */
static inline void lock_buffer(struct buffer_head * bh)
{
	cli();
	while (bh->b_lock)// already locked by someone else, so wait
		sleep_on(&bh->b_wait);
	bh->b_lock=1;
	sti();
}

/*
 * Puts the current task to sleep on the wait slot *p, remembering the
 * previous occupant in tmp so it can be made runnable (state 0) after this
 * task resumes from schedule().
 *
 * The inline notes describe the call made by process B.
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;// tmp now holds process A's task_struct pointer
	*p = current;// bh->b_wait now holds process B's task_struct pointer
	current->state = TASK_UNINTERRUPTIBLE;// put process B into the uninterruptible wait state
	schedule();// switch to another process
	if (tmp)
		tmp->state=0;
}

然后，切换到进程C，与此同时，硬盘也正在向数据寄存器端口中传递数据。

进程C的大致流程和进程B都一样，只不过write调用sys_write，然后又调用file_write。代码如下：

代码路径：fs/file_dev.c

/*
 * Writes count bytes from buf into the file described by inode, one block
 * at a time.
 *
 * The start position is inode->i_size when O_APPEND is set in f_flags,
 * otherwise filp->f_pos. For each block, create_block() supplies the block
 * number and bread() the buffer; the buffer is marked dirty, filled via
 * get_fs_byte() and released. i_size grows when the write goes past it.
 * A failure of create_block() or bread() ends the loop early.
 *
 * Returns the number of bytes written, or -1 when none were.
 */
int file_write(struct m_inode * inode, struct file * filp, char * buf, int count)
{
	off_t pos;
	int block,c;
	struct buffer_head * bh;
	char * p;
	int i=0;	/* bytes written so far */

/*
 * ok, append may not work when many processes are writing at the same time
 * but so what. That way leads to madness anyway.
 */
	if (filp->f_flags & O_APPEND)
		pos = inode->i_size;
	else
		pos = filp->f_pos;
	while (i<count) {
		if (!(block = create_block(inode,pos/BLOCK_SIZE)))
			break;
		/* bread() is where process C ends up sleeping in this walkthrough */
		if (!(bh=bread(inode->i_dev,block)))
			break;
		/* c is first the offset inside the block ... */
		c = pos % BLOCK_SIZE;
		p = c + bh->b_data;
		bh->b_dirt = 1;
		/* ... then the room left in the block, capped at what remains to write */
		c = BLOCK_SIZE-c;
		if (c > count-i) c = count-i;
		pos += c;
		if (pos > inode->i_size) {
			inode->i_size = pos;
			inode->i_dirt = 1;
		}
		i += c;
		while (c-->0)
			*(p++) = get_fs_byte(buf++);
		brelse(bh);
	}
	inode->i_mtime = CURRENT_TIME;
	/* f_pos and i_ctime are only updated for non-append writes */
	if (!(filp->f_flags & O_APPEND)) {
		filp->f_pos = pos;
		inode->i_ctime = CURRENT_TIME;
	}
	return (i?i:-1);
}

与进程B一样，bread->ll_rw_block->make_request->lock_buffer->sleep_on，代码如下：

/*
 * Puts the current task to sleep on the wait slot *p, remembering the
 * previous occupant in tmp so it can be made runnable (state 0) after this
 * task resumes from schedule().
 *
 * The inline notes describe the call made by process C.
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;// tmp now holds process B's task_struct pointer
	*p = current;// bh->b_wait now holds process C's task_struct pointer
	current->state = TASK_UNINTERRUPTIBLE;// put process C into the uninterruptible wait state
	schedule();// switch to another process
	if (tmp)
		tmp->state=0;
}

进程C被挂起后，调用schedule函数，此时系统中已经没有就绪的进程了，因此切换到进程0执行。与此同时，硬盘也正在向数据寄存器端口中传递数据。

此时进程A、进程B、进程C都已经被挂起了，系统中所有的进程又都处于非就绪态了。所以默认切换到进程0去执行，直到数据读取完毕，硬盘产生中断。

硬盘中断产生后，中断服务程序将开始工作，此时硬盘已经将指定的数据全部载入缓冲块。中断服务程序开始工作后，将bh缓冲块解锁，并调用wake_up函数，将bh中wait字段所对应的进程（进程C）唤醒。执行代码如下：

代码路径：kernel/blk_drv/blk.h

/*
 * Finishes the request at the head of the queue (CURRENT).
 *
 * Records uptodate in the request's buffer and unlocks it, reports an I/O
 * error when uptodate is 0, wakes tasks sleeping on CURRENT->waiting and on
 * wait_for_request, marks the request slot unused (dev = -1) and advances
 * CURRENT to the next request.
 */
static inline void end_request(int uptodate)
{
	DEVICE_OFF(CURRENT->dev);
	if (CURRENT->bh) {
		CURRENT->bh->b_uptodate = uptodate;// uptodate is 1 in this walkthrough
		unlock_buffer(CURRENT->bh);// this runs: the buffer block gets unlocked
	}
	if (!uptodate) {
		printk(DEVICE_NAME " I/O error\n\r");
		/* NOTE(review): CURRENT->bh is dereferenced here without the
		 * NULL check used above; confirm bh cannot be NULL on this path. */
		printk("dev %04x, block %d\n\r",CURRENT->dev,
			CURRENT->bh->b_blocknr);
	}
	wake_up(&CURRENT->waiting);
	wake_up(&wait_for_request);
	CURRENT->dev = -1;
	CURRENT = CURRENT->next;
}

代码路径：kernel/blk_drv/ll_rw_blk.c

/*
 * Releases the lock on bh and wakes the task recorded in bh->b_wait.
 * Prints a diagnostic when the buffer was not locked to begin with; the
 * lock flag is cleared and the wake-up issued either way.
 */
static inline void unlock_buffer(struct buffer_head * bh)
{
	if (bh->b_lock == 0) {
		printk("ll_rw_block.c: buffer not locked\n\r");
	}
	bh->b_lock = 0;
	wake_up(&bh->b_wait);
}

代码路径：kernel/sched.c

/*
 * Wakes the task recorded in the wait slot *p, if there is one: its state
 * is set to 0 and the slot is cleared. Does nothing when p or *p is NULL.
 */
void wake_up(struct task_struct **p)
{
	struct task_struct *sleeper;

	if (!p)
		return;
	sleeper = *p;
	if (!sleeper)
		return;
	sleeper->state = 0;	/* in this walkthrough: process C becomes ready */
	*p = NULL;		/* bh->b_wait is now NULL */
}

中断服务程序结束后，再次返回进程0中，并切换到就绪的进程C。进程C是在sleep_on函数中调用schedule函数进行进程切换的，因此接着往下执行：

/*
 * Tail of sleep_on() as resumed by process C; "..." marks the elided start
 * of the function. Execution continues after schedule() returns.
 */
void sleep_on(struct task_struct **p)
{
	...
	current->state = TASK_UNINTERRUPTIBLE;// put process C into the uninterruptible wait state
	schedule();// switch to another process
	if (tmp)
		tmp->state=0;// make process B ready
}

进程C接着执行make_request。正如上面代码注释的一样，由于bread调用的是ll_rw_block(READ,bh)，此时是读且uptodate为1，make_request解锁缓冲块后直接返回，随后bread函数返回，执行真正的写代码，如下：

/* Quoted from file_write(): fill the buffer block with c bytes fetched from buf via get_fs_byte(). */
while (c-->0)
			*(p++) = get_fs_byte(buf++);

之后返回进程C的用户程序，消耗时间片

	/* Busy loop from process C's user program, used to consume its time slice. */
	for(i=0;i<1000000;i++)
	{
		/* The inner loop tests its own counter j; testing i here would
		 * never become false while i is 0. */
		for(j=0;j<100000;j++)
		{
			;
		}
	}

进程C的时间片消减为0，要切换进程，现在只有B和C处于就绪态，进程C的时间片用完了，所以切换到进程B。进程B也是sleep_on函数中被切换的，请看下面的代码：

/*
 * Tail of sleep_on() as resumed by process B; "..." marks the elided start
 * of the function. Execution continues after schedule() returns.
 */
void sleep_on(struct task_struct **p)
{
	...
	current->state = TASK_UNINTERRUPTIBLE;// put process B into the uninterruptible wait state
	schedule();// switch to another process
	if (tmp)
		tmp->state=0;// make process A ready
}

进程B，接着执行make_request，正如上面代码注释的一样，此时是读且uptodate为1，所以bread函数返回，执行真正的读代码，如下：

/* Quoted from file_read(): store chars bytes from the buffer block into buf via put_fs_byte(). */
while (chars-->0)
				put_fs_byte(*(p++),buf++);

之后返回进程B的用户程序，消耗时间片，直到为0，此时就切换到进程A了。进程A同样是在sleep_on函数中被切走的。

/*
 * Tail of sleep_on() as resumed by process A; "..." marks the elided start
 * of the function. Execution continues after schedule() returns.
 */
void sleep_on(struct task_struct **p)
{
	...
	current->state = TASK_UNINTERRUPTIBLE;// put process A into the uninterruptible wait state
	schedule();// switch to another process
	if (tmp)// tmp is NULL for process A, so nothing further is woken
		tmp->state=0;
}

进程A，执行完wait_on_buffer，所以bread函数返回，执行真正的读代码，如下：

/* Quoted from file_read(): store chars bytes from the buffer block into buf via put_fs_byte(). */
while (chars-->0)
				put_fs_byte(*(p++),buf++);

至此，就全部分析完了。

Linux内核设计的艺术-关于缓冲块的进程等待队列,古老的榕树,5-wow.com

郑重声明：本站内容如果来自互联网及其他传播媒体，其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享，并不代表本站赞同其观点和对其真实性负责，也不构成任何其他建议。

Linux内核设计的艺术-关于缓冲块的进程等待队列

标签： color class 代码 blog tr 表文件 nb sp ip static color class 代码 blog tr 表文件 nb sp ip static

Linux内核设计的艺术-关于缓冲块的进程等待队列

相关文章

随机文章

您可能还喜欢

您可能还喜欢

最新图文

您可能还喜欢

您可能还喜欢

文摘排行

文章排行

推荐文章

图文排行

推荐图文