go语言的GC

mark&sweep, 2分钟保证至少一次GC过程,如果分配的总内存超过上次分配的总内存一定比例(默认100%)后进行一次GC
进行mark的过程中,会停止一切G的运行,mark的过程是多任务并发的
sweep的过程是分散的

mark过程

整个程序内存块包括 .data, .bss, 每个G的stack, SpecialFinalizer
每段内存都有其相应的bitmap,用来表示每个word(8BYTE)的标志位,每word需要4bit的标志位
mark的过程就是递归遍历内存块bitmap的过程
word标志位有如下3种类型:

  1. 标量
  2. 指针
  3. 连续两个word表示一个iface或eface

    // scanblock scans a block of n bytes starting at pointer b for references
    // to other objects, scanning any it finds recursively until there are no
    // unscanned objects left. Instead of using an explicit recursion, it keeps
    // a work list in the Workbuf* structures and loops in the main function
    // body. Keeping an explicit work list is easier on the stack allocator and
    // more efficient.
    static void
    scanblock(byte b, uintptr n, byte ptrmask)
    {
    byte obj, obj0, p, arena_start, *arena_used, **wp, scanbuf[8], ptrbitp, bitp;
    uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
    Workbuf
    wbuf;
    Iface iface;
    Eface
    eface;
    Type typ;
    MSpan
    s;
    pageID k;
    bool keepworking;

    // Cache memory arena parameters in local vars.
    arena_start = runtime·mheap.arena_start;
    arena_used = runtime·mheap.arena_used;
    
    wbuf = getempty(nil);
    nobj = wbuf->nobj;
    wp = &wbuf->obj[nobj];
    keepworking = b == nil;
    scanbufpos = 0;
    for(i = 0; i < nelem(scanbuf); i++)
    	scanbuf[i] = nil;
    
    ptrbitp = nil;
    
    // ptrmask can have 2 possible values:
    // 1. nil - obtain pointer mask from GC bitmap.
    // 2. pointer to a compact mask (for stacks and data).
    if(b != nil)
    	goto scanobj;
    for(;;) {
    	if(nobj == 0) {
    		// Out of work in workbuf.
    		// First, see is there is any work in scanbuf.
    		for(i = 0; i < nelem(scanbuf); i++) {
    			b = scanbuf[scanbufpos];
    			scanbuf[scanbufpos++] = nil;
    			scanbufpos %= nelem(scanbuf);
    			if(b != nil) {
    				n = arena_used - b; // scan until bitBoundary or BitsDead
    				ptrmask = nil; // use GC bitmap for pointer info
    				goto scanobj;
    			}
    		}
    		if(!keepworking) {
    			putempty(wbuf);
    			return;
    		}
    		// Refill workbuf from global queue.
    		wbuf = getfull(wbuf);
    		if(wbuf == nil)
    			return;
    		nobj = wbuf->nobj;
    		wp = &wbuf->obj[nobj];
    	}
    
    	// If another proc wants a pointer, give it some.
    	if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
    		wbuf->nobj = nobj;
    		wbuf = handoff(wbuf);
    		nobj = wbuf->nobj;
    		wp = &wbuf->obj[nobj];
    	}
    
    	wp--;
    	nobj--;
    	b = *wp;
    	n = arena_used - b; // scan until next bitBoundary or BitsDead
    	ptrmask = nil; // use GC bitmap for pointer info
    
    scanobj:
    	if(DebugPtrs)
    		runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
    	// Find bits of the beginning of the object.
    	if(ptrmask == nil) {
    		off = (uintptr*)b - (uintptr*)arena_start;
    		ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
    	}
    	for(i = 0; i < n; i += PtrSize) {
    		obj = nil;
    		// Find bits for this word.
            ......
    
    		if(bits <= BitsScalar) // BitsScalar || BitsDead
    			continue;
    		if(bits == BitsPointer) {
    			obj = *(byte**)(b+i);
    			obj0 = obj;
    			goto markobj;
    		}
    
    		// With those three out of the way, must be multi-word.
    		if(Debug && bits != BitsMultiWord)
    			runtime·throw("unexpected garbage collection bits");
    		// Find the next pair of bits.
    		if(ptrmask == nil) {
    			bits = *ptrbitp;
    			j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
    			ptrbitp -= j;
    			bits >>= gcBits*j;
    			bits = (bits>>2)&BitsMask;
    		} else
    			bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
    
    		if(Debug && bits != BitsIface && bits != BitsEface)
    			runtime·throw("unexpected garbage collection bits");
    
    		if(bits == BitsIface) {
    			iface = (Iface*)(b+i);
    			if(iface->tab != nil) {
    				typ = iface->tab->type;
    				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
    					obj = iface->data;
    			}
    		} else {
    			eface = (Eface*)(b+i);
    			typ = eface->type;
    			if(typ != nil) {
    				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
    					obj = eface->data;
    			}
    		}
    
    		i += PtrSize;
    
    		obj0 = obj;
    	markobj:
    		// At this point we have extracted the next potential pointer.
    		// Check if it points into heap.
    		if(obj == nil)
    			continue;
    		if(obj < arena_start || obj >= arena_used) {
    			if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
    				s = nil;
    				goto badobj;
    			}
    			continue;
    		}
    		// Mark the object.
    		obj = (byte*)((uintptr)obj & ~(PtrSize-1));
    		off = (uintptr*)obj - (uintptr*)arena_start;
    		bitp = arena_start - off/wordsPerBitmapByte - 1;
    		shift = (off % wordsPerBitmapByte) * gcBits;
    		xbits = *bitp;
    		bits = (xbits >> shift) & bitMask;
    		if((bits&bitBoundary) == 0) {
    			// Not a beginning of a block, consult span table to find the block beginning.
                ......
    
    			obj = p;
    			goto markobj;
    		}
    		if(DebugPtrs)
    			runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
    
    		if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
    			// Running garbage collection again because
    			// we want to find the path from a root to a bad pointer.
    			// Found possible next step; extend or finish path.
    			for(j=0; j<nbadblock; j++)
    				if(badblock[j] == (uintptr)b)
    					goto AlreadyBad;
    			runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
    			if(ptrmask != nil)
    				runtime·throw("bad pointer");
    			if(nbadblock >= nelem(badblock))
    				runtime·throw("badblock trace too long");
    			badblock[nbadblock++] = (uintptr)b;
    		AlreadyBad:;
    		}
    
    		// Now we have bits, bitp, and shift correct for
    		// obj pointing at the base of the object.
    		// Only care about not marked objects.
    		if((bits&bitMarked) != 0)
    			continue;
    		// If obj size is greater than 8, then each byte of GC bitmap
    		// contains info for at most one object. In such case we use
    		// non-atomic byte store to mark the object. This can lead
    		// to double enqueue of the object for scanning, but scanning
    		// is an idempotent operation, so it is OK. This cannot lead
    		// to bitmap corruption because the single marked bit is the
    		// only thing that can change in the byte.
    		// For 8-byte objects we use non-atomic store, if the other
    		// quadruple is already marked. Otherwise we resort to CAS
    		// loop for marking.
    		if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
    			runtime·work.nproc == 1)
    			*bitp = xbits | (bitMarked<<shift);
    		else
    			runtime·atomicor8(bitp, bitMarked<<shift);
    
    		if(((xbits>>(shift+2))&BitsMask) == BitsDead)
    			continue;  // noscan object
    
    		// Queue the obj for scanning.
    		PREFETCH(obj);
    		p = scanbuf[scanbufpos];
    		scanbuf[scanbufpos++] = obj;
    		scanbufpos %= nelem(scanbuf);
    		if(p == nil)
    			continue;
    
    		// If workbuf is full, obtain an empty one.
    		if(nobj >= nelem(wbuf->obj)) {
    			wbuf->nobj = nobj;
    			wbuf = getempty(wbuf);
    			nobj = wbuf->nobj;
    			wp = &wbuf->obj[nobj];
    		}
    		*wp = p;
    		wp++;
    		nobj++;
    	}
        ............
    }

    }

    static void
    markroot(ParFor desc, uint32 i)
    {
    FinBlock
    fb;
    MSpan s;
    uint32 spanidx, sg;
    G
    gp;
    void *p;
    uint32 status;
    bool restart;

    USED(&desc);
    // Note: if you add a case here, please also update heapdump.c:dumproots.
    switch(i) {
    case RootData:
    	scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
    	break;
    
    case RootBss:
    	scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
    	break;
    
    case RootFinalizers:
    	for(fb=runtime·allfin; fb; fb=fb->alllink)
    		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
    	break;
    
    case RootSpans:
    	// mark MSpan.specials
        ......
    	break;
    
    case RootFlushCaches:
    	flushallmcaches();
    	break;
    
    default:
    	// the rest is scanning goroutine stacks
    	if(i - RootCount >= runtime·allglen)
    		runtime·throw("markroot: bad index");
    	gp = runtime·allg[i - RootCount];
    	// remember when we've first observed the G blocked
    	// needed only to output in traceback
    	status = runtime·readgstatus(gp);
    	if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
    		gp->waitsince = runtime·work.tstart;
    	// Shrink a stack if not much of it is being used.
    	runtime·shrinkstack(gp);
    	if(runtime·readgstatus(gp) == Gdead) 
    		gp->gcworkdone = true;
    	else 
    		gp->gcworkdone = false; 
    	restart = runtime·stopg(gp);
    	scanstack(gp);
    	if(restart)
    		runtime·restartg(gp);
    	break;
    }

    }

sweep过程

sweep扫描span里面每一个对象是否marked,将未marked的对象放入span的freelist中
如果span中的所有对象都进入了freelist,那么会将span的内存释放到heap中。

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
    MSpan *s;
    uint32 idx, sg;
    uintptr npages;
 
    // increment locks to ensure that the goroutine is not preempted
    // in the middle of sweep thus leaving the span in an inconsistent state for next GC
    g->m->locks++;
    sg = runtime·mheap.sweepgen;
    for(;;) {
    	idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
    	if(idx >= runtime·work.nspan) {
    		runtime·mheap.sweepdone = true;
    		g->m->locks--;
    		return -1;
    	}
    	s = runtime·work.spans[idx];
    	if(s->state != MSpanInUse) {
    		s->sweepgen = sg;
    		continue;
    	}
    	if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
    		continue;
    	npages = s->npages;
    	if(!runtime·MSpan_Sweep(s, false))
    		npages = 0;
    	g->m->locks--;
    	return npages;
    }
}
 
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
    int32 cl, n, npages, nfree;
    uintptr size, off, step;
    uint32 sweepgen;
    byte *p, *bitp, shift, xbits, bits;
    MCache *c;
    byte *arena_start;
    MLink head, *end, *link;
    Special *special, **specialp, *y;
    bool res, sweepgenset;
 
    // It's critical that we enter this function with preemption disabled,
    // GC must not start while we are in the middle of this function.
    if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
    	runtime·throw("MSpan_Sweep: m is not locked");
    sweepgen = runtime·mheap.sweepgen;
    if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    	runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    		s->state, s->sweepgen, sweepgen);
    	runtime·throw("MSpan_Sweep: bad span state");
    }
    arena_start = runtime·mheap.arena_start;
    cl = s->sizeclass;
    size = s->elemsize;
    if(cl == 0) {
    	n = 1;
    } else {
    	// Chunk full of small blocks.
    	npages = runtime·class_to_allocnpages[cl];
    	n = (npages << PageShift) / size;
    }
    res = false;
    nfree = 0;
    end = &head;
    c = g->m->mcache;
    sweepgenset = false;
 
    // Mark any free objects in this span so we don't collect them.
    for(link = s->freelist; link != nil; link = link->next) {
    	off = (uintptr*)link - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	*bitp |= bitMarked<<shift;
    }
 
    // Unlink & free special records for any objects we're about to free.
    specialp = &s->specials;
    special = *specialp;
    while(special != nil) {
    	// A finalizer can be set for an inner byte of an object, find object beginning.
    	p = (byte*)(s->start << PageShift) + special->offset/size*size;
    	off = (uintptr*)p - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	bits = (*bitp>>shift) & bitMask;
    	if((bits&bitMarked) == 0) {
    		// Find the exact byte for which the special was setup
    		// (as opposed to object beginning).
    		p = (byte*)(s->start << PageShift) + special->offset;
    		// about to free object: splice out special record
    		y = special;
    		special = special->next;
    		*specialp = special;
    		if(!runtime·freespecial(y, p, size, false)) {
    			// stop freeing of object if it has a finalizer
    			*bitp |= bitMarked << shift;
    		}
    	} else {
    		// object is still live: keep special record
    		specialp = &special->next;
    		special = *specialp;
    	}
    }
 
    // Sweep through n objects of given size starting at p.
    // This thread owns the span now, so it can manipulate
    // the block bitmap without atomic operations.
    p = (byte*)(s->start << PageShift);
    // Find bits for the beginning of the span.
    off = (uintptr*)p - (uintptr*)arena_start;
    bitp = arena_start - off/wordsPerBitmapByte - 1;
    shift = 0;
    step = size/(PtrSize*wordsPerBitmapByte);
    // Rewind to the previous quadruple as we move to the next
    // in the beginning of the loop.
    bitp += step;
    if(step == 0) {
    	// 8-byte objects.
    	bitp++;
    	shift = gcBits;
    }
    for(; n > 0; n--, p += size) {
    	bitp -= step;
    	if(step == 0) {
    		if(shift != 0)
    			bitp--;
    		shift = gcBits - shift;
    	}
 
    	xbits = *bitp;
    	bits = (xbits>>shift) & bitMask;
 
    	// Allocated and marked object, reset bits to allocated.
    	if((bits&bitMarked) != 0) {
    		*bitp &= ~(bitMarked<<shift);
    		continue;
    	}
    	// At this point we know that we are looking at garbage object
    	// that needs to be collected.
    	if(runtime·debug.allocfreetrace)
    		runtime·tracefree(p, size);
    	// Reset to allocated+noscan.
    	*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
    	if(cl == 0) {
    		// Free large span.
    		if(preserve)
    			runtime·throw("can't preserve large span");
    		runtime·unmarkspan(p, s->npages<<PageShift);
    		s->needzero = 1;
    		// important to set sweepgen before returning it to heap
    		runtime·atomicstore(&s->sweepgen, sweepgen);
    		sweepgenset = true;
    		// NOTE(rsc,dvyukov): The original implementation of efence
    		// in CL 22060046 used SysFree instead of SysFault, so that
    		// the operating system would eventually give the memory
    		// back to us again, so that an efence program could run
    		// longer without running out of memory. Unfortunately,
    		// calling SysFree here without any kind of adjustment of the
    		// heap data structures means that when the memory does
    		// come back to us, we have the wrong metadata for it, either in
    		// the MSpan structures or in the garbage collection bitmap.
    		// Using SysFault here means that the program will run out of
    		// memory fairly quickly in efence mode, but at least it won't
    		// have mysterious crashes due to confused memory reuse.
    		// It should be possible to switch back to SysFree if we also
    		// implement and then call some kind of MHeap_DeleteSpan.
    		if(runtime·debug.efence) {
    			s->limit = nil;	// prevent mlookup from finding this span
    			runtime·SysFault(p, size);
    		} else
    			runtime·MHeap_Free(&runtime·mheap, s, 1);
    		c->local_nlargefree++;
    		c->local_largefree += size;
    		runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
    		res = true;
    	} else {
    		// Free small object.
    		if(size > 2*sizeof(uintptr))
    			((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
    		else if(size > sizeof(uintptr))
    			((uintptr*)p)[1] = 0;
 
    		end->next = (MLink*)p;
    		end = (MLink*)p;
    		nfree++;
    	}
    }
 
    // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
    // because of the potential for a concurrent free/SetFinalizer.
    // But we need to set it before we make the span available for allocation
    // (return it to heap or mcentral), because allocation code assumes that a
    // span is already swept if available for allocation.
 
    if(!sweepgenset && nfree == 0) {
    	// The span must be in our exclusive ownership until we update sweepgen,
    	// check for potential races.
    	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    		runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    			s->state, s->sweepgen, sweepgen);
    		runtime·throw("MSpan_Sweep: bad span state after sweep");
    	}
    	runtime·atomicstore(&s->sweepgen, sweepgen);
    }
    if(nfree > 0) {
    	c->local_nsmallfree[cl] += nfree;
    	c->local_cachealloc -= nfree * size;
    	runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
    	res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
    	// MCentral_FreeSpan updates sweepgen
    }
    return res;
}

本文来自:博客园

感谢作者:richmonkey

查看原文:go语言的GC

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。