一、问题
在复制的时候经常会遇到这样的问题:需要复制的xlog文件找不到了。那么xlog文件什么时候被删除?会删除多少、保留多少个xlog文件?哪些xlog文件需要保留?本文将从原理上对这些问题进行解读。
二、原理
每次checkpoint后都会根据需要删除或者回收不再需要的xlog文件。
1、首先估算两次checkpoint之间产生的xlog量,根据这个量计算出未来最大的日志文件号,从而回收不再需要的文件,将其重命名为未来即将使用的日志文件号:
1.1 UpdateCheckPointDistanceEstimate估算两次checkpoint之间产生的日志量:
if (CheckPointDistanceEstimate < nbytes)//上次估算量比这次估算的小,则更新为这次的估算量
CheckPointDistanceEstimate = nbytes;
else//否则(上次估算量不小于本次产生量),按加权平均缓慢下调估算量
CheckPointDistanceEstimate =(0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
2、计算上一次checkpoint的redo位置(PriorRedoPtr)所在的文件段号_logSegNo:
XLByteToSeg(PriorRedoPtr, _logSegNo);
3、计算需要保留的文件段号:从该段号_logSegNo开始的文件都不能被删除,之前的需要删除或回收:根据备机请求以及wal_keep_segments计算KeepLogSeg(recptr, &_logSegNo);
4、遍历pg_wal目录下的所有xlog文件,进行删除:RemoveOldXlogFiles
4.1 跳过时间线进行比较,如果pg_wal目录下的文件比_logSegNo小则被删除或回收。那么什么条件下会被回收?
--RemoveXlogFile
4.2 计算回收文件重命名的未来最大文件段号recycleSegNo:
1)如果本次是第一次checkpoint,则未来最大段号recycleSegNo=当前段文件号+10
2)否则调用函数XLOGfileslop计算:
2.1 估算下一次checkpoint结束时日志位置:
distance=(2.0+checkpoint_completion_target)*CheckPointDistanceEstimate
distance*=1.1
recycleSegNo = (XLogSegNo) ceil(((double) PriorRedoPtr + distance) / XLOG_SEG_SIZE);
2.2 minSegNo = PriorRedoPtr / XLOG_SEG_SIZE + ConvertToXSegs(min_wal_size_mb) - 1;
maxSegNo = PriorRedoPtr / XLOG_SEG_SIZE + ConvertToXSegs(max_wal_size_mb) - 1;
2.3 if (recycleSegNo < minSegNo)
recycleSegNo = minSegNo;
if (recycleSegNo > maxSegNo)
recycleSegNo = maxSegNo;
4.3 如果当前段文件号endlogSegNo <= recycleSegNo,则调用InstallXLogFileSegment进行回收:
1)在endlogSegNo和recycleSegNo之间找一个free slot num,即没有该段文件号的xlog文件
2)将需要删除的文件名命名为该free slot号的文件名
3)如果没有找到free slot则直接删除该文件
--RemoveXlogFile
三、代码流程
1、checkpoint顶层函数CreateCheckPoint:
CreateCheckPoint: XLogCtlInsert *Insert = &XLogCtl->Insert;//标识插入的位置 curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);//添加页头大小后的位置 //(((curInsert) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (curInsert) % XLOG_BLCKSZ)) freespace = INSERT_FREESPACE(curInsert);//curInsert所在页是否有空闲空间 if (freespace == 0){ if (curInsert % XLogSegSize == 0)//正好一个xlog段文件用完,即将使用下一个段文件,则跳过36字节 curInsert += SizeOfXLogLongPHD;//36字节 else//xlog段文件中正好一页用完,即将使用下一页,则跳过20字节 curInsert += SizeOfXLogShortPHD;//20字节 } checkPoint.redo = curInsert;//xlog文件上,实际的即将插入位置 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; ... //插入checkpoint记录后末尾位置,即下一个xlog开始的位置 recptr = XLogInsert(RM_XLOG_ID,shutdown ? XLOG_CHECKPOINT_SHUTDOWN :XLOG_CHECKPOINT_ONLINE); ... PriorRedoPtr = ControlFile->checkPointCopy.redo;//上一次checkpoint的起始位置 ... if (PriorRedoPtr != InvalidXLogRecPtr){//上一次checkpoint开始到这一次checkpoint开始,产生的XLOG大小为入参 /* CheckPointDistanceEstimate: 1、CheckPointDistanceEstimate<RedoRecPtr - PriorRedoPtr时:RedoRecPtr - PriorRedoPtr 2、CheckPointDistanceEstimate>=RedoRecPtr - PriorRedoPtr时:0.9*CheckPointDistanceEstimate+0.1*(RedoRecPtr - PriorRedoPtr) */ UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr); //_logSegNo = (PriorRedoPtr) / XLogSegSize XLByteToSeg(PriorRedoPtr, _logSegNo); KeepLogSeg(recptr, &_logSegNo); _logSegNo--; RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr); }
2、两个宏定义
#define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD)//注意:不是文件第一页
#define UsableBytesInSegment ((XLOG_SEG_SIZE / XLOG_BLCKSZ) * UsableBytesInPage - (SizeOfXLogLongPHD - SizeOfXLogShortPHD))
3、函数XLogBytePosToRecPtr
/* * Converts a "usable byte position" to XLogRecPtr. A usable byte position * is the position starting from the beginning of WAL, excluding all WAL * page headers. */ static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos) { //bytepos:不包括xlog页的页头等额外字节占用的大小 fullsegs = bytepos / UsableBytesInSegment; bytesleft = bytepos % UsableBytesInSegment; /* 1、如果bytesleft < XLOG_BLCKSZ-SizeOfXLogLongPHD,则表示定位到第一页上,则文件偏移值跳过第一页页头大小 2、如果bytesleft >= XLOG_BLCKSZ-SizeOfXLogLongPHD,则表示定位到的不是第一页 */ if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD){ /* fits on first page of segment */ seg_offset = bytesleft + SizeOfXLogLongPHD; }else{ /* account for the first page on segment with long header */ seg_offset = XLOG_BLCKSZ;//先跳过第一页 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;//去掉第一页存放XLOG的大小 fullpages = bytesleft / UsableBytesInPage;//剩下的需要几个页 bytesleft = bytesleft % UsableBytesInPage;//剩下的偏移 // 文件偏移=第一页大小+剩下的几个页大小+剩下的偏移+最后一页的页头 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD; } //result=(fullsegs) * XLOG_SEG_SIZE + seg_offset XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, result); return result; }
4、函数KeepLogSeg:
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) { //segno为当前xlog即将插入位置在第几个文件上 XLByteToSeg(recptr, segno); //XLogCtl->replicationSlotMinLSN;备机上请求预留的最小值? keep = XLogGetReplicationSlotMinimumLSN(); /* compute limit for wal_keep_segments first */ if (wal_keep_segments > 0){ /* 首先计算wal_keep_segments得到的限制: 1、比如wal_keep_segments值是10,若当前insert的位置的文件号segno为5,那么向前推进到1 2、否则向前推进wal_keep_segments后的segno前的可删除 */ if (segno <= wal_keep_segments) segno = 1; else segno = segno - wal_keep_segments; } /* then check whether slots limit removal further */ //计算slots限制,如果其算出的值小于wal_keep_segments计算出的值,则需要使用slotSegNo,slots还有用,不能删除 if (max_replication_slots > 0 && keep != InvalidXLogRecPtr){ XLByteToSeg(keep, slotSegNo); if (slotSegNo <= 0) segno = 1; else if (slotSegNo < segno) segno = slotSegNo; } /* don't delete WAL segments newer than the calculated segment */ if (segno < *logSegNo) *logSegNo = segno; //note: //如果计算出的segno比上次checkpoint时的文件号logSegNo还要小,则取这次计算的segno //如果计算出的segno比上次checkpoint时的文件号logSegNo大,则取上次checkpoint时的文件号。 //因为恢复时如果是主机,读取最新checkpoint记录失败后,会读取上一次checkpoint记录,如果上次checkpoint的文件被删除,这里就读取不到记录了 }
5、函数RemoveOldXlogFiles
/* * Recycle or remove all log files older or equal to passed segno. * * endptr is current (or recent) end of xlog, and PriorRedoRecPtr is the * redo pointer of the previous checkpoint. These are used to determine * whether we want to recycle rather than delete no-longer-wanted log files. */ static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) { //首先获取xlog目录 xldir = AllocateDir(XLOGDIR); if (xldir == NULL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not open write-ahead log directory \"%s\": %m", XLOGDIR))); /* 构建一个log文件名,用于判断,该文件之前的xlog可以删除。用不到时间线,所以可以使用0 */ XLogFileName(lastoff, 0, segno); while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL){ /* 忽略非xlog文件 */ if (!IsXLogFileName(xlde->d_name) && !IsPartialXLogFileName(xlde->d_name)) continue; /* 1、跳过时间线进行比较 */ if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0){ if (XLogArchiveCheckDone(xlde->d_name)){//如果没有开启归档:总是TRUE;否则,归档完成后才为TRUE /* Update the last removed location in shared memory first */ //XLogCtl->lastRemovedSegNo = segno; UpdateLastRemovedPtr(xlde->d_name); RemoveXlogFile(xlde->d_name, PriorRedoPtr, endptr); } } } }
6、函数RemoveXlogFile
static void RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) { XLByteToSeg(endptr, endlogSegNo); if (PriorRedoPtr == InvalidXLogRecPtr) recycleSegNo = endlogSegNo + 10; else recycleSegNo = XLOGfileslop(PriorRedoPtr); snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname); if (endlogSegNo <= recycleSegNo && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && InstallXLogFileSegment(&endlogSegNo, path, true, recycleSegNo, true)) { endlogSegNo++; }else{ rc = durable_unlink(path, LOG); } }
7、函数InstallXLogFileSegment
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, bool use_lock) { XLogFilePath(path, ThisTimeLineID, *segno); /* * We want to be sure that only one process does this at a time. */ if (use_lock) LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); if (!find_free) { /* Force installation: get rid of any pre-existing segment file */ durable_unlink(path, DEBUG1);//删除文件并持久化到磁盘 }else{ /* Find a free slot to put it in */ while (stat(path, &stat_buf) == 0){//获取文件信息并保存到stat_buf中,成功返回0 //在segno和max_segno之间找一个空闲的段号,即目录中没有这个段号的xlog文件 if ((*segno) >= max_segno){ /* Failed to find a free slot within specified range */ if (use_lock) LWLockRelease(ControlFileLock); return false; } (*segno)++; XLogFilePath(path, ThisTimeLineID, *segno); } } if (durable_link_or_rename(tmppath, path, LOG) != 0){//将tmppath重命名为path并持久化 if (use_lock) LWLockRelease(ControlFileLock); /* durable_link_or_rename already emitted log message */ return false; } if (use_lock) LWLockRelease(ControlFileLock); return true; }