Appearance
对象解析实现
Git Doctor 核心解析器的详细实现说明。
源码位置
src/core/git-parser.ts
核心函数
parseLooseObject
解析松散对象文件。
typescript
/**
 * Reads and decodes one loose object stored at
 * `<gitDir>/objects/<first 2 hex chars>/<remaining hex chars>`.
 *
 * The file is inflated with zlib and must have the form
 * `<type> SP <decimal size> NUL <content>`.
 *
 * @param gitDir - Path of the git directory that contains `objects/`.
 * @param sha - Hex object id used to build the file path.
 * @returns The decoded object, or `null` when `sha` is not hexadecimal, the
 *          file does not exist, it cannot be read or inflated, or its header
 *          is malformed. Read/parse failures are logged with `console.error`.
 */
export function parseLooseObject(
  gitDir: string,
  sha: string
): GitObject | null {
  // Ids frequently come from repository data (refs, parent headers). Refusing
  // anything that is not plain hex keeps `..` or separators out of the path.
  if (!/^[0-9a-f]{3,}$/i.test(sha)) {
    return null
  }

  const objectPath = path.join(gitDir, 'objects', sha.slice(0, 2), sha.slice(2))
  if (!fs.existsSync(objectPath)) {
    return null
  }

  try {
    const decompressed = zlib.inflateSync(fs.readFileSync(objectPath))

    // The header ends at the first NUL byte; without it there is no object.
    const nullIndex = decompressed.indexOf(0)
    if (nullIndex === -1) {
      throw new Error('object header has no NUL terminator')
    }

    const header = decompressed.subarray(0, nullIndex).toString()
    const match = /^(blob|tree|commit|tag) (\d+)$/.exec(header)
    if (!match) {
      throw new Error(`malformed object header "${header}"`)
    }

    return {
      // Safe narrowing: the pattern above only admits these four literals.
      type: match[1] as 'blob' | 'tree' | 'commit' | 'tag',
      size: Number(match[2]),
      content: decompressed.subarray(nullIndex + 1)
    }
  } catch (error) {
    console.error(`Failed to parse object ${sha}:`, error)
    return null
  }
}
parseTreeObject
解析 Tree 对象的二进制内容。
typescript
/**
 * Decodes the binary body of a tree object into its entries.
 *
 * Each entry is laid out as `<mode> SP <name> NUL <raw object id bytes>`.
 * Parsing stops at the first entry that is incomplete (missing separator or
 * fewer id bytes than expected); entries decoded before that point are kept.
 *
 * @param content - Tree object body (the bytes after the object header).
 * @param hashLength - Size of a raw object id in bytes: 20 for SHA-1 (the
 *                     default), 32 for SHA-256 repositories.
 * @returns Entries in the order they appear, with `sha` rendered as lowercase hex.
 * @throws RangeError when `hashLength` is not a positive integer.
 */
export function parseTreeObject(
  content: Buffer,
  hashLength: number = 20
): TreeEntry[] {
  if (!Number.isInteger(hashLength) || hashLength <= 0) {
    throw new RangeError(`hashLength must be a positive integer, got ${hashLength}`)
  }

  const entries: TreeEntry[] = []
  let offset = 0

  while (offset < content.length) {
    // mode runs up to the first space
    const spaceIndex = content.indexOf(0x20, offset)
    if (spaceIndex === -1) break

    // name runs up to the NUL; it may itself contain spaces
    const nullIndex = content.indexOf(0x00, spaceIndex + 1)
    if (nullIndex === -1) break

    // the raw (binary, not hex) object id follows immediately
    const idStart = nullIndex + 1
    const idEnd = idStart + hashLength
    if (idEnd > content.length) break

    entries.push({
      mode: content.toString('utf8', offset, spaceIndex),
      name: content.toString('utf8', spaceIndex + 1, nullIndex),
      sha: content.toString('hex', idStart, idEnd)
    })
    offset = idEnd
  }

  return entries
}
parseCommitObject
解析 Commit 对象。
typescript
/**
 * Parses the body of a commit object.
 *
 * Header lines are read up to the first empty line; everything after that
 * line is the commit message (returned trimmed). Recognised headers are
 * `tree`, `parent` (repeatable), `author` and `committer`; other headers are
 * ignored.
 *
 * @param sha - Object id of the commit, copied into the result unchanged.
 * @param content - Commit object body (the bytes after the object header).
 * @returns The parsed commit. Fields whose header is missing or unparseable
 *          keep their initial value: empty string, empty array, or the
 *          current time for the two dates.
 */
export function parseCommitObject(
  sha: string,
  content: Buffer
): CommitInfo {
  const lines = content.toString('utf-8').split('\n')
  const info: CommitInfo = {
    sha,
    tree: '',
    parents: [],
    author: '',
    authorEmail: '',
    authorDate: new Date(),
    committer: '',
    committerEmail: '',
    committerDate: new Date(),
    message: ''
  }

  // The first empty line separates headers from the message.
  const blankIndex = lines.indexOf('')
  const headerEnd = blankIndex === -1 ? lines.length : blankIndex

  for (const line of lines.slice(0, headerEnd)) {
    if (line.startsWith('tree ')) {
      info.tree = line.slice(5)
    } else if (line.startsWith('parent ')) {
      info.parents.push(line.slice(7))
    } else if (line.startsWith('author ')) {
      const identity = parseCommitIdentity(line)
      if (identity) {
        info.author = identity.name
        info.authorEmail = identity.email
        info.authorDate = identity.date
      }
    } else if (line.startsWith('committer ')) {
      const identity = parseCommitIdentity(line)
      if (identity) {
        info.committer = identity.name
        info.committerEmail = identity.email
        info.committerDate = identity.date
      }
    }
  }

  info.message = lines.slice(headerEnd + 1).join('\n').trim()
  return info
}

/**
 * Splits an `author`/`committer` header of the form
 * `<field> Name <email> <epoch seconds> <timezone>` into its parts.
 * Name and email may be empty; the timezone is not used because the
 * timestamp is already an absolute epoch value.
 */
function parseCommitIdentity(
  line: string
): { name: string; email: string; date: Date } | null {
  const match = /^(?:author|committer) (.*) <(.*)> (\d+)/.exec(line)
  if (!match) {
    return null
  }
  const [, name = '', email = '', seconds = '0'] = match
  return { name, email, date: new Date(parseInt(seconds, 10) * 1000) }
}
walkLooseObjects
遍历所有松散对象。
typescript
/**
 * Lazily enumerates the loose objects under `<gitDir>/objects`.
 *
 * Only sub-directories named with two lowercase hex characters are visited
 * (which also excludes `pack` and `info`), and only files whose name is the
 * remaining 38 (SHA-1) or 62 (SHA-256) hex characters are reported. Anything
 * else in a fan-out directory, such as a leftover temporary file, is skipped
 * so that callers never receive a made-up object id.
 *
 * @param gitDir - Path of the git directory that contains `objects/`.
 * @returns A generator of `{ sha, path }` pairs in directory-listing order;
 *          it yields nothing when `objects/` does not exist.
 */
export function* walkLooseObjects(
  gitDir: string
): Generator<{ sha: string; path: string }> {
  const fanoutDirPattern = /^[0-9a-f]{2}$/
  const objectFilePattern = /^(?:[0-9a-f]{38}|[0-9a-f]{62})$/

  const objectsDir = path.join(gitDir, 'objects')
  if (!fs.existsSync(objectsDir)) {
    return
  }

  for (const entry of fs.readdirSync(objectsDir, { withFileTypes: true })) {
    if (!entry.isDirectory() || !fanoutDirPattern.test(entry.name)) {
      continue
    }

    const subDir = path.join(objectsDir, entry.name)
    for (const objFile of fs.readdirSync(subDir)) {
      if (!objectFilePattern.test(objFile)) {
        continue
      }
      yield { sha: entry.name + objFile, path: path.join(subDir, objFile) }
    }
  }
}
readRef
读取引用文件,支持符号引用递归解析。
typescript
/**
 * Reads a ref file and follows symbolic refs (`ref: <target>`) until a
 * non-symbolic value is found.
 *
 * @param gitDir - Path of the git directory.
 * @param refPath - Ref path relative to `gitDir`, e.g. `HEAD` or `refs/heads/main`.
 * @returns The trimmed content of the final ref file, or `null` when a file
 *          in the chain does not exist or the chain loops back on itself.
 */
export function readRef(
  gitDir: string,
  refPath: string
): string | null {
  // Remember every file already followed so a cyclic chain ends with `null`
  // instead of recursing until the stack overflows.
  const seen = new Set<string>()
  let current = refPath

  for (;;) {
    const fullPath = path.join(gitDir, current)
    if (seen.has(fullPath) || !fs.existsSync(fullPath)) {
      return null
    }
    seen.add(fullPath)

    const content = fs.readFileSync(fullPath, 'utf-8').trim()
    if (!content.startsWith('ref: ')) {
      return content
    }
    current = content.slice(5)
  }
}
walkTree
递归遍历 Tree,获取所有文件。
typescript
/**
 * Recursively flattens a tree object into the list of non-directory entries
 * it contains.
 *
 * Trees are loaded with `parseLooseObject`, so a tree that cannot be loaded
 * that way (or an id that is not a tree) contributes no entries.
 *
 * @param gitDir - Path of the git directory.
 * @param treeSha - Object id of the tree to walk.
 * @param basePath - Path prefix for entries of this tree; empty for the root.
 * @returns One `{ path, sha, mode }` record per non-directory entry, with
 *          `path` built from `/`-joined entry names.
 */
export function walkTree(
  gitDir: string,
  treeSha: string,
  basePath: string = ''
): Array<{ path: string; sha: string; mode: string }> {
  const files: Array<{ path: string; sha: string; mode: string }> = []

  const obj = parseLooseObject(gitDir, treeSha)
  if (!obj || obj.type !== 'tree') {
    return files
  }

  for (const entry of parseTreeObject(obj.content)) {
    const fullPath = basePath ? `${basePath}/${entry.name}` : entry.name
    const isDirectory = entry.mode === '40000' || entry.mode === '040000'

    if (isDirectory) {
      // Append one by one: spreading a large subtree into push() passes every
      // element as a call argument and can exceed the engine's argument limit.
      for (const nested of walkTree(gitDir, entry.sha, fullPath)) {
        files.push(nested)
      }
    } else {
      files.push({ path: fullPath, sha: entry.sha, mode: entry.mode })
    }
  }

  return files
}
getAllCommits
BFS 遍历获取所有提交。
typescript
/**
 * Collects the ids of all commits reachable from the local and remote
 * branches, in breadth-first order.
 *
 * Objects are loaded with `parseLooseObject`; an id that cannot be loaded
 * that way (for example because it only exists in a pack file) is skipped,
 * and so is everything reachable only through it.
 *
 * @param gitDir - Path of the git directory.
 * @returns Commit ids in the order they were first visited, without duplicates.
 */
export async function getAllCommits(gitDir: string): Promise<string[]> {
  // Seed the traversal with every branch tip.
  // NOTE(review): getLocalBranches/getRemoteBranches are not shown here;
  // they are assumed to return items exposing a `sha` string.
  const queue: string[] = []
  for (const branch of getLocalBranches(gitDir)) {
    queue.push(branch.sha)
  }
  for (const branch of getRemoteBranches(gitDir)) {
    queue.push(branch.sha)
  }

  const visited = new Set<string>()
  const commits: string[] = []

  // A moving cursor replaces queue.shift(), which re-indexes the whole array
  // on every call and makes the traversal quadratic on long histories.
  for (let head = 0; head < queue.length; head++) {
    const sha = queue[head]
    // `visited` also covers missing and non-commit ids, so each id is read
    // from disk at most once.
    if (sha === undefined || visited.has(sha)) {
      continue
    }
    visited.add(sha)

    const obj = parseLooseObject(gitDir, sha)
    if (!obj || obj.type !== 'commit') {
      continue
    }

    commits.push(sha)
    queue.push(...parseCommitObject(sha, obj.content).parents)
  }

  return commits
}
getFileHistory
追踪文件的修改历史。
typescript
/**
 * Builds the change history of one file by comparing its blob id across
 * commits.
 *
 * Commits are processed oldest-first by author date. A commit is recorded as
 * `added` when the file appears after being absent, `modified` when its blob
 * id differs from the previously seen one, and `deleted` when it disappears
 * after being present.
 *
 * @param gitDir - Path of the git directory.
 * @param filePath - Path of the file as produced by `walkTree` (`/`-separated).
 * @returns History entries, newest first.
 */
export async function getFileHistory(
  gitDir: string,
  filePath: string
): Promise<FileHistoryEntry[]> {
  const history: FileHistoryEntry[] = []
  // NOTE(review): getAllCommitInfos is not shown here; it is assumed to
  // resolve to CommitInfo-shaped items.
  const commits = await getAllCommitInfos(gitDir)

  // Walk oldest-first so "previous" always means the earlier state.
  const sortedCommits = [...commits].sort(
    (a, b) => a.authorDate.getTime() - b.authorDate.getTime()
  )

  let previousBlobSha: string | null = null

  for (const commit of sortedCommits) {
    const file = walkTree(gitDir, commit.tree).find(f => f.path === filePath)

    let changeType: FileHistoryEntry['changeType'] | null = null
    if (file) {
      if (previousBlobSha === null) {
        changeType = 'added'
      } else if (file.sha !== previousBlobSha) {
        changeType = 'modified'
      }
      previousBlobSha = file.sha
    } else if (previousBlobSha !== null) {
      changeType = 'deleted'
      previousBlobSha = null
    }

    if (changeType !== null) {
      history.push({
        commitSha: commit.sha,
        changeType,
        author: commit.author,
        // FileHistoryEntry declares authorEmail as required.
        authorEmail: commit.authorEmail,
        date: commit.authorDate,
        message: commit.message
      })
    }
  }

  return history.reverse() // newest first
}
类型定义
typescript
// types/index.ts
/** A decoded git object: the header fields plus the raw body. */
export interface GitObject {
/** Object kind taken from the object header. */
type: 'blob' | 'tree' | 'commit' | 'tag'
/** Size value declared in the object header. */
size: number
/** Bytes that follow the NUL terminator of the header. */
content: Buffer
}
/** One entry of a tree object. */
export interface TreeEntry {
/** File mode text exactly as stored in the tree, e.g. `100644` or `40000`. */
mode: string
/** Entry name (a single path component). */
name: string
/** Object id of the entry, as a hex string. */
sha: string
}
/** Parsed fields of a commit object. */
export interface CommitInfo {
/** Object id of the commit itself. */
sha: string
/** Value of the `tree` header. */
tree: string
/** Values of the `parent` headers, in order of appearance. */
parents: string[]
/** Name part of the `author` header. */
author: string
/** Email part of the `author` header (without angle brackets). */
authorEmail: string
/** Author timestamp, converted from epoch seconds. */
authorDate: Date
/** Name part of the `committer` header. */
committer: string
/** Email part of the `committer` header (without angle brackets). */
committerEmail: string
/** Committer timestamp, converted from epoch seconds. */
committerDate: Date
/** Commit message (the text after the first empty line). */
message: string
}
/** One change to a tracked file, attributed to the commit that introduced it. */
export interface FileHistoryEntry {
/** Object id of the commit in which the change was observed. */
commitSha: string
/** How the file changed relative to the previously examined commit. */
changeType: 'added' | 'modified' | 'deleted'
/** Author name of the commit. */
author: string
/** Author email of the commit. */
authorEmail: string
/** Author date of the commit. */
date: Date
/** Commit message. */
message: string
}性能考虑
缓存策略
typescript
/**
 * Module-level cache of successfully parsed loose objects, keyed by git
 * directory and object id. Entries are never evicted.
 */
const objectCache = new Map<string, GitObject>()

/**
 * Memoising wrapper around `parseLooseObject`.
 *
 * Only successful parses are cached; a `null` result is looked up again on
 * the next call.
 *
 * @param gitDir - Path of the git directory.
 * @param sha - Hex object id.
 * @returns The cached or freshly parsed object, or `null` when
 *          `parseLooseObject` returns `null`.
 */
export function parseLooseObjectCached(
  gitDir: string,
  sha: string
): GitObject | null {
  // Include gitDir in the key: with the id alone, an object cached from one
  // repository would be reported as present in another that lacks it.
  // NUL cannot occur in a path, so the separator is unambiguous.
  const cacheKey = `${gitDir}\u0000${sha}`

  const cached = objectCache.get(cacheKey)
  if (cached !== undefined) {
    return cached
  }

  const obj = parseLooseObject(gitDir, sha)
  if (obj) {
    objectCache.set(cacheKey, obj)
  }
  return obj
}
并行处理
typescript
/**
 * Parses a batch of commits by id.
 *
 * Every id is loaded with `parseLooseObject`; ids that cannot be loaded or
 * that are not commits are left out of the result.
 *
 * @param gitDir - Path of the git directory.
 * @param shas - Object ids to parse.
 * @returns The parsed commits, in the same relative order as `shas`.
 */
async function parseCommitsBatch(
  gitDir: string,
  shas: string[]
): Promise<CommitInfo[]> {
  const parseOne = async (sha: string): Promise<CommitInfo | null> => {
    const loaded = parseLooseObject(gitDir, sha)
    return loaded?.type === 'commit'
      ? parseCommitObject(sha, loaded.content)
      : null
  }

  const parsed = await Promise.all(shas.map(parseOne))
  return parsed.filter((item): item is CommitInfo => Boolean(item))
}