#!/usr/bin/env python3
"""
Enterprise Git Performance Metrics Collector
Collects and reports Git repository performance metrics
"""
import json
import os
import subprocess
import time
from datetime import datetime, timezone
from pathlib import Path

import psutil
class GitMetricsCollector:
    """Collect performance, health, and system-resource metrics for a Git repository.

    Usage: instantiate with a repository path, call ``collect_all_metrics()``,
    then optionally ``save_metrics(path)`` to persist the JSON report.
    """

    def __init__(self, repo_path):
        """Initialize the collector.

        Args:
            repo_path: Path to the Git repository working tree (str or Path).
        """
        self.repo_path = Path(repo_path)
        self.metrics = {
            # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated).
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "repository": str(repo_path),
            "performance": {},
            "health": {},
            "resources": {},
        }

    @staticmethod
    def _parse_count_objects(output):
        """Parse ``git count-objects -v`` output into a dict.

        Each line has the form ``key: value``. Splitting on the first space
        (as the original code did) left a trailing colon on every key, so all
        later lookups fell back to their defaults; partition on ": " instead.

        Args:
            output: Raw stdout text from ``git count-objects -v``.

        Returns:
            dict mapping metric name (e.g. "count", "size-pack") to its string value.
        """
        info = {}
        for line in output.splitlines():
            key, sep, value = line.partition(": ")
            if sep:
                info[key] = value.strip()
        return info

    def collect_repository_stats(self):
        """Collect repository size and object-count statistics into metrics['health']."""
        # Total on-disk size of every regular file under the repo (includes .git).
        repo_size = sum(
            f.stat().st_size for f in self.repo_path.rglob('*') if f.is_file()
        )
        # Run git in the repo via cwd= instead of os.chdir(): avoids mutating
        # process-global state that other methods would silently depend on.
        count_objects = subprocess.run(
            ['git', 'count-objects', '-v'],
            capture_output=True, text=True, cwd=self.repo_path,
        )
        objects_info = self._parse_count_objects(count_objects.stdout)
        self.metrics["health"] = {
            "repository_size_bytes": repo_size,
            "loose_objects": int(objects_info.get("count", "0")),
            "pack_files": int(objects_info.get("packs", "0")),
            # NOTE: git reports size-pack in KiB, not bytes.
            "size_pack": int(objects_info.get("size-pack", "0")),
            "prune_packable": int(objects_info.get("prune-packable", "0")),
        }

    def benchmark_operations(self):
        """Benchmark common read-only Git operations into metrics['performance'].

        Records wall-clock duration, RSS delta of this collector process
        (NOTE: the git work happens in a child process, so this delta is only
        a rough proxy), and whether the command succeeded within 30 s.
        """
        operations = {
            "status": ["git", "status", "--porcelain"],
            "log_recent": ["git", "log", "--oneline", "-100"],
            "branch_list": ["git", "branch", "-a"],
            "remote_list": ["git", "remote", "-v"],
        }
        performance_results = {}
        for op_name, cmd in operations.items():
            start_time = time.time()
            start_memory = psutil.Process().memory_info().rss
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True,
                    timeout=30, cwd=self.repo_path,
                )
                success = result.returncode == 0
            except subprocess.TimeoutExpired:
                success = False
            end_time = time.time()
            end_memory = psutil.Process().memory_info().rss
            performance_results[op_name] = {
                "duration_seconds": end_time - start_time,
                "memory_delta_bytes": end_memory - start_memory,
                "success": success,
            }
        self.metrics["performance"] = performance_results

    def collect_system_resources(self):
        """Snapshot host CPU, memory, and disk usage into metrics['resources']."""
        disk = psutil.disk_usage(str(self.repo_path))
        memory = psutil.virtual_memory()
        self.metrics["resources"] = {
            # interval=1 blocks for one second to get a meaningful CPU sample.
            "cpu_percent": psutil.cpu_percent(interval=1),
            "memory_percent": memory.percent,
            "disk_usage_percent": disk.percent,
            "available_memory_gb": memory.available / (1024 ** 3),
            "disk_free_gb": disk.free / (1024 ** 3),
        }

    def generate_health_score(self):
        """Compute metrics['health_score'] (0-100) and metrics['status'].

        Starts at 100 and deducts points for slow `git status`, large
        repository size, many loose objects, and constrained host resources.
        Status thresholds: >=80 HEALTHY, >=60 WARNING, else CRITICAL.
        """
        score = 100
        # Slow `git status` is the most user-visible symptom. Use .get so a
        # missing/failed benchmark doesn't raise KeyError.
        status_duration = (
            self.metrics["performance"].get("status", {}).get("duration_seconds", 0.0)
        )
        if status_duration > 1.0:
            score -= 10
        # Large repositories: up to 20 points.
        repo_size_gb = self.metrics["health"]["repository_size_bytes"] / (1024 ** 3)
        if repo_size_gb > 1:
            score -= min(20, repo_size_gb * 5)
        # Many loose objects suggest `git gc` is overdue: up to 30 points.
        loose_objects = self.metrics["health"]["loose_objects"]
        if loose_objects > 1000:
            score -= min(30, loose_objects / 100)
        # Host resource pressure.
        if self.metrics["resources"]["memory_percent"] > 80:
            score -= 15
        if self.metrics["resources"]["disk_usage_percent"] > 90:
            score -= 20
        self.metrics["health_score"] = max(0, int(score))
        # Map the (possibly float) raw score onto a coarse status label.
        if score >= 80:
            self.metrics["status"] = "HEALTHY"
        elif score >= 60:
            self.metrics["status"] = "WARNING"
        else:
            self.metrics["status"] = "CRITICAL"

    def collect_all_metrics(self):
        """Run every collection stage in order and return the metrics dict."""
        print("Collecting repository statistics...")
        self.collect_repository_stats()
        print("Benchmarking Git operations...")
        self.benchmark_operations()
        print("Collecting system resources...")
        self.collect_system_resources()
        print("Generating health score...")
        self.generate_health_score()
        return self.metrics

    def save_metrics(self, output_file):
        """Write the metrics dict to *output_file* as indented JSON."""
        with open(output_file, 'w') as f:
            json.dump(self.metrics, f, indent=2)
        print(f"Metrics saved to {output_file}")
if __name__ == "__main__":
    import sys

    # CLI: [repo_path] [output_file] — both positional and optional.
    cli_args = sys.argv[1:]
    repo_path = cli_args[0] if cli_args else "."
    if len(cli_args) > 1:
        output_file = cli_args[1]
    else:
        output_file = f"git-metrics-{int(time.time())}.json"

    collector = GitMetricsCollector(repo_path)
    metrics = collector.collect_all_metrics()
    collector.save_metrics(output_file)

    # Human-readable summary on stdout.
    print("\n=== Performance Summary ===")
    print(f"Repository: {metrics['repository']}")
    print(f"Health Score: {metrics['health_score']}/100")
    print(f"Status: {metrics['status']}")
    print(f"Size: {metrics['health']['repository_size_bytes'] / (1024**2):.1f} MB")
    print(f"Git Status: {metrics['performance']['status']['duration_seconds']:.2f}s")