This project detects deepfake videos using a Vision Transformer (ViT) model, classifying frames as real or manipulated with high accuracy.
- Dataset Preparation
- Model Architecture
- Training Process
- Validation and Metrics
- Video Prediction
- Installation and Setup
- Results
- Website Usage
- Real Videos: `/DFD_original_sequences`
- Manipulated Videos: `/DFD_manipulated_sequences`
Extract frames at 1 frame per second for model input.
- Base Model: ViT (`vit_base_patch16_224`)
- Input Size: 224x224 pixels
- Classes: 2 (Real, Manipulated)
- Pretrained Weights: Yes (ImageNet)
model = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=2)
model.to(device)
model = nn.DataParallel(model)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

print(classification_report(all_labels, all_predictions, target_names=['Real', 'Manipulated']))

sns.heatmap(cm, annot=True, cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

def predict_video(video_path, model, transform, device):
    cap = cv2.VideoCapture(video_path)
    real_count, manipulated_count = 0, 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        image = transform(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = model(image)
            _, predicted = torch.max(outputs, 1)
        real_count += (predicted.item() == 0)
        manipulated_count += (predicted.item() == 1)
    cap.release()

pip install timm torch torchvision opencv-python pillow scikit-learn seaborn matplotlib

print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

- Training Accuracy: ~89.71%
- Validation Accuracy: ~87.77%



