Spaces:

blanchon
/

motion_latent_diffusion_standalone_demo

Running on Zero

App Files Files Community

motion_latent_diffusion_standalone_demo / visualize.py

blanchon

Update

f875353 18 days ago

raw

history blame contribute delete

8.35 kB

	"""
	Simple 3D skeleton motion visualizer for HumanML3D motion data.
	Usage: python visualize.py <motion.pt> [--output output.mp4] [--fps 20]
	"""

	import argparse
	import numpy as np
	import torch
	import matplotlib.pyplot as plt
	from matplotlib.animation import FuncAnimation, FFMpegWriter
	from pathlib import Path


	# HumanML3D skeleton structure (22 joints)
	# Kinematic chain based on HumanML3D dataset specification
	# From mld/utils/joints.py and datasets/HumanML3D/paramUtil.py
	SKELETON_CHAINS = [
	[0, 3, 6, 9, 12, 15], # Body: root -> BP -> BT -> BLN -> BMN -> BUN (head)
	[9, 14, 17, 19, 21], # Left arm: BLN -> LSI -> LS -> LE -> LW
	[9, 13, 16, 18, 20], # Right arm: BLN -> RSI -> RS -> RE -> RW
	[0, 2, 5, 8, 11], # Left leg: root -> LH -> LK -> LMrot -> LF
	[0, 1, 4, 7, 10], # Right leg: root -> RH -> RK -> RMrot -> RF
	]


	def load_motion(pt_path: str) -> np.ndarray:
	"""
	Load motion data from .pt file (PyTorch tensor).

	HumanML3D format: (frames, 22, 3) where last dimension is (x, y, z)
	In HumanML3D: Y is vertical (up), X and Z are horizontal
	For proper 3D visualization: we'll map Y -> Z (vertical), X -> X, Z -> Y

	Returns numpy array for matplotlib visualization.
	"""
	# Load PyTorch tensor and convert to numpy for visualization
	motion_tensor = torch.load(pt_path, map_location="cpu")
	motion = motion_tensor.numpy()

	print(f"Loaded motion: {motion.shape}")
	print(f" Frames: {motion.shape[0]}")
	print(f" Joints: {motion.shape[1]}")
	print(f" Dimensions: {motion.shape[2]}")

	# Remap axes: HumanML3D (x, y, z) -> Visualization (x, z, y)
	# This makes Y axis (vertical in HumanML3D) become Z axis (vertical in plot)
	motion_remapped = motion.copy()
	motion_remapped[:, :, [0, 1, 2]] = motion[:, :, [0, 2, 1]] # x, z, y <- x, y, z

	return motion_remapped


	def setup_3d_plot():
	"""Set up the 3D plot with proper viewing angle."""
	fig = plt.figure(figsize=(10, 10))
	ax = fig.add_subplot(111, projection="3d")

	# Set labels
	ax.set_xlabel("X")
	ax.set_ylabel("Y")
	ax.set_zlabel("Z")

	return fig, ax


	def update_frame(frame_idx: int, motion: np.ndarray, ax, lines: list, points: list):
	"""Update function for animation."""
	ax.clear()

	# Get current frame
	frame = motion[frame_idx]

	# Set consistent axis limits based on all frames
	all_coords = motion.reshape(-1, 3)
	margin = 0.5
	x_range = [all_coords[:, 0].min() - margin, all_coords[:, 0].max() + margin]
	y_range = [all_coords[:, 1].min() - margin, all_coords[:, 1].max() + margin]
	z_range = [0, all_coords[:, 2].max() + margin] # Z starts at ground (0)

	ax.set_xlim(x_range)
	ax.set_ylim(y_range)
	ax.set_zlim(z_range)

	# Set labels and title
	ax.set_xlabel("X", fontsize=10)
	ax.set_ylabel("Y", fontsize=10)
	ax.set_zlabel("Z (Height)", fontsize=10)
	ax.set_title(f"Frame {frame_idx + 1}/{len(motion)}", fontsize=14, pad=20)

	# Set viewing angle (slightly elevated, rotated for better view)
	ax.view_init(elev=15, azim=45)

	# Draw ground plane at z=0
	xx, yy = np.meshgrid(
	np.linspace(x_range[0], x_range[1], 2), np.linspace(y_range[0], y_range[1], 2)
	)
	zz = np.zeros_like(xx)
	ax.plot_surface(xx, yy, zz, alpha=0.1, color="gray")

	# Plot skeleton bones with different colors for different parts
	colors = ["red", "blue", "green", "cyan", "magenta"]
	for chain_idx, chain in enumerate(SKELETON_CHAINS):
	color = colors[chain_idx % len(colors)]
	for i in range(len(chain) - 1):
	j1, j2 = chain[i], chain[i + 1]
	if j1 < len(frame) and j2 < len(frame):
	xs = [frame[j1, 0], frame[j2, 0]]
	ys = [frame[j1, 1], frame[j2, 1]]
	zs = [frame[j1, 2], frame[j2, 2]]
	linewidth = 4.0 if chain_idx == 0 else 3.0 # Thicker for body
	ax.plot(xs, ys, zs, color=color, linewidth=linewidth, alpha=0.8)

	# Plot joints (darker red)
	ax.scatter(
	frame[:, 0],
	frame[:, 1],
	frame[:, 2],
	c="darkred",
	marker="o",
	s=50,
	alpha=0.9,
	edgecolors="black",
	linewidth=0.5,
	)

	# Add grid
	ax.grid(True, alpha=0.3)

	return (ax,)


	def create_video_from_joints(
	joints: torch.Tensor \| np.ndarray, output_path: str, fps: int = 20
	) -> str:
	"""
	Create 3D skeleton animation directly from joint tensor or array.

	Args:
	joints: Joint positions as torch.Tensor or np.ndarray (frames, 22, 3)
	output_path: Path to save video
	fps: Frames per second for the video

	Returns:
	Path to output video
	"""
	# Convert to numpy if it's a torch tensor
	if isinstance(joints, torch.Tensor):
	joints = joints.cpu().numpy()

	# Remap axes for visualization (same as load_motion)
	motion = joints.copy()
	motion[:, :, [0, 1, 2]] = joints[:, :, [0, 2, 1]] # x, z, y <- x, y, z

	# Set up plot
	fig, ax = setup_3d_plot()
	lines, points = [], []

	# Create animation
	anim = FuncAnimation(
	fig,
	update_frame,
	frames=len(motion),
	fargs=(motion, ax, lines, points),
	interval=1000 / fps,
	blit=False,
	repeat=True,
	)

	# Save video using FFMpeg
	writer = FFMpegWriter(fps=fps, bitrate=1800, codec="libx264")
	anim.save(str(output_path), writer=writer, dpi=100)

	plt.close(fig)
	return str(output_path)


	def visualize_motion(
	pt_path: str, output_path: str \| None = None, fps: int = 20, show: bool = False
	) -> str:
	"""
	Visualize motion from .pt file (PyTorch tensor).

	Args:
	pt_path: Path to .pt motion file
	output_path: Path to save video (if None, will auto-generate)
	fps: Frames per second for the video
	show: If True, display the animation in a window

	Returns:
	Path to the generated video file
	"""
	# Load motion data (converts to numpy internally for matplotlib)
	motion = load_motion(pt_path)

	# Create output path if not specified
	if output_path is None:
	output_path = Path(pt_path).with_suffix(".mp4")
	else:
	output_path = Path(output_path)

	print(f"\nCreating animation with {fps} FPS...")

	# Set up plot
	fig, ax = setup_3d_plot()
	lines, points = [], []

	# Create animation
	anim = FuncAnimation(
	fig,
	update_frame,
	frames=len(motion),
	fargs=(motion, ax, lines, points),
	interval=1000 / fps,
	blit=False,
	repeat=True,
	)

	# Save video using FFMpeg
	print(f"Saving video to: {output_path}")
	writer = FFMpegWriter(fps=fps, bitrate=1800, codec="libx264")
	anim.save(str(output_path), writer=writer, dpi=100)
	print("✓ Video saved successfully!")

	# Show animation if requested
	if show:
	plt.show()

	plt.close(fig)
	return str(output_path)


	def main() -> int:
	"""Main entry point for CLI"""
	parser = argparse.ArgumentParser(
	description="Visualize HumanML3D motion data as 3D skeleton animation"
	)
	parser.add_argument("input", type=str, help="Path to input .pt motion file")
	parser.add_argument(
	"--output",
	"-o",
	type=str,
	default=None,
	help="Path to output video file (default: input_name.mp4)",
	)
	parser.add_argument(
	"--fps",
	type=int,
	default=20,
	help="Frames per second for output video (default: 20)",
	)
	parser.add_argument(
	"--show",
	action="store_true",
	help="Display the animation in a window (in addition to saving)",
	)

	args = parser.parse_args()

	# Check if input file exists
	input_path = Path(args.input)
	if not input_path.exists():
	print(f"Error: Input file not found: {args.input}")
	return 1

	# Visualize the motion
	try:
	output_path = visualize_motion(
	args.input, output_path=args.output, fps=args.fps, show=args.show
	)
	print(f"\n✓ Done! Video saved to: {output_path}")
	return 0
	except Exception as e:
	print(f"\n✗ Error: {e}")
	import traceback

	traceback.print_exc()
	return 1


	if __name__ == "__main__":
	exit(main())