Spaces:

facebook
/

fairchem_uma_demo

Running on CPU Upgrade

App Files Files Community

fairchem_uma_demo / simulation_scripts.py

zulissimeta

remove incar/poscar, add extxyz

4bac676 5 months ago

raw

history blame contribute delete

14 kB

	"""
	Copyright (c) Meta Platforms, Inc. and affiliates.

	This source code is licensed under the MIT license found in the
	LICENSE file in the root directory of this source tree.
	"""

	import glob
	import hashlib
	import os
	import tempfile

	import ase
	import ase.io
	import gradio as gr
	import numpy as np
	from ase import units
	from ase.filters import FrechetCellFilter
	from ase.io.trajectory import Trajectory
	from ase.md import MDLogger
	from ase.md.nose_hoover_chain import NoseHooverChainNVT
	from ase.md.velocitydistribution import MaxwellBoltzmannDistribution
	from ase.md.verlet import VelocityVerlet
	from ase.optimize import LBFGS

	from hf_calculator import HFEndpointCalculator, validate_uma_access


	def hash_file(file_path):
	"""Generate MD5 hash for a file."""
	hasher = hashlib.md5()
	with open(file_path, "rb") as f:
	data = f.read()
	hasher.update(data)
	return hasher.hexdigest()


	EXAMPLE_FILE_HASHES = set(
	[hash_file(file_path) for file_path in glob.glob("examples/*")]
	)
	MAX_ATOMS = os.environ.get("MAX_ATOMS", 2000)
	INFERENCE_ENDPOINT_URL = os.environ["INFERENCE_ENDPOINT_URL"]


	def validate_ase_atoms_and_login(
	structure_file: dict \| str,
	login_button_value: str,
	oauth_token: gr.OAuthToken \| None,
	) -> tuple[gr.Button, gr.Button, str]:
	# Validate and write the uploaded file content
	if not structure_file:
	return (
	gr.Button(interactive=False),
	gr.Button(interactive=False),
	"Missing input structure!",
	)

	if isinstance(structure_file, dict):
	structure_file = structure_file["path"]

	try:
	atoms = ase.io.read(structure_file)
	except Exception as e:
	return (
	gr.Button(interactive=False),
	gr.Button(interactive=False),
	f"Failed to load ASE input: {str(e)}!",
	)

	if len(atoms) == 0:
	return (
	gr.Button(interactive=False),
	gr.Button(interactive=False),
	"No atoms in the structure file!",
	)
	elif not (all(atoms.pbc) or np.all(~np.array(atoms.pbc))):
	return (
	gr.Button(interactive=False),
	gr.Button(interactive=False),
	f"Your atoms has PBC {atoms.pbc}. Mixed PBC are not supported yet - please set PBC all True or False in your structure before uploading!",
	)
	elif len(atoms) > MAX_ATOMS:
	return (
	gr.Button(interactive=False),
	gr.Button(interactive=False),
	f"Structure file contains {len(atoms)}, which is more than {MAX_ATOMS} atoms. Please use a smaller structure for this demo, or run this on a local machine!",
	)
	elif (hash_file(structure_file) not in EXAMPLE_FILE_HASHES) and (
	(oauth_token is None) or not validate_uma_access(oauth_token=oauth_token)
	):
	return (
	gr.Button(interactive=False),
	gr.Button(interactive=False),
	"""
	To use your own structures, you need access to the [gated UMA model repository](https://huggingface.co/facebook/UMA) and you need to login with the button above. See the final tab above '3. Try UMA with your own structures!' for more details and debugging steps!

	Note that uploaded structure will be stored by this demo to analyze model usage and identify domains where model accuracy can be improved.
	""",
	)
	else:
	return (
	gr.Button(interactive=True),
	gr.Button(interactive=True),
	"",
	)


	def load_check_ase_atoms(structure_file):
	# Validate and write the uploaded file content
	if not structure_file:
	raise gr.Error("You need an input structure file to run a simulation!")

	try:
	atoms = ase.io.read(structure_file)

	if not (all(atoms.pbc) or np.all(~np.array(atoms.pbc))):
	raise gr.Error(
	"Mixed PBC are not supported yet - please set PBC all True or False in your structure before uploading"
	)

	if len(atoms) == 0:
	raise gr.Error("Error: Structure file contains no atoms.")

	if len(atoms) > MAX_ATOMS:
	raise gr.Error(
	f"Error: Structure file contains {len(atoms)}, which is more than {MAX_ATOMS} atoms. Please use a smaller structure for this demo, or run this on a local machine!"
	)

	atoms.positions -= atoms.get_center_of_mass()
	cell_center = atoms.get_cell().sum(axis=0) / 2
	atoms.positions += cell_center

	return atoms
	except Exception as e:
	raise gr.Error(f"Error loading structure with ASE: {str(e)}")


	def run_md_simulation(
	structure_file,
	num_steps,
	num_prerelax_steps,
	md_timestep,
	temperature_k,
	md_ensemble,
	task_name,
	total_charge,
	spin_multiplicity,
	explanation: str \| None = None,
	oauth_token: gr.OAuthToken \| None = None,
	progress=gr.Progress(),
	):
	temp_path = None
	traj_path = None
	md_log_path = None
	atoms = None

	if task_name != "OMol":
	total_charge = 0
	spin_multiplicity = 0

	try:
	atoms = load_check_ase_atoms(structure_file)

	# Check if the file is an example
	example = hash_file(structure_file) in EXAMPLE_FILE_HASHES

	atoms.info["charge"] = total_charge
	atoms.info["spin"] = spin_multiplicity

	atoms.calc = HFEndpointCalculator(
	atoms,
	endpoint_url=INFERENCE_ENDPOINT_URL,
	oauth_token=oauth_token,
	example=example,
	task_name=task_name,
	)

	# Attach a progress callback to track in gradio
	interval = 1
	steps = [0]
	expected_steps = num_steps + num_prerelax_steps

	def update_progress():
	steps[-1] += interval
	progress(steps[-1] / expected_steps)

	with tempfile.NamedTemporaryFile(suffix=".traj", delete=False) as traj_f:
	traj_path = traj_f.name
	with tempfile.NamedTemporaryFile(suffix=".log", delete=False) as log_f:
	md_log_path = log_f.name

	# Do a quick pre-relaxation to make sure the system is stable before starting
	opt = LBFGS(atoms, logfile=md_log_path, trajectory=traj_path)
	opt.attach(update_progress, interval=interval)
	opt.run(fmax=0.05, steps=num_prerelax_steps)

	# Initialize the velocity distribution. Since we did a relaxation, half of this
	# will partition to the potential energy right away, so we double the temperature
	MaxwellBoltzmannDistribution(atoms, temperature_K=temperature_k * 2)

	# Initialize the MD integrator
	if md_ensemble == "NVE":
	dyn = VelocityVerlet(atoms, timestep=md_timestep * units.fs)
	elif md_ensemble == "NVT":
	dyn = NoseHooverChainNVT(
	atoms,
	timestep=md_timestep * units.fs,
	temperature_K=temperature_k,
	tdamp=10 * md_timestep * units.fs,
	)
	traj = Trajectory(traj_path, "a", atoms)
	dyn.attach(traj.write, interval=1)
	dyn.attach(update_progress, interval=interval)
	dyn.attach(
	MDLogger(
	dyn,
	atoms,
	md_log_path,
	header=True,
	stress=False,
	peratom=True,
	mode="a",
	),
	interval=10,
	)

	# Run the simulation!
	dyn.run(num_steps)

	reproduction_script = f"""
	import ase.io
	from ase.md.velocitydistribution import MaxwellBoltzmannDistribution
	from ase.md.verlet import VelocityVerlet
	from ase.optimize import LBFGS
	from ase.io.trajectory import Trajectory
	from ase.md import MDLogger
	from ase import units
	from fairchem.core import pretrained_mlip, FAIRChemCalculator

	# Read the atoms object from ASE read-able file
	atoms = ase.io.read('input_file.traj')

	# Set the total charge and spin multiplicity if using the OMol task
	atoms.info["charge"] = {total_charge}
	atoms.info["spin"] = {spin_multiplicity}

	# Set up the calculator
	predictor = pretrained_mlip.get_predict_unit('uma-sm', device='cuda')
	atoms.calc = FAIRChemCalculator(predictor, task_name='{task_name}')

	# Do a quick pre-relaxation to make sure the system is stable
	opt = LBFGS(atoms, trajectory="relaxation_output.traj")
	opt.run(fmax=0.05, steps={num_prerelax_steps})

	# Initialize the velocity distribution; we set twice the temperature since we did a relaxation and
	# much of the kinetic energy will partition to the potential energy right away
	MaxwellBoltzmannDistribution(atoms, temperature_K={temperature_k}*2)

	# Initialize the integrator; NVE is shown here as an example, see https://wiki.fysik.dtu.dk/ase/ase/md.html for all options
	dyn = VelocityVerlet(atoms, timestep={md_timestep} * units.fs)

	# Set up trajectory and MD logger
	dyn.attach(MDLogger(dyn, atoms, 'md.log', header=True, stress=False, peratom=True, mode="w"), interval=10)
	traj = Trajectory("md_output.traj"', "w", atoms)
	dyn.attach(traj.write, interval=1)

	# Run the simulation!
	dyn.run({num_steps})
	"""

	with open(md_log_path, "r") as md_log_file:
	md_log = md_log_file.read()

	if explanation is None:
	explanation = f"MD simulation of {len(atoms)} atoms for {num_steps} steps with a timestep of {md_timestep} fs at {temperature_k} K in the {md_ensemble} ensemble using the {task_name} UMA task. You submitted this simulation, so I hope you know what's you're looking for or what it means!"

	return traj_path, md_log, reproduction_script, explanation
	except Exception as e:
	raise gr.Error(
	f"Error running MD simulation: {str(e)}. Please try again or report this error."
	)
	finally:
	if temp_path and os.path.exists(temp_path):
	os.remove(temp_path)

	if md_log_path and os.path.exists(md_log_path):
	os.remove(md_log_path)

	if atoms is not None and getattr(atoms, "calc", None) is not None:
	calc = atoms.calc
	atoms.calc = None
	del calc


	def run_relaxation_simulation(
	structure_file,
	num_steps,
	fmax,
	task_name,
	total_charge: float,
	spin_multiplicity: float,
	relax_unit_cell,
	explanation: str \| None = None,
	oauth_token: gr.OAuthToken \| None = None,
	progress=gr.Progress(),
	):
	temp_path = None
	traj_path = None
	opt_log_path = None
	atoms = None

	if task_name != "OMol":
	total_charge = 0
	spin_multiplicity = 0

	try:
	atoms = load_check_ase_atoms(structure_file)

	# Check if the file is an example
	example = hash_file(structure_file) in EXAMPLE_FILE_HASHES

	# Center things for consistency in visualization
	atoms.positions -= atoms.get_center_of_mass()
	cell_center = atoms.get_cell().sum(axis=0) / 2
	atoms.positions += cell_center

	atoms.info["charge"] = total_charge
	atoms.info["spin"] = spin_multiplicity

	atoms.calc = HFEndpointCalculator(
	atoms,
	endpoint_url=INFERENCE_ENDPOINT_URL,
	oauth_token=oauth_token,
	example=example,
	task_name=task_name,
	)

	# Set up a trajectory file to keep the results
	with tempfile.NamedTemporaryFile(suffix=".traj", delete=False) as traj_f:
	traj_path = traj_f.name
	with tempfile.NamedTemporaryFile(suffix=".log", delete=False) as log_f:
	opt_log_path = log_f.name

	optimizer = LBFGS(
	FrechetCellFilter(atoms) if relax_unit_cell else atoms,
	trajectory=traj_path,
	logfile=opt_log_path,
	)

	# Attach a progress callback to track in gradio
	interval = 1
	steps = [0]

	def update_progress(steps):
	steps[-1] += interval
	progress(steps[-1] / num_steps)

	optimizer.attach(update_progress, interval=interval, steps=steps)

	optimizer.run(fmax=fmax, steps=num_steps)

	reproduction_script = f"""
	import ase.io
	from ase.optimize import LBFGS
	from ase.filters import FrechetCellFilter
	from fairchem.core import pretrained_mlip, FAIRChemCalculator

	# Read the atoms object from ASE read-able file
	atoms = ase.io.read('input_file.traj')

	# Set the total charge and spin multiplicity if using the OMol task
	atoms.info["charge"] = {total_charge}
	atoms.info["spin"] = {spin_multiplicity}

	# Set up the calculator
	predictor = pretrained_mlip.get_predict_unit('uma-sm', device='cuda')
	atoms.calc = FAIRChemCalculator(predictor, task_name='{task_name}')

	# Initialize the optimizer from ASE
	relax_unit_cell = {relax_unit_cell}
	optimizer = LBFGS(FrechetCellFilter(atoms) if relax_unit_cell else atoms, trajectory="relaxation_output.traj")

	# Run the simulation!
	dyn.run({num_steps}, fmax={fmax})
	"""

	with open(opt_log_path, "r") as opt_log_file:
	opt_log = opt_log_file.read()

	if explanation is None:
	explanation = f"Relaxation of {len(atoms)} atoms for {num_steps} steps with a force tolerance of {fmax} eV/Å using the {task_name} UMA task. You submitted this simulation, so I hope you know what's you're looking for or what it means!"
	return traj_path, opt_log, reproduction_script, explanation
	except Exception as e:
	raise gr.Error(
	f"Error running relaxation: {str(e)}. Please try again or report this error."
	)
	# Make sure we clean up the temp traj files
	finally:
	if temp_path and os.path.exists(temp_path):
	os.remove(temp_path)
	# if traj_path and os.path.exists(traj_path):
	# os.remove(traj_path)
	if opt_log_path and os.path.exists(opt_log_path):
	os.remove(opt_log_path)

	if atoms is not None and getattr(atoms, "calc", None) is not None:
	calc = atoms.calc
	atoms.calc = None
	del calc