
Commit

🚀 [RofuncRL] Update
Skylark0924 committed Mar 9, 2024
1 parent d6ad694 commit 1ca1f6f
Showing 5 changed files with 1,749 additions and 0 deletions.
@@ -0,0 +1,108 @@
"""
HumanoidPhysHOI (RofuncRL)
===========================
Humanoid backflip/walk/run/dance/hop, trained by RofuncRL
"""

import isaacgym
import argparse

from rofunc.config.utils import omegaconf_to_dict, get_config
from rofunc.learning.RofuncRL.tasks import Tasks
from rofunc.learning.RofuncRL.trainers import Trainers
from rofunc.learning.pre_trained_models.download import model_zoo
from rofunc.learning.utils.utils import set_seed


def train(custom_args):
# Config task and trainer parameters for Isaac Gym environments
task, motion_file = custom_args.task.split('_')
args_overrides = ["task={}".format(task),
"train={}{}RofuncRL".format(task, custom_args.agent.upper()),
"device_id={}".format(custom_args.sim_device),
"rl_device=cuda:{}".format(custom_args.rl_device),
"headless={}".format(custom_args.headless),
"num_envs={}".format(custom_args.num_envs)]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
cfg.task.env.motion_file = f'amp_humanoid_{motion_file}.npy'
cfg_dict = omegaconf_to_dict(cfg.task)

set_seed(cfg.train.Trainer.seed)

# Instantiate the Isaac Gym environment
env = Tasks().task_map[task](cfg=cfg_dict,
rl_device=cfg.rl_device,
sim_device=f'cuda:{cfg.device_id}',
graphics_device_id=cfg.device_id,
headless=cfg.headless,
virtual_screen_capture=cfg.capture_video, # TODO: check
force_render=cfg.force_render)

# Instantiate the RL trainer
trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg,
env=env,
device=cfg.rl_device,
env_name=custom_args.task)
# Start training
trainer.train()


def inference(custom_args):
# Config task and trainer parameters for Isaac Gym environments
task, motion_file = custom_args.task.split('_')
args_overrides = ["task={}".format(task),
"train={}{}RofuncRL".format(task, custom_args.agent.upper()),
"device_id={}".format(custom_args.sim_device),
"rl_device=cuda:{}".format(custom_args.rl_device),
"headless={}".format(False),
"num_envs={}".format(16)]
cfg = get_config('./learning/rl', 'config', args=args_overrides)
cfg_dict = omegaconf_to_dict(cfg.task)

set_seed(cfg.train.Trainer.seed)

# Instantiate the Isaac Gym environment
infer_env = Tasks().task_map[task](cfg=cfg_dict,
rl_device=cfg.rl_device,
sim_device=f'cuda:{cfg.device_id}',
graphics_device_id=cfg.device_id,
headless=cfg.headless,
virtual_screen_capture=cfg.capture_video, # TODO: check
force_render=cfg.force_render)

# Instantiate the RL trainer
trainer = Trainers().trainer_map[custom_args.agent](cfg=cfg,
env=infer_env,
device=cfg.rl_device,
env_name=custom_args.task,
inference=True)

# load checkpoint
if custom_args.ckpt_path is None:
custom_args.ckpt_path = model_zoo(name=f"{custom_args.task}.pth")
trainer.agent.load_ckpt(custom_args.ckpt_path)

# Start inference
trainer.inference()


if __name__ == '__main__':
gpu_id = 0

parser = argparse.ArgumentParser()
# Available tasks: HumanoidAMP_backflip, HumanoidAMP_walk, HumanoidAMP_run, HumanoidAMP_dance, HumanoidAMP_hop
parser.add_argument("--task", type=str, default="HumanoidAMP_hop")
parser.add_argument("--agent", type=str, default="amp") # Available agent: amp
parser.add_argument("--num_envs", type=int, default=4096)
parser.add_argument("--sim_device", type=int, default=0)
parser.add_argument("--rl_device", type=int, default=gpu_id)
parser.add_argument("--headless", type=str, default="True")
parser.add_argument("--inference", action="store_true", help="turn to inference mode while adding this argument")
parser.add_argument("--ckpt_path", type=str, default=None)
custom_args = parser.parse_args()

if not custom_args.inference:
train(custom_args)
else:
inference(custom_args)
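
For reference, the example above can also be driven programmatically rather than through its command line; a minimal sketch, assuming train/inference are importable from this script (the argparse.Namespace fields mirror the flags defined under __main__, and the concrete values are illustrative, not defaults):

import argparse

# Equivalent to running the script with --inference; the field values are illustrative.
custom_args = argparse.Namespace(task="HumanoidAMP_hop", agent="amp", num_envs=16,
                                 sim_device=0, rl_device=0, headless="False",
                                 inference=True, ckpt_path=None)
inference(custom_args)  # with ckpt_path=None, a pre-trained checkpoint is fetched via model_zoo
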
68 changes: 68 additions & 0 deletions rofunc/config/learning/rl/task/HumanoidPhysHOI.yaml
@@ -0,0 +1,68 @@
# if given, will override the device setting in gym.
env:
  numEnvs: 2048
  envSpacing: 5
  episodeLength: 40
  isFlagrun: False
  enableDebugVis: False
  playdataset: False
  projtype: "None"
  saveImages: False
  initVel: False

  pdControl: True
  powerScale: 1.0
  controlFrequencyInv: 2 # 30 Hz
  stateInit: "Start" #Random
  hybridInitProb: 0.5
  dataFPS: 25
  dataFramesScale: 1.2 # 25->30fps
  ballSize: 1.

  localRootObs: False
  keyBodies: ["Head", "L_Knee", "R_Knee", "L_Elbow", "R_Elbow", "L_Ankle", "R_Ankle", "L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3", "L_Thumb3", "R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3", "R_Thumb3"] # #["L_Hip", "L_Knee", "left_foot", "L_Toe", "R_Hip", "R_Knee", "right_foot", "R_Toe", "Torso", "Spine", "Chest", "Neck", "Head", "L_Thorax", "L_Shoulder", "L_Elbow", "left_hand", "R_Thorax", "R_Shoulder", "R_Elbow", "right_hand"] #["right_hand", "left_hand", "right_foot", "left_foot"]
  contactBodies: ["L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3", "L_Thumb3", "R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3", "R_Thumb3"] #["right_foot", "left_foot"]
  terminationHeight: 0.15
  enableEarlyTermination: True

  asset:
    assetRoot: "physhoi/data/assets"
    assetFileName: "smplx/smplx_capsule.xml"

  plane:
    staticFriction: 1.0
    dynamicFriction: 1.0
    restitution: 1.6

  rewardWeights:
    p: 50.
    r: 50.
    pv: 0.
    rv: 0.

    op: 1.
    or: 0.
    opv: 0.
    orv: 0.

    ig: 20.

    cg1: 5.
    cg2: 5.

sim:
  substeps: 2
  physx:
    num_threads: 4
    solver_type: 1 # 0: pgs, 1: tgs
    num_position_iterations: 4
    num_velocity_iterations: 0
    contact_offset: 0.02
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 10.0
    default_buffer_size_multiplier: 10.0

  flex:
    num_inner_iterations: 10
    warm_start: 0.25
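
As a quick orientation to the timing entries above: assuming the usual 60 Hz Isaac Gym simulation step (no dt appears in this file, so that rate is an assumption), controlFrequencyInv: 2 yields a 30 Hz control rate, and dataFramesScale: 1.2 retimes the 25 fps reference motion to the same 30 fps, exactly as the inline comments state. A minimal sanity check in plain Python:

# Assumed 60 Hz simulation step; not a value read from this file.
sim_hz = 60
control_hz = sim_hz / 2    # controlFrequencyInv: 2 -> 30 Hz control ("# 30 Hz")
data_hz = 25 * 1.2         # dataFPS * dataFramesScale -> 30 fps ("# 25->30fps")
print(control_hz, data_hz)  # both ~30

# The 60 Hz variant below (HumanoidPhyshoi60hz.yaml) keeps dataFPS: 25 but sets
# controlFrequencyInv: 1 and dataFramesScale: 2.4, i.e. 60 Hz control and ~60 fps motion data.
print(sim_hz / 1, 25 * 2.4)  # both ~60
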
68 changes: 68 additions & 0 deletions rofunc/config/learning/rl/task/HumanoidPhyshoi60hz.yaml
@@ -0,0 +1,68 @@
# if given, will override the device setting in gym.
env:
  numEnvs: 2048
  envSpacing: 5
  episodeLength: 40
  isFlagrun: False
  enableDebugVis: False
  playdataset: False
  projtype: "None"
  saveImages: False
  initVel: False

  pdControl: True
  powerScale: 1.0
  controlFrequencyInv: 1 # 60 Hz
  stateInit: "Start" #Random
  hybridInitProb: 0.5
  dataFPS: 25
  dataFramesScale: 2.4 # 25->60fps
  ballSize: 1.

  localRootObs: False
  keyBodies: ["Head", "L_Knee", "R_Knee", "L_Elbow", "R_Elbow", "L_Ankle", "R_Ankle", "L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3", "L_Thumb3", "R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3", "R_Thumb3"] # #["L_Hip", "L_Knee", "left_foot", "L_Toe", "R_Hip", "R_Knee", "right_foot", "R_Toe", "Torso", "Spine", "Chest", "Neck", "Head", "L_Thorax", "L_Shoulder", "L_Elbow", "left_hand", "R_Thorax", "R_Shoulder", "R_Elbow", "right_hand"] #["right_hand", "left_hand", "right_foot", "left_foot"]
  contactBodies: ["L_Index3", "L_Middle3", "L_Pinky3", "L_Ring3", "L_Thumb3", "R_Index3", "R_Middle3", "R_Pinky3", "R_Ring3", "R_Thumb3"] #["right_foot", "left_foot"]
  terminationHeight: 0.15
  enableEarlyTermination: True

  asset:
    assetRoot: "physhoi/data/assets"
    assetFileName: "smplx/smplx_capsule.xml"

  plane:
    staticFriction: 1.0
    dynamicFriction: 1.0
    restitution: 1.6

  rewardWeights:
    p: 50.
    r: 50.
    pv: 0.
    rv: 0.

    op: 1.
    or: 0.
    opv: 0.
    orv: 0.

    ig: 20.

    cg1: 5.
    cg2: 1.

sim:
  substeps: 2
  physx:
    num_threads: 4
    solver_type: 1 # 0: pgs, 1: tgs
    num_position_iterations: 4
    num_velocity_iterations: 0
    contact_offset: 0.02
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 10.0
    default_buffer_size_multiplier: 10.0

  flex:
    num_inner_iterations: 10
    warm_start: 0.25
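
One practical note on the rewardWeights block in both configs: because `or` is a Python keyword, that entry cannot be reached through OmegaConf attribute access (cfg. ... .or would not parse), while dict-style indexing works for every key. A sketch under stated assumptions: cfg_dict comes from omegaconf_to_dict(cfg.task) as in the example script near the top of this commit, and rewardWeights is nested under env (an assumption about this file's layout).

# Assumes cfg_dict = omegaconf_to_dict(cfg.task) as in the example script, and that
# rewardWeights sits under "env" (an assumption about this file's nesting).
reward_weights = cfg_dict["env"]["rewardWeights"]
print(reward_weights["p"], reward_weights["ig"])  # 50.0 and 20.0 in this file
print(reward_weights["or"])  # "or" is a Python keyword, so attribute-style access would not parse
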
32 changes: 32 additions & 0 deletions rofunc/learning/RofuncRL/agents/mixline/physhoi_agent.py
@@ -0,0 +1,32 @@
# Copyright 2023, Junjia LIU, [email protected]
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gym
import gymnasium
import torch
import torch.nn as nn
import torch.nn.functional as F
from omegaconf import DictConfig
from typing import Callable, Union, Tuple, Optional

import rofunc as rf
from rofunc.learning.RofuncRL.agents.base_agent import BaseAgent
from rofunc.learning.RofuncRL.agents.mixline.amp_agent import AMPAgent
from rofunc.learning.RofuncRL.models.base_models import BaseMLP
from rofunc.learning.RofuncRL.utils.memory import Memory


class PhysHOIAgent(AMPAgent):
    """PhysHOI agent stub built on top of the AMP agent."""

    def __init__(self):
        super().__init__()